Mirror of https://github.com/domainaware/parsedmarc.git
Synced 2026-03-11 01:01:26 +00:00

Compare commits (80 commits, SHA1):
7929919223 faa68333a9 d34a33e980 9040a38842 ea0e3b11c1 199b782191 25f3c3e1d0 a14ff66f5a
fb738bf9c4 0e811fe0ff 56eb565ad2 2c3abb3e8c 326e630f50 cdc30e6780 f2febf21d3 79f47121a4
6e6c90e19b c4d7455839 95e6fb85a1 298d5b6e6e a3c5bb906b d49ce6a13f adb0d31382 ae5d20ecf5
e98fdfa96b 9551c8b467 d987943c22 3d8a99b5d3 5aaaedf463 2e3ee25ec9 33eb2aaf62 1387fb4899
4d97bd25aa 17a612df0c 221bc332ef a2a75f7a81 50fcb51577 dd9ef90773 0e3a4b0f06 343b53ef18
792079a3e8 1f3a1fc843 34fa0c145d 6719a06388 eafa435868 5d772c3b36 72cabbef23 3d74cd6ac0
d1ac59a016 7fdd53008f 35331d4b84 de9edd3590 abf4bdba13 7b842740f5 ebe3ccf40a 808285658f
bc1dae29bd 4b904444e5 3608bce344 fe809c4c3f a76c2f9621 bb8f4002bf b5773c6b4a b99bd67225
af9ad568ec 748164d177 487e5e1149 73010cf964 a4a5475aa8 dab78880df fb54e3b742 6799f10364
445c9565a4 4b786846ae 23ae563cd8 cdd000e675 7d58abc67b a18ae439de d7061330a8 9d5654b8ec
.github/ISSUE_TEMPLATE/bug_report.yml (vendored, new file, 72 lines):

```yaml
name: Bug report
description: Report a reproducible parsedmarc bug
title: "[Bug]: "
labels:
  - bug
body:
  - type: input
    id: version
    attributes:
      label: parsedmarc version
      description: Include the parsedmarc version or commit if known.
      placeholder: 9.x.x
    validations:
      required: true
  - type: dropdown
    id: input_backend
    attributes:
      label: Input backend
      description: Which input path or mailbox backend is involved?
      options:
        - IMAP
        - MS Graph
        - Gmail API
        - Maildir
        - mbox
        - Local file / direct parse
        - Other
    validations:
      required: true
  - type: textarea
    id: environment
    attributes:
      label: Environment
      description: Runtime, container image, OS, Python version, or deployment details.
      placeholder: Docker on Debian, Python 3.12, parsedmarc installed from PyPI
    validations:
      required: true
  - type: textarea
    id: config
    attributes:
      label: Sanitized config
      description: Include the relevant config fragment with secrets removed.
      render: ini
  - type: textarea
    id: steps
    attributes:
      label: Steps to reproduce
      description: Describe the smallest reproducible sequence you can.
      placeholder: |
        1. Configure parsedmarc with ...
        2. Run ...
        3. Observe ...
    validations:
      required: true
  - type: textarea
    id: expected_actual
    attributes:
      label: Expected vs actual behavior
      description: What did you expect, and what happened instead?
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Logs or traceback
      description: Paste sanitized logs or a traceback if available.
      render: text
  - type: textarea
    id: samples
    attributes:
      label: Sample report availability
      description: If you can share a sanitized sample report or message, note that here.
```
.github/ISSUE_TEMPLATE/config.yml (vendored, new file, 5 lines):

```yaml
blank_issues_enabled: true
contact_links:
  - name: Security issue
    url: https://github.com/domainaware/parsedmarc/security/policy
    about: Please use the security policy and avoid filing public issues for undisclosed vulnerabilities.
```
.github/ISSUE_TEMPLATE/feature_request.yml (vendored, new file, 30 lines):

```yaml
name: Feature request
description: Suggest a new feature or behavior change
title: "[Feature]: "
labels:
  - enhancement
body:
  - type: textarea
    id: problem
    attributes:
      label: Problem statement
      description: What workflow or limitation are you trying to solve?
    validations:
      required: true
  - type: textarea
    id: proposal
    attributes:
      label: Proposed behavior
      description: Describe the feature or behavior you want.
    validations:
      required: true
  - type: textarea
    id: alternatives
    attributes:
      label: Alternatives considered
      description: Describe workarounds or alternative approaches you considered.
  - type: textarea
    id: impact
    attributes:
      label: Compatibility or operational impact
      description: Note config, output, performance, or deployment implications if relevant.
```
.github/pull_request_template.md (vendored, new file, 24 lines):

```markdown
## Summary

-

## Why

-

## Testing

-

## Backward Compatibility / Risk

-

## Related Issue

- Closes #

## Checklist

- [ ] Tests added or updated if behavior changed
- [ ] Docs updated if config or user-facing behavior changed
```
.github/workflows/python-tests.yml (vendored, 39 changes):

```diff
@@ -10,7 +10,32 @@ on:
     branches: [ master ]
 
 jobs:
-  build:
+  lint-docs-build:
     runs-on: ubuntu-latest
 
+    steps:
+      - uses: actions/checkout@v5
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.13"
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[build]
+      - name: Check code style
+        run: |
+          ruff check .
+      - name: Test building documentation
+        run: |
+          cd docs
+          make html
+      - name: Test building packages
+        run: |
+          hatch build
+
+  test:
+    needs: lint-docs-build
+    runs-on: ubuntu-latest
+
     services:
@@ -30,7 +55,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
+        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
 
     steps:
       - uses: actions/checkout@v5
@@ -46,13 +71,6 @@
         run: |
           python -m pip install --upgrade pip
           pip install .[build]
-      - name: Test building documentation
-        run: |
-          cd docs
-          make html
-      - name: Check code style
-        run: |
-          ruff check .
       - name: Run unit tests
         run: |
           pytest --cov --cov-report=xml tests.py
@@ -61,9 +79,6 @@
           pip install -e .
           parsedmarc --debug -c ci.ini samples/aggregate/*
           parsedmarc --debug -c ci.ini samples/forensic/*
-      - name: Test building packages
-        run: |
-          hatch build
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v5
         with:
```
.vscode/settings.json (vendored, 302 changes):

```diff
@@ -1,150 +1,166 @@
 {
   "[python]": {
     "editor.defaultFormatter": "charliermarsh.ruff",
     "editor.formatOnSave": true,
+
+    // Let Ruff handle lint fixes + import sorting on save
+    "editor.codeActionsOnSave": {
+      "source.fixAll.ruff": "explicit",
+      "source.organizeImports.ruff": "explicit"
+    }
   },
   "markdownlint.config": {
     "MD024": false
   },
   "cSpell.words": [
     "adkim",
     "akamaiedge",
     "amsmath",
     "andrewmcgilvray",
     "arcname",
     "aspf",
     "autoclass",
     "automodule",
     "backported",
     "bellsouth",
     "boto",
     "brakhane",
     "Brightmail",
     "CEST",
     "CHACHA",
     "checkdmarc",
     "Codecov",
     "confnew",
     "dateparser",
     "dateutil",
     "Davmail",
     "DBIP",
     "dearmor",
     "deflist",
     "devel",
     "DMARC",
     "Dmarcian",
     "dnspython",
     "dollarmath",
     "dpkg",
     "exampleuser",
     "expiringdict",
     "fieldlist",
     "GELF",
     "genindex",
     "geoip",
     "geoipupdate",
     "Geolite",
     "geolocation",
     "githubpages",
     "Grafana",
     "hostnames",
     "htpasswd",
     "httpasswd",
     "httplib",
+    "ifhost",
     "IMAP",
     "imapclient",
     "infile",
     "Interaktive",
     "IPDB",
     "journalctl",
+    "kafkaclient",
     "keepalive",
     "keyout",
     "keyrings",
     "Leeman",
     "libemail",
     "linkify",
     "LISTSERV",
+    "loganalytics",
     "lxml",
     "mailparser",
     "mailrelay",
     "mailsuite",
     "maxdepth",
     "MAXHEADERS",
     "maxmind",
     "mbox",
     "mfrom",
+    "mhdw",
     "michaeldavie",
     "mikesiegel",
     "Mimecast",
     "mitigations",
     "MMDB",
     "modindex",
     "msgconvert",
     "msgraph",
     "MSSP",
     "multiprocess",
     "Munge",
     "ndjson",
     "newkey",
     "Nhcm",
     "nojekyll",
     "nondigest",
     "nosecureimap",
     "nosniff",
     "nwettbewerb",
     "opensearch",
     "opensearchpy",
     "parsedmarc",
     "passsword",
+    "pbar",
     "Postorius",
     "premade",
+    "privatesuffix",
     "procs",
     "publicsuffix",
     "publicsuffixlist",
     "publixsuffix",
     "pygelf",
     "pypy",
     "pytest",
     "quickstart",
     "Reindex",
     "replyto",
     "reversename",
     "Rollup",
     "Rpdm",
     "SAMEORIGIN",
     "sdist",
     "Servernameone",
     "setuptools",
     "smartquotes",
     "SMTPTLS",
     "sortlists",
     "sortmaps",
     "sourcetype",
     "STARTTLS",
     "tasklist",
     "timespan",
     "tlsa",
     "tlsrpt",
     "toctree",
     "TQDDM",
     "tqdm",
     "truststore",
     "Übersicht",
     "uids",
     "Uncategorized",
     "unparasable",
     "uper",
     "urllib",
     "Valimail",
     "venv",
     "Vhcw",
     "viewcode",
     "virtualenv",
     "WBITS",
     "webmail",
     "Wettbewerber",
     "Whalen",
     "whitespaces",
     "xennn",
     "xmltodict",
     "xpack",
     "zscholl"
   ],
 }
```
AGENTS.md (new file, 64 lines):

# AGENTS.md

This file provides guidance to AI agents when working with code in this repository.

## Project Overview

parsedmarc is a Python module and CLI utility for parsing DMARC aggregate (RUA), forensic (RUF), and SMTP TLS reports. It reads reports from IMAP, Microsoft Graph, Gmail API, Maildir, mbox files, or direct file paths, and outputs to JSON/CSV, Elasticsearch, OpenSearch, Splunk, Kafka, S3, Azure Log Analytics, syslog, or webhooks.

## Common Commands

```bash
# Install with dev/build dependencies
pip install .[build]

# Run all tests with coverage
pytest --cov --cov-report=xml tests.py

# Run a single test
pytest tests.py::Test::testAggregateSamples

# Lint and format
ruff check .
ruff format .

# Test CLI with sample reports
parsedmarc --debug -c ci.ini samples/aggregate/*
parsedmarc --debug -c ci.ini samples/forensic/*

# Build docs
cd docs && make html

# Build distribution
hatch build
```

To skip DNS lookups during testing, set `GITHUB_ACTIONS=true`.

## Architecture

**Data flow:** Input sources → CLI (`cli.py:_main`) → Parse (`__init__.py`) → Enrich (DNS/GeoIP via `utils.py`) → Output integrations

### Key modules

- `parsedmarc/__init__.py` — Core parsing logic. Main functions: `parse_report_file()`, `parse_report_email()`, `parse_aggregate_report_xml()`, `parse_forensic_report()`, `parse_smtp_tls_report_json()`, `get_dmarc_reports_from_mailbox()`, `watch_inbox()` (see the usage sketch after this list)
- `parsedmarc/cli.py` — CLI entry point (`_main`), config file parsing, output orchestration
- `parsedmarc/types.py` — TypedDict definitions for all report types (`AggregateReport`, `ForensicReport`, `SMTPTLSReport`, `ParsingResults`)
- `parsedmarc/utils.py` — IP/DNS/GeoIP enrichment, base64 decoding, compression handling
- `parsedmarc/mail/` — Polymorphic mail connections: `IMAPConnection`, `GmailConnection`, `MSGraphConnection`, `MaildirConnection`
- `parsedmarc/{elastic,opensearch,splunk,kafkaclient,loganalytics,syslog,s3,webhook,gelf}.py` — Output integrations
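The parsing entry points are importable straight from the package. A minimal sketch of the flow, assuming a sample aggregate report on disk; the file path is a placeholder, and `offline=True` simply skips online enrichment lookups:

```python
from parsedmarc import ParserError, parse_report_file

try:
    # Returns a dict with the detected report type and the parsed report body.
    result = parse_report_file("samples/aggregate/example.xml", offline=True)
    print(result["report_type"])  # "aggregate", "forensic", or "smtp_tls"
    print(result["report"]["report_metadata"]["org_name"])
except ParserError as error:
    # ParserError is the root of the exception hierarchy described below.
    print(f"Could not parse the report: {error}")
```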

### Report type system

`ReportType = Literal["aggregate", "forensic", "smtp_tls"]`. Exception hierarchy: `ParserError` → `InvalidDMARCReport` → `InvalidAggregateReport`/`InvalidForensicReport`, and `InvalidSMTPTLSReport`.

### Caching

IP address info is cached for 4 hours, and seen aggregate report IDs are cached for 1 hour (via `ExpiringDict`), as sketched below.
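The `expiringdict` package provides a dict with a per-entry TTL and a size cap. A minimal sketch of the caching pattern; only the two TTLs come from the text above, while the cache sizes, key names, and helper are illustrative:

```python
from expiringdict import ExpiringDict

# TTLs from the section above: 4 hours for IP info, 1 hour for seen report IDs.
IP_CACHE = ExpiringDict(max_len=10000, max_age_seconds=4 * 60 * 60)
SEEN_REPORT_IDS = ExpiringDict(max_len=10000, max_age_seconds=60 * 60)


def get_ip_info(ip: str) -> dict:
    """Return cached enrichment data for an IP, computing it on a cache miss."""
    info = IP_CACHE.get(ip)  # returns None when missing or expired
    if info is None:
        info = {"ip_address": ip}  # stand-in for the real DNS/GeoIP lookup
        IP_CACHE[ip] = info
    return info
```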

## Code Style

- Ruff for formatting and linting (configured in `.vscode/settings.json`)
- TypedDict for structured data, type hints throughout
- Python ≥3.10 required
- Tests are in a single `tests.py` file using unittest; sample reports live in `samples/`
CHANGELOG.md (102 changes):

```diff
@@ -1,5 +1,107 @@
 # Changelog
 
+## 9.2.0
+
+### Added
+
+- OpenSearch AWS SigV4 authentication support (PR #673)
+- IMAP move/delete compatibility fallbacks (PR #671)
+- `fail_on_output_error` CLI option for sink failures (PR #672)
+- Gmail service account auth mode for non-interactive runs (PR #676)
+- Microsoft Graph certificate authentication support (PRs #692 and #693)
+- Microsoft Graph well-known folder fallback for root listing failures (PRs #618 and #684 close #609)
+
+### Fixed
+
+- Pass the mailbox since filter through the `watch_inbox` callback (PR #670 closes issue #581)
+- `parsedmarc.mail.gmail.GmailConnection.delete_message` now properly calls the Gmail API (PR #668)
+- Avoid an extra mailbox fetch in batch and test mode (PR #691 closes #533)
+
+## 9.1.2
+
+### Fixes
+
+- Fix duplicate detection for normalized aggregate reports in Elasticsearch/OpenSearch (PR #666 fixes issue #665)
+
+## 9.1.1
+
+### Fixes
+
+- Fix the use of Elasticsearch and OpenSearch API keys (PR #660 fixes issue #653)
+
+### Changes
+
+- Drop support for Python 3.9 (PR #661)
+
+## 9.1.0
+
+### Enhancements
+
+- Add TCP and TLS support for syslog output. (#656)
+- Skip DNS lookups in GitHub Actions to prevent DNS timeouts during tests. (#657)
+- Remove microseconds from DMARC aggregate report time ranges before parsing them.
+
+## 9.0.10
+
+- Support Python 3.14+
+
+## 9.0.9
+
+### Fixes
+
+- Validate that a string is base64-encoded before trying to base64 decode it. (PRs #648 and #649)
+
+## 9.0.8
+
+### Fixes
+
+- Fix logging configuration not propagating to child parser processes (#646).
+- Update the `mailsuite` dependency to `>=1.11.1` to solve issues with iCloud IMAP (#493).
+
+## 9.0.7
+
+### Fixes
+
+- Fix the IMAP `since` option (PR #645 closes issues #581 and #643).
+
+## 9.0.6
+
+### Fixes
+
+- Fix #638.
+- Fix/clarify report extraction and parsing behavior for multiple input types (bytes, base64 strings, and file-like objects).
+- Fix type mismatches that could cause runtime issues in SMTP emailing and CLI option handling.
+
+### Improvements
+
+- Improve type hints across the library (Pylance/Pyright friendliness) and reduce false-positive linter errors.
+- Emails in Microsoft 365 are now marked read as they are processed. This provides consistency with other mailbox types, and gives you an indication of progress as emails are processed in batches. (Closes #625)
+
+### Compatibility / Dependencies
+
+- Set the Python requirement to `>=3.9,<3.14`.
+- Bump the `mailsuite` requirement to `>=1.11.0`.
+
+## 9.0.5
+
+### Fixes
+
+- Fix the report type detection regression introduced in `9.0.4`.
+
+## 9.0.4 (Yanked)
+
+### Fixes
+
+- Fix saving reports to OpenSearch ([#637](https://github.com/domainaware/parsedmarc/issues/637))
+- Fix parsing certain DMARC failure/forensic reports
+- Some fixes to type hints (incomplete, but published as-is due to the above bugs)
+
+## 9.0.3
+
+### Fixes
+
+- Set `requires-python` to `>=3.9, <3.14` to avoid [this bug](https://github.com/python/cpython/issues/142307)
+
 ## 9.0.2
 
 ### Improvements
```
CONTRIBUTING.md (new file, 78 lines):

# Contributing

Thanks for contributing to parsedmarc.

## Local setup

Use a virtual environment for local development.

```bash
python3 -m venv .venv
. .venv/bin/activate
python -m pip install --upgrade pip
pip install .[build]
```

## Before opening a pull request

Run the checks that match your change:

```bash
ruff check .
pytest --cov --cov-report=xml tests.py
```

If you changed documentation:

```bash
cd docs
make html
```

If you changed CLI behavior or parsing logic, it is also useful to exercise the
sample reports:

```bash
parsedmarc --debug -c ci.ini samples/aggregate/*
parsedmarc --debug -c ci.ini samples/forensic/*
```

To skip DNS lookups during tests, set:

```bash
GITHUB_ACTIONS=true
```

## Pull request guidelines

- Keep pull requests small and focused. Separate bug fixes, docs updates, and
  repo-maintenance changes where practical.
- Add or update tests when behavior changes.
- Update docs when configuration or user-facing behavior changes.
- Include a short summary, the reason for the change, and the testing you ran.
- Link the related issue when there is one.

## Branch maintenance

Upstream `master` may move quickly. Before asking for review or after another PR
lands, rebase your branch onto the current upstream branch and force-push with
`--force-with-lease` if needed:

```bash
git fetch upstream
git rebase upstream/master
git push --force-with-lease
```

## CI and coverage

GitHub Actions is the source of truth for linting, docs, and test status.

Codecov patch coverage is usually the most relevant signal for small PRs. Project
coverage can be noisier when the base comparison is stale, so interpret it in
the context of the actual diff.

## Questions

Use GitHub issues for bugs and feature requests. If you are not sure whether a
change is wanted, opening an issue first is usually the safest path.
README.md (18 changes):

```diff
@@ -53,12 +53,12 @@ for RHEL or Debian.
 | Version | Supported | Reason                                                      |
 |---------|-----------|-------------------------------------------------------------|
 | < 3.6   | ❌        | End of Life (EOL)                                           |
-| 3.6     | ❌        | Used in RHHEL 8, but not supported by project dependencies  |
-| 3.7     | ❌        | End of Life (EOL)                                           |
-| 3.8     | ❌        | End of Life (EOL)                                           |
-| 3.9     | ✅        | Supported until August 2026 (Debian 11); May 2032 (RHEL 9)  |
-| 3.10    | ✅        | Actively maintained                                         |
-| 3.11    | ✅        | Actively maintained; supported until June 2028 (Debian 12)  |
-| 3.12    | ✅        | Actively maintained; supported until May 2035 (RHEL 10)     |
-| 3.13    | ✅        | Actively maintained; supported until June 2030 (Debian 13)  |
-| 3.14    | ✅        | Actively maintained                                         |
+| 3.6     | ❌        | Used in RHEL 8, but not supported by project dependencies   |
+| 3.7     | ❌        | End of Life (EOL)                                           |
+| 3.8     | ❌        | End of Life (EOL)                                           |
+| 3.9     | ❌        | Used in Debian 11 and RHEL 9, but not supported by project dependencies |
+| 3.10    | ✅        | Actively maintained                                         |
+| 3.11    | ✅        | Actively maintained; supported until June 2028 (Debian 12)  |
+| 3.12    | ✅        | Actively maintained; supported until May 2035 (RHEL 10)     |
+| 3.13    | ✅        | Actively maintained; supported until June 2030 (Debian 13)  |
+| 3.14    | ✅        | Supported (requires `imapclient>=3.1.0`)                    |
 
```
SECURITY.md (new file, 29 lines):

# Security Policy

## Reporting a vulnerability

Please do not open a public GitHub issue for an undisclosed security
vulnerability. Use GitHub private vulnerability reporting in the Security tab of this project instead.

When reporting a vulnerability, include:

- the affected parsedmarc version or commit
- the component or integration involved
- clear reproduction details if available
- potential impact
- any suggested mitigation or workaround

## Supported versions

Security fixes will be applied to the latest released version and
the current `master` branch.

Older versions will not receive backported fixes.

## Disclosure process

After a report is received, maintainers will validate the issue, assess impact,
and coordinate a fix before public disclosure.

Please avoid publishing proof-of-concept details until maintainers have had a
reasonable opportunity to investigate and release a fix or mitigation.
build.sh (1 change):

```diff
@@ -9,7 +9,6 @@ fi
 . venv/bin/activate
 pip install .[build]
 ruff format .
 ruff check .
 cd docs
 make clean
 make html
```
ci.ini (1 change):

```diff
@@ -3,6 +3,7 @@ save_aggregate = True
 save_forensic = True
 save_smtp_tls = True
 debug = True
 offline = True
 
 [elasticsearch]
 hosts = http://localhost:9200
```
codecov.yml (new file, 11 lines):

```yaml
codecov:
  require_ci_to_pass: true

coverage:
  status:
    project:
      default:
        informational: true
    patch:
      default:
        informational: false
```
````diff
@@ -28,6 +28,13 @@
    :members:
 ```
 
+## parsedmarc.types
+
+```{eval-rst}
+.. automodule:: parsedmarc.types
+   :members:
+```
+
 ## parsedmarc.utils
 
 ```{eval-rst}
````
````diff
@@ -53,16 +53,15 @@ for RHEL or Debian.
 | Version | Supported | Reason                                                      |
 |---------|-----------|-------------------------------------------------------------|
 | < 3.6   | ❌        | End of Life (EOL)                                           |
-| 3.6     | ❌        | Used in RHHEL 8, but not supported by project dependencies  |
-| 3.7     | ❌        | End of Life (EOL)                                           |
-| 3.8     | ❌        | End of Life (EOL)                                           |
-| 3.9     | ✅        | Supported until August 2026 (Debian 11); May 2032 (RHEL 9)  |
-| 3.10    | ✅        | Actively maintained                                         |
-| 3.11    | ✅        | Actively maintained; supported until June 2028 (Debian 12)  |
-| 3.12    | ✅        | Actively maintained; supported until May 2035 (RHEL 10)     |
-| 3.13    | ✅        | Actively maintained; supported until June 2030 (Debian 13)  |
-| 3.14    | ✅        | Actively maintained                                         |
-
+| 3.6     | ❌        | Used in RHEL 8, but not supported by project dependencies   |
+| 3.7     | ❌        | End of Life (EOL)                                           |
+| 3.8     | ❌        | End of Life (EOL)                                           |
+| 3.9     | ❌        | Used in Debian 11 and RHEL 9, but not supported by project dependencies |
+| 3.10    | ✅        | Actively maintained                                         |
+| 3.11    | ✅        | Actively maintained; supported until June 2028 (Debian 12)  |
+| 3.12    | ✅        | Actively maintained; supported until May 2035 (RHEL 10)     |
+| 3.13    | ✅        | Actively maintained; supported until June 2030 (Debian 13)  |
+| 3.14    | ✅        | Supported (requires `imapclient>=3.1.0`)                    |
 
 ```{toctree}
 :caption: 'Contents'
````
````diff
@@ -162,10 +162,10 @@ sudo -u parsedmarc virtualenv /opt/parsedmarc/venv
 ```
 
 CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems
-explicitly tell `virtualenv` to use `python3.9` instead
+explicitly tell `virtualenv` to use `python3.10` instead
 
 ```bash
-sudo -u parsedmarc virtualenv -p python3.9 /opt/parsedmarc/venv
+sudo -u parsedmarc virtualenv -p python3.10 /opt/parsedmarc/venv
 ```
 
 Activate the virtualenv
````
```diff
@@ -146,6 +146,9 @@ The full set of configuration options are:
 - `dns_timeout` - float: DNS timeout period
 - `debug` - bool: Print debugging messages
 - `silent` - bool: Only print errors (Default: `True`)
+- `fail_on_output_error` - bool: Exit with a non-zero status code if
+  any configured output destination fails while saving/publishing
+  reports (Default: `False`)
 - `log_file` - str: Write log messages to a file at this path
 - `n_procs` - int: Number of processes to run in parallel when
   parsing in CLI mode (Default: `1`)
```
```diff
@@ -171,8 +174,8 @@ The full set of configuration options are:
   - `check_timeout` - int: Number of seconds to wait for an IMAP
     IDLE response or the number of seconds until the next
     mail check (Default: `30`)
-  - `since` - str: Search for messages since certain time. (Examples: `5m|3h|2d|1w`)
-    Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
+  - `since` - str: Search for messages since certain time. (Examples: `5m|3h|2d|1w`)
+    Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
+    Defaults to `1d` if incorrect value is provided.
 - `imap`
   - `host` - str: The IMAP server hostname or IP address
```
```diff
@@ -200,7 +203,7 @@ The full set of configuration options are:
   - `password` - str: The IMAP password
 - `msgraph`
   - `auth_method` - str: Authentication method, valid types are
-    `UsernamePassword`, `DeviceCode`, or `ClientSecret`
+    `UsernamePassword`, `DeviceCode`, `ClientSecret`, or `Certificate`
     (Default: `UsernamePassword`).
   - `user` - str: The M365 user, required when the auth method is
     UsernamePassword
@@ -208,6 +211,11 @@ The full set of configuration options are:
     method is UsernamePassword
   - `client_id` - str: The app registration's client ID
   - `client_secret` - str: The app registration's secret
+  - `certificate_path` - str: Path to a PEM or PKCS12 certificate
+    including the private key. Required when the auth method is
+    `Certificate`
+  - `certificate_password` - str: Optional password for the
+    certificate file when using `Certificate` auth
   - `tenant_id` - str: The Azure AD tenant ID. This is required
     for all auth methods except UsernamePassword.
   - `mailbox` - str: The mailbox name. This defaults to the
```
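parsedmarc wires certificate auth up internally through its `MSGraphConnection`. Purely as an illustration of what certificate-based client-credential authentication looks like against Azure AD, here is a sketch using the `msal` library directly; the tenant ID, client ID, file path, and thumbprint are placeholders, and parsedmarc's actual implementation may differ:

```python
import msal

TENANT_ID = "00000000-0000-0000-0000-000000000000"  # placeholder
CLIENT_ID = "11111111-1111-1111-1111-111111111111"  # placeholder

# Read the PEM private key from the certificate file (placeholder path).
with open("/path/to/certificate.pem") as cert_file:
    private_key = cert_file.read()

app = msal.ConfidentialClientApplication(
    CLIENT_ID,
    authority=f"https://login.microsoftonline.com/{TENANT_ID}",
    client_credential={
        "private_key": private_key,
        # Thumbprint of the certificate uploaded to the app registration.
        "thumbprint": "0123456789ABCDEF0123456789ABCDEF01234567",  # placeholder
    },
)
# Client-credential flow: no interactive login, suitable for daemons.
token = app.acquire_token_for_client(
    scopes=["https://graph.microsoft.com/.default"]
)
```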
````diff
@@ -240,11 +248,14 @@ The full set of configuration options are:
     group and use that as the group id.
 
   ```powershell
-  New-ApplicationAccessPolicy -AccessRight RestrictAccess
+  New-ApplicationAccessPolicy -AccessRight RestrictAccess `
     -AppId "<CLIENT_ID>" -PolicyScopeGroupId "<MAILBOX>" `
     -Description "Restrict access to dmarc reports mailbox."
   ```
 
+  The same application permission and mailbox scoping guidance
+  applies to the `Certificate` auth method.
+
   :::
 - `elasticsearch`
   - `hosts` - str: A comma separated list of hostnames and ports
````
```diff
@@ -281,6 +292,10 @@ The full set of configuration options are:
   - `user` - str: Basic auth username
   - `password` - str: Basic auth password
   - `api_key` - str: API key
+  - `auth_type` - str: Authentication type: `basic` (default) or `awssigv4` (the key `authentication_type` is accepted as an alias for this option)
+  - `aws_region` - str: AWS region for SigV4 authentication
+    (required when `auth_type = awssigv4`)
+  - `aws_service` - str: AWS service for SigV4 signing (Default: `es`)
   - `ssl` - bool: Use an encrypted SSL/TLS connection
     (Default: `True`)
   - `timeout` - float: Timeout in seconds (Default: 60)
```
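For reference, `awssigv4` corresponds to the request signing that the `opensearch-py` client provides. A sketch of that mechanism, assuming standard AWS credentials are available in the environment; the host, region, and service values are placeholders, and this is an illustration rather than parsedmarc's own code:

```python
import boto3
from opensearchpy import AWSV4SignerAuth, OpenSearch, RequestsHttpConnection

# Pick up credentials from the environment, shared config, or an IAM role.
credentials = boto3.Session().get_credentials()
auth = AWSV4SignerAuth(credentials, "us-east-1", "es")  # region/service placeholders

client = OpenSearch(
    hosts=[{"host": "search-example.us-east-1.es.amazonaws.com", "port": 443}],
    http_auth=auth,
    use_ssl=True,
    connection_class=RequestsHttpConnection,
)
print(client.info())
```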
````diff
@@ -336,16 +351,77 @@ The full set of configuration options are:
   - `secret_access_key` - str: The secret access key (Optional)
 - `syslog`
   - `server` - str: The Syslog server name or IP address
-  - `port` - int: The UDP port to use (Default: `514`)
+  - `port` - int: The port to use (Default: `514`)
+  - `protocol` - str: The protocol to use: `udp`, `tcp`, or `tls` (Default: `udp`)
+  - `cafile_path` - str: Path to CA certificate file for TLS server verification (Optional)
+  - `certfile_path` - str: Path to client certificate file for TLS authentication (Optional)
+  - `keyfile_path` - str: Path to client private key file for TLS authentication (Optional)
+  - `timeout` - float: Connection timeout in seconds for TCP/TLS (Default: `5.0`)
+  - `retry_attempts` - int: Number of retry attempts for failed connections (Default: `3`)
+  - `retry_delay` - int: Delay in seconds between retry attempts (Default: `5`)
+
+  **Example UDP configuration (default):**
+
+  ```ini
+  [syslog]
+  server = syslog.example.com
+  port = 514
+  ```
+
+  **Example TCP configuration:**
+
+  ```ini
+  [syslog]
+  server = syslog.example.com
+  port = 6514
+  protocol = tcp
+  timeout = 10.0
+  retry_attempts = 5
+  ```
+
+  **Example TLS configuration with server verification:**
+
+  ```ini
+  [syslog]
+  server = syslog.example.com
+  port = 6514
+  protocol = tls
+  cafile_path = /path/to/ca-cert.pem
+  timeout = 10.0
+  ```
+
+  **Example TLS configuration with mutual authentication:**
+
+  ```ini
+  [syslog]
+  server = syslog.example.com
+  port = 6514
+  protocol = tls
+  cafile_path = /path/to/ca-cert.pem
+  certfile_path = /path/to/client-cert.pem
+  keyfile_path = /path/to/client-key.pem
+  timeout = 10.0
+  retry_attempts = 3
+  retry_delay = 5
+  ```
 - `gmail_api`
   - `credentials_file` - str: Path to file containing the
     credentials, None to disable (Default: `None`)
   - `token_file` - str: Path to save the token file
     (Default: `.token`)
+  - `auth_mode` - str: Authentication mode, `installed_app` (default)
+    or `service_account`
+  - `service_account_user` - str: Delegated mailbox user for Gmail
+    service account auth (required for domain-wide delegation). Also
+    accepted as `delegated_user` for backward compatibility.
 
   :::{note}
   credentials_file and token_file can be got with [quickstart](https://developers.google.com/gmail/api/quickstart/python). Please change the scope to `https://www.googleapis.com/auth/gmail.modify`.
   :::
+  :::{note}
+  When `auth_mode = service_account`, `credentials_file` must point to a
+  Google service account key JSON file, and `token_file` is not used.
+  :::
   - `include_spam_trash` - bool: Include messages in Spam and
     Trash when searching reports (Default: `False`)
   - `scopes` - str: Comma separated list of scopes to use when
````
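For reference, domain-wide delegation with a service account key generally looks like the following with the `google-auth` and `google-api-python-client` libraries. The file path and mailbox user echo the `credentials_file` and `service_account_user` options above, but this is an illustration rather than parsedmarc's exact code:

```python
from google.oauth2 import service_account
from googleapiclient.discovery import build

SCOPES = ["https://www.googleapis.com/auth/gmail.modify"]

# The service account key JSON file (the credentials_file option).
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service-account-key.json",
    scopes=SCOPES,
)
# Impersonate the delegated mailbox (the service_account_user option).
delegated = credentials.with_subject("dmarc@example.com")

gmail = build("gmail", "v1", credentials=delegated)
labels = gmail.users().labels().list(userId="me").execute()
```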
````diff
@@ -442,7 +518,7 @@ Update the limit to 2k per example:
 PUT _cluster/settings
 {
   "persistent" : {
-    "cluster.max_shards_per_node" : 2000
+    "cluster.max_shards_per_node" : 2000
   }
 }
 ```
````
```diff
@@ -450,6 +526,33 @@
 Increasing this value increases resource usage.
 :::
 
+## Performance tuning
+
+For large mailbox imports or backfills, parsedmarc can consume a noticeable amount
+of memory, especially when it runs on the same host as Elasticsearch or
+OpenSearch. The following settings can reduce peak memory usage and make long
+imports more predictable:
+
+- Reduce `mailbox.batch_size` to smaller values such as `100-500` instead of
+  processing a very large message set at once. Smaller batches trade throughput
+  for lower peak memory use and less sink pressure.
+- Keep `n_procs` low for mailbox-heavy runs. In practice, `1-2` workers is often
+  a safer starting point for large backfills than aggressive parallelism.
+- Use `mailbox.since` to process reports in smaller time windows such as `1d`,
+  `7d`, or another interval that fits the backlog. This makes it easier to catch
+  up incrementally instead of loading an entire mailbox history in one run.
+- Set `strip_attachment_payloads = True` when forensic reports contain large
+  attachments and you do not need to retain the raw payloads in the parsed
+  output.
+- Prefer running parsedmarc separately from Elasticsearch or OpenSearch, or
+  reserve enough RAM for both services if they must share a host.
+- For very large imports, prefer incremental supervised runs, such as a
+  scheduler or systemd service, over infrequent massive backfills.
+
+These are operational tuning recommendations rather than hard requirements, but
+they are often enough to avoid memory pressure and reduce failures during
+high-volume mailbox processing.
+
 ## Multi-tenant support
 
 Starting in `8.19.0`, ParseDMARC provides multi-tenant support by placing data into separate OpenSearch or Elasticsearch index prefixes. To set this up, create a YAML file in which each key is a tenant name and the value is a list of domains related to that tenant (not including subdomains), like this:
```

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

File diff suppressed because it is too large
```diff
@@ -1,2 +1,3 @@
-__version__ = "9.0.2"
+__version__ = "9.2.0"
 
+USER_AGENT = f"parsedmarc/{__version__}"
```
parsedmarc/elastic.py:

```diff
@@ -2,30 +2,28 @@
 from __future__ import annotations
 
-from typing import Optional, Union, Any
+from typing import Any, Optional, Union
 
 from collections import OrderedDict
 
-from elasticsearch_dsl.search import Q
+from elasticsearch.helpers import reindex
 from elasticsearch_dsl import (
-    connections,
-    Object,
+    Boolean,
+    Date,
     Document,
     Index,
-    Nested,
     InnerDoc,
     Integer,
-    Text,
-    Boolean,
     Ip,
-    Date,
+    Nested,
+    Object,
     Search,
+    Text,
+    connections,
 )
-from elasticsearch.helpers import reindex
+from elasticsearch_dsl.search import Q
 
+from parsedmarc import InvalidForensicReport
 from parsedmarc.log import logger
 from parsedmarc.utils import human_timestamp_to_datetime
-from parsedmarc import InvalidForensicReport
 
 
 class ElasticsearchError(Exception):
@@ -94,17 +92,17 @@ class _AggregateReportDoc(Document):
     spf_results = Nested(_SPFResult)
 
     def add_policy_override(self, type_: str, comment: str):
-        self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
+        self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))  # pyright: ignore[reportCallIssue]
 
     def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
         self.dkim_results.append(
             _DKIMResult(domain=domain, selector=selector, result=result)
-        )
+        )  # pyright: ignore[reportCallIssue]
 
     def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
-        self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
+        self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))  # pyright: ignore[reportCallIssue]
 
-    def save(self, **kwargs):
+    def save(self, **kwargs):  # pyright: ignore[reportIncompatibleMethodOverride]
         self.passed_dmarc = False
         self.passed_dmarc = self.spf_aligned or self.dkim_aligned
 
@@ -138,25 +136,25 @@ class _ForensicSampleDoc(InnerDoc):
     attachments = Nested(_EmailAttachmentDoc)
 
     def add_to(self, display_name: str, address: str):
-        self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
+        self.to.append(_EmailAddressDoc(display_name=display_name, address=address))  # pyright: ignore[reportCallIssue]
 
     def add_reply_to(self, display_name: str, address: str):
         self.reply_to.append(
             _EmailAddressDoc(display_name=display_name, address=address)
-        )
+        )  # pyright: ignore[reportCallIssue]
 
     def add_cc(self, display_name: str, address: str):
-        self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
+        self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))  # pyright: ignore[reportCallIssue]
 
     def add_bcc(self, display_name: str, address: str):
-        self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
+        self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))  # pyright: ignore[reportCallIssue]
 
     def add_attachment(self, filename: str, content_type: str, sha256: str):
         self.attachments.append(
             _EmailAttachmentDoc(
                 filename=filename, content_type=content_type, sha256=sha256
             )
-        )
+        )  # pyright: ignore[reportCallIssue]
 
 
 class _ForensicReportDoc(Document):
@@ -203,11 +201,11 @@ class _SMTPTLSPolicyDoc(InnerDoc):
 
     def add_failure_details(
         self,
-        result_type: str,
-        ip_address: str,
-        receiving_ip: str,
-        receiving_mx_helo: str,
-        failed_session_count: int,
+        result_type: Optional[str] = None,
+        ip_address: Optional[str] = None,
+        receiving_ip: Optional[str] = None,
+        receiving_mx_helo: Optional[str] = None,
+        failed_session_count: Optional[int] = None,
         sending_mta_ip: Optional[str] = None,
         receiving_mx_hostname: Optional[str] = None,
         additional_information_uri: Optional[str] = None,
@@ -224,7 +222,7 @@ class _SMTPTLSPolicyDoc(InnerDoc):
             additional_information=additional_information_uri,
             failure_reason_code=failure_reason_code,
         )
-        self.failure_details.append(_details)
+        self.failure_details.append(_details)  # pyright: ignore[reportCallIssue]
 
 
 class _SMTPTLSReportDoc(Document):
@@ -258,7 +256,7 @@ class _SMTPTLSReportDoc(Document):
                 policy_string=policy_string,
                 mx_host_patterns=mx_host_patterns,
                 failure_details=failure_details,
-            )
+            )  # pyright: ignore[reportCallIssue]
 
 
 class AlreadySaved(ValueError):
@@ -268,12 +266,12 @@ class AlreadySaved(ValueError):
 def set_hosts(
     hosts: Union[str, list[str]],
     *,
-    use_ssl: Optional[bool] = False,
+    use_ssl: bool = False,
     ssl_cert_path: Optional[str] = None,
     username: Optional[str] = None,
     password: Optional[str] = None,
     api_key: Optional[str] = None,
-    timeout: Optional[float] = 60.0,
+    timeout: float = 60.0,
 ):
     """
     Sets the Elasticsearch hosts to use
@@ -297,7 +295,7 @@ def set_hosts(
         conn_params["ca_certs"] = ssl_cert_path
     else:
         conn_params["verify_certs"] = False
-    if username:
+    if username and password:
         conn_params["http_auth"] = username + ":" + password
     if api_key:
         conn_params["api_key"] = api_key
@@ -369,7 +367,7 @@ def migrate_indexes(
         }
         Index(new_index_name).create()
         Index(new_index_name).put_mapping(doc_type=doc, body=body)
-        reindex(connections.get_connection(), aggregate_index_name, new_index_name)
+        reindex(connections.get_connection(), aggregate_index_name, new_index_name)  # pyright: ignore[reportArgumentType]
         Index(aggregate_index_name).delete()
 
     for forensic_index in forensic_indexes:
@@ -377,18 +375,18 @@
 
 
 def save_aggregate_report_to_elasticsearch(
-    aggregate_report: OrderedDict[str, Any],
+    aggregate_report: dict[str, Any],
     index_suffix: Optional[str] = None,
     index_prefix: Optional[str] = None,
     monthly_indexes: Optional[bool] = False,
-    number_of_shards: Optional[int] = 1,
-    number_of_replicas: Optional[int] = 0,
+    number_of_shards: int = 1,
+    number_of_replicas: int = 0,
 ):
     """
     Saves a parsed DMARC aggregate report to Elasticsearch
 
     Args:
-        aggregate_report (OrderedDict): A parsed forensic report
+        aggregate_report (dict): A parsed forensic report
         index_suffix (str): The suffix of the name of the index to save to
         index_prefix (str): The prefix of the name of the index to save to
         monthly_indexes (bool): Use monthly indexes instead of daily indexes
@@ -412,11 +410,11 @@ def save_aggregate_report_to_elasticsearch(
     else:
         index_date = begin_date.strftime("%Y-%m-%d")
 
-    org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
-    report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
-    domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
-    begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
-    end_date_query = Q(dict(match=dict(date_end=end_date)))
+    org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))  # type: ignore
+    report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))  # pyright: ignore[reportArgumentType]
+    domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))  # pyright: ignore[reportArgumentType]
+    begin_date_query = Q(dict(range=dict(date_begin=dict(gte=begin_date))))  # pyright: ignore[reportArgumentType]
+    end_date_query = Q(dict(range=dict(date_end=dict(lte=end_date))))  # pyright: ignore[reportArgumentType]
 
     if index_suffix is not None:
         search_index = "dmarc_aggregate_{0}*".format(index_suffix)
```
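The switch from `match` to `range` queries above lets the existing-report search treat a report's time window as an interval (`gte`/`lte`) rather than an exact value. For reference, the same queries can be written in `elasticsearch_dsl`'s keyword form; the index name and values below are placeholders, and this sketch is not the project's own code:

```python
from elasticsearch_dsl import Search
from elasticsearch_dsl.search import Q

# Equivalent keyword-style construction of the queries built above.
org_name_query = Q("match_phrase", org_name="example.org")
begin_date_query = Q("range", date_begin={"gte": "2026-01-01T00:00:00"})
end_date_query = Q("range", date_end={"lte": "2026-01-02T00:00:00"})

search = Search(index="dmarc_aggregate*")
search = search.query(org_name_query & begin_date_query & end_date_query)
```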
```diff
@@ -428,13 +426,12 @@ def save_aggregate_report_to_elasticsearch(
     query = org_name_query & report_id_query & domain_query
     query = query & begin_date_query & end_date_query
     search.query = query
-    begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
-    end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
-
     try:
         existing = search.execute()
     except Exception as error_:
+        begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
+        end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")
+
         raise ElasticsearchError(
             "Elasticsearch's search for existing report \
             error: {}".format(error_.__str__())
@@ -530,7 +527,7 @@ def save_aggregate_report_to_elasticsearch(
         number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
     )
     create_indexes([index], index_settings)
-    agg_doc.meta.index = index
+    agg_doc.meta.index = index  # pyright: ignore[reportOptionalMemberAccess, reportAttributeAccessIssue]
 
     try:
         agg_doc.save()
@@ -539,8 +536,8 @@
 
 
 def save_forensic_report_to_elasticsearch(
-    forensic_report: OrderedDict[str, Any],
-    index_suffix: Optional[any] = None,
+    forensic_report: dict[str, Any],
+    index_suffix: Optional[Any] = None,
     index_prefix: Optional[str] = None,
     monthly_indexes: Optional[bool] = False,
     number_of_shards: int = 1,
@@ -550,7 +547,7 @@ def save_forensic_report_to_elasticsearch(
     Saves a parsed DMARC forensic report to Elasticsearch
 
     Args:
-        forensic_report (OrderedDict): A parsed forensic report
+        forensic_report (dict): A parsed forensic report
         index_suffix (str): The suffix of the name of the index to save to
         index_prefix (str): The prefix of the name of the index to save to
         monthly_indexes (bool): Use monthly indexes instead of daily
@@ -570,7 +567,7 @@ def save_forensic_report_to_elasticsearch(
     sample_date = forensic_report["parsed_sample"]["date"]
     sample_date = human_timestamp_to_datetime(sample_date)
     original_headers = forensic_report["parsed_sample"]["headers"]
-    headers = OrderedDict()
+    headers: dict[str, Any] = {}
     for original_header in original_headers:
         headers[original_header.lower()] = original_headers[original_header]
 
@@ -584,7 +581,7 @@ def save_forensic_report_to_elasticsearch(
     if index_prefix is not None:
         search_index = "{0}{1}".format(index_prefix, search_index)
     search = Search(index=search_index)
-    q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))
+    q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))  # pyright: ignore[reportArgumentType]
 
     from_ = None
     to_ = None
@@ -599,7 +596,7 @@ def save_forensic_report_to_elasticsearch(
 
         from_ = dict()
         from_["sample.headers.from"] = headers["from"]
-        from_query = Q(dict(match_phrase=from_))
+        from_query = Q(dict(match_phrase=from_))  # pyright: ignore[reportArgumentType]
         q = q & from_query
     if "to" in headers:
         # We convert the TO header from a string list to a flat string.
@@ -611,12 +608,12 @@ def save_forensic_report_to_elasticsearch(
 
         to_ = dict()
         to_["sample.headers.to"] = headers["to"]
-        to_query = Q(dict(match_phrase=to_))
+        to_query = Q(dict(match_phrase=to_))  # pyright: ignore[reportArgumentType]
         q = q & to_query
     if "subject" in headers:
         subject = headers["subject"]
         subject_query = {"match_phrase": {"sample.headers.subject": subject}}
-        q = q & Q(subject_query)
+        q = q & Q(subject_query)  # pyright: ignore[reportArgumentType]
 
     search.query = q
     existing = search.execute()
@@ -694,7 +691,7 @@ def save_forensic_report_to_elasticsearch(
         number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
     )
     create_indexes([index], index_settings)
-    forensic_doc.meta.index = index
+    forensic_doc.meta.index = index  # pyright: ignore[reportAttributeAccessIssue, reportOptionalMemberAccess]
     try:
         forensic_doc.save()
     except Exception as e:
@@ -706,18 +703,18 @@
 
 
 def save_smtp_tls_report_to_elasticsearch(
-    report: OrderedDict[str, Any],
-    index_suffix: str = None,
-    index_prefix: str = None,
-    monthly_indexes: Optional[bool] = False,
-    number_of_shards: Optional[int] = 1,
-    number_of_replicas: Optional[int] = 0,
+    report: dict[str, Any],
+    index_suffix: Optional[str] = None,
+    index_prefix: Optional[str] = None,
+    monthly_indexes: bool = False,
+    number_of_shards: int = 1,
+    number_of_replicas: int = 0,
 ):
     """
     Saves a parsed SMTP TLS report to Elasticsearch
 
     Args:
-        report (OrderedDict): A parsed SMTP TLS report
+        report (dict): A parsed SMTP TLS report
         index_suffix (str): The suffix of the name of the index to save to
         index_prefix (str): The prefix of the name of the index to save to
         monthly_indexes (bool): Use monthly indexes instead of daily indexes
@@ -741,10 +738,10 @@ def save_smtp_tls_report_to_elasticsearch(
     report["begin_date"] = begin_date
     report["end_date"] = end_date
 
-    org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
-    report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
-    begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
-    end_date_query = Q(dict(match=dict(date_end=end_date)))
+    org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))  # pyright: ignore[reportArgumentType]
+    report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))  # pyright: ignore[reportArgumentType]
+    begin_date_query = Q(dict(match=dict(date_begin=begin_date)))  # pyright: ignore[reportArgumentType]
+    end_date_query = Q(dict(match=dict(date_end=end_date)))  # pyright: ignore[reportArgumentType]
 
     if index_suffix is not None:
         search_index = "smtp_tls_{0}*".format(index_suffix)
@@ -845,10 +842,10 @@ def save_smtp_tls_report_to_elasticsearch(
                 additional_information_uri=additional_information_uri,
                 failure_reason_code=failure_reason_code,
             )
-        smtp_tls_doc.policies.append(policy_doc)
+        smtp_tls_doc.policies.append(policy_doc)  # pyright: ignore[reportCallIssue]
 
     create_indexes([index], index_settings)
-    smtp_tls_doc.meta.index = index
+    smtp_tls_doc.meta.index = index  # pyright: ignore[reportOptionalMemberAccess, reportAttributeAccessIssue]
 
     try:
         smtp_tls_doc.save()
```
parsedmarc/gelf.py:

```diff
@@ -2,21 +2,18 @@
 from __future__ import annotations
 
-from typing import Any
-
 import logging
 import logging.handlers
 import json
 import threading
-from collections import OrderedDict
+from typing import Any
+
+from pygelf import GelfTcpHandler, GelfTlsHandler, GelfUdpHandler
 
 from parsedmarc import (
     parsed_aggregate_reports_to_csv_rows,
     parsed_forensic_reports_to_csv_rows,
     parsed_smtp_tls_reports_to_csv_rows,
 )
-from pygelf import GelfTcpHandler, GelfUdpHandler, GelfTlsHandler
 
 
 log_context_data = threading.local()
@@ -53,7 +50,7 @@ class GelfClient(object):
         )
         self.logger.addHandler(self.handler)
 
-    def save_aggregate_report_to_gelf(self, aggregate_reports: OrderedDict[str, Any]):
+    def save_aggregate_report_to_gelf(self, aggregate_reports: list[dict[str, Any]]):
         rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
         for row in rows:
             log_context_data.parsedmarc = row
@@ -61,12 +58,14 @@ class GelfClient(object):
 
         log_context_data.parsedmarc = None
 
-    def save_forensic_report_to_gelf(self, forensic_reports: OrderedDict[str, Any]):
+    def save_forensic_report_to_gelf(self, forensic_reports: list[dict[str, Any]]):
         rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
         for row in rows:
-            self.logger.info(json.dumps(row))
+            log_context_data.parsedmarc = row
+            self.logger.info("parsedmarc forensic report")
 
-    def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: OrderedDict[str, Any]):
+    def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: dict[str, Any]):
         rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
         for row in rows:
-            self.logger.info(json.dumps(row))
+            log_context_data.parsedmarc = row
+            self.logger.info("parsedmarc smtptls report")
```
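The thread-local `log_context_data` above works together with a logging filter that copies the current row onto each record before a pygelf handler ships it. A self-contained sketch of that pattern; the filter class, host, and field name are illustrative rather than parsedmarc's exact internals:

```python
import logging
import threading

from pygelf import GelfUdpHandler

log_context_data = threading.local()


class ContextFilter(logging.Filter):
    """Attach the current report row to every record as an extra field."""

    def filter(self, record: logging.LogRecord) -> bool:
        record.parsedmarc = getattr(log_context_data, "parsedmarc", None)
        return True


logger = logging.getLogger("gelf")
logger.setLevel(logging.INFO)
logger.addFilter(ContextFilter())
# include_extra_fields=True makes pygelf forward record.parsedmarc as _parsedmarc.
logger.addHandler(
    GelfUdpHandler(host="graylog.example.com", port=12201, include_extra_fields=True)
)

log_context_data.parsedmarc = {"org_name": "example.org"}  # placeholder row
logger.info("parsedmarc aggregate report")
```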
parsedmarc/kafkaclient.py:

```diff
@@ -2,19 +2,16 @@
 from __future__ import annotations
 
-from typing import Any, Optional
-from ssl import SSLContext
-
 import json
-from ssl import create_default_context
+from ssl import SSLContext, create_default_context
+from typing import Any, Optional, Union
 
 from kafka import KafkaProducer
 from kafka.errors import NoBrokersAvailable, UnknownTopicOrPartitionError
-from collections import OrderedDict
-from parsedmarc.utils import human_timestamp_to_datetime
 
 from parsedmarc import __version__
 from parsedmarc.log import logger
+from parsedmarc.utils import human_timestamp_to_datetime
 
 
 class KafkaError(RuntimeError):
@@ -49,7 +46,7 @@ class KafkaClient(object):
             ``$ConnectionString``, and the password is the
             Azure Event Hub connection string.
         """
-        config = dict(
+        config: dict[str, Any] = dict(
             value_serializer=lambda v: json.dumps(v).encode("utf-8"),
             bootstrap_servers=kafka_hosts,
             client_id="parsedmarc-{0}".format(__version__),
```
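The `$ConnectionString` convention mentioned in the docstring is Azure Event Hubs' Kafka-compatible SASL/PLAIN login. A sketch of the producer settings that implies, on top of the serializer configured above; the namespace, connection string, and topic name are placeholders:

```python
import json

from kafka import KafkaProducer

producer = KafkaProducer(
    bootstrap_servers="my-namespace.servicebus.windows.net:9093",  # placeholder
    security_protocol="SASL_SSL",
    sasl_mechanism="PLAIN",
    # Event Hubs uses the literal username "$ConnectionString" with the
    # namespace connection string as the password (placeholder below).
    sasl_plain_username="$ConnectionString",
    sasl_plain_password="Endpoint=sb://my-namespace.servicebus.windows.net/;SharedAccessKeyName=...;SharedAccessKey=...",
    value_serializer=lambda v: json.dumps(v).encode("utf-8"),
)
producer.send("dmarc_aggregate", {"org_name": "example.org"})  # placeholder topic
producer.flush()
```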
@@ -66,7 +63,7 @@ class KafkaClient(object):
|
||||
raise KafkaError("No Kafka brokers available")
|
||||
|
||||
@staticmethod
|
||||
def strip_metadata(report: OrderedDict[str, Any]):
|
||||
def strip_metadata(report: dict[str, Any]):
|
||||
"""
|
||||
Duplicates org_name, org_email and report_id into JSON root
|
||||
and removes report_metadata key to bring it more inline
|
||||
@@ -80,7 +77,7 @@ class KafkaClient(object):
|
||||
return report
|
||||
|
||||
@staticmethod
|
||||
def generate_date_range(report: OrderedDict[str, Any]):
|
||||
def generate_date_range(report: dict[str, Any]):
|
||||
"""
|
||||
Creates a date_range timestamp with format YYYY-MM-DD-T-HH:MM:SS
|
||||
based on begin and end dates for easier parsing in Kibana.
|
||||
@@ -98,7 +95,9 @@ class KafkaClient(object):
|
||||
return date_range
|
||||
|
||||
def save_aggregate_reports_to_kafka(
|
||||
self, aggregate_reports: list[OrderedDict][str, Any], aggregate_topic: str
|
||||
self,
|
||||
aggregate_reports: Union[dict[str, Any], list[dict[str, Any]]],
|
||||
aggregate_topic: str,
|
||||
):
|
||||
"""
|
||||
Saves aggregate DMARC reports to Kafka
|
||||
@@ -109,9 +108,7 @@ class KafkaClient(object):
|
||||
aggregate_topic (str): The name of the Kafka topic
|
||||
|
||||
"""
|
||||
if isinstance(aggregate_reports, dict) or isinstance(
|
||||
aggregate_reports, OrderedDict
|
||||
):
|
||||
if isinstance(aggregate_reports, dict):
|
||||
aggregate_reports = [aggregate_reports]
|
||||
|
||||
if len(aggregate_reports) < 1:
|
||||
@@ -143,7 +140,9 @@ class KafkaClient(object):
|
||||
raise KafkaError("Kafka error: {0}".format(e.__str__()))
|
||||
|
||||
def save_forensic_reports_to_kafka(
|
||||
self, forensic_reports: OrderedDict[str, Any], forensic_topic: str
|
||||
self,
|
||||
forensic_reports: Union[dict[str, Any], list[dict[str, Any]]],
|
||||
forensic_topic: str,
|
||||
):
|
||||
"""
|
||||
Saves forensic DMARC reports to Kafka, sends individual
|
||||
@@ -175,7 +174,9 @@ class KafkaClient(object):
|
||||
raise KafkaError("Kafka error: {0}".format(e.__str__()))
|
||||
|
||||
def save_smtp_tls_reports_to_kafka(
|
||||
self, smtp_tls_reports: list[OrderedDict[str, Any]], smtp_tls_topic: str
|
||||
self,
|
||||
smtp_tls_reports: Union[list[dict[str, Any]], dict[str, Any]],
|
||||
smtp_tls_topic: str,
|
||||
):
|
||||
"""
|
||||
Saves SMTP TLS reports to Kafka, sends individual
|
||||
|
||||
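`strip_metadata` is only described by its docstring in this hunk; a standalone sketch of the transformation it describes, with an invented sample report for illustration:

```python
# Sketch of the strip_metadata behavior: promote org_name, org_email, and
# report_id to the JSON root and drop report_metadata. The sample dict is
# invented, not real parsedmarc output.
from typing import Any


def strip_metadata(report: dict[str, Any]) -> dict[str, Any]:
    meta = report.pop("report_metadata", {})
    for key in ("org_name", "org_email", "report_id"):
        if key in meta:
            report[key] = meta[key]
    return report


report = {
    "report_metadata": {
        "org_name": "example.net",
        "org_email": "noreply@example.net",
        "report_id": "abc123",
        "begin_date": "2024-01-01 00:00:00",
    },
    "records": [],
}
flat = strip_metadata(report)
assert flat["org_name"] == "example.net" and "report_metadata" not in flat
```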
parsedmarc/loganalytics.py

@@ -3,13 +3,13 @@

from __future__ import annotations

from typing import Any
from collections import OrderedDict

from parsedmarc.log import logger
from azure.core.exceptions import HttpResponseError
from azure.identity import ClientSecretCredential
from azure.monitor.ingestion import LogsIngestionClient

from parsedmarc.log import logger


class LogAnalyticsException(Exception):
    """Raised when an Elasticsearch error occurs"""
@@ -110,7 +110,7 @@ class LogAnalyticsClient(object):

    def publish_json(
        self,
        results: OrderedDict[str, OrderedDict[str, Any]],
        results,
        logs_client: LogsIngestionClient,
        dcr_stream: str,
    ):
@@ -133,7 +133,7 @@ class LogAnalyticsClient(object):

    def publish_results(
        self,
        results: OrderedDict[str, OrderedDict[str, Any]],
        results: dict[str, Any],
        save_aggregate: bool,
        save_forensic: bool,
        save_smtp_tls: bool,

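`publish_json` takes a `LogsIngestionClient` plus a DCR stream name; a minimal sketch of that ingestion path using the `azure-monitor-ingestion` API, with placeholder endpoint, tenant, and rule IDs:

```python
# Sketch of DCR-based Log Analytics ingestion. All IDs, the endpoint, and the
# stream name are placeholders; real values come from your Data Collection
# Endpoint and Data Collection Rule.
from azure.core.exceptions import HttpResponseError
from azure.identity import ClientSecretCredential
from azure.monitor.ingestion import LogsIngestionClient

credential = ClientSecretCredential(
    tenant_id="00000000-0000-0000-0000-000000000000",
    client_id="00000000-0000-0000-0000-000000000000",
    client_secret="...",
)
client = LogsIngestionClient(
    endpoint="https://my-dce.westus2-1.ingest.monitor.azure.com",
    credential=credential,
)
try:
    client.upload(
        rule_id="dcr-00000000000000000000000000000000",  # DCR immutable ID
        stream_name="Custom-DMARC_CL",                   # dcr_stream equivalent
        logs=[{"org_name": "example.net", "report_id": "abc123"}],
    )
except HttpResponseError as e:
    print(f"Log Analytics upload failed: {e}")
```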
parsedmarc/mail/gmail.py

@@ -10,6 +10,7 @@ from typing import List

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google.oauth2 import service_account
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
@@ -18,7 +19,29 @@ from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection


def _get_creds(token_file, credentials_file, scopes, oauth2_port):
def _get_creds(
    token_file,
    credentials_file,
    scopes,
    oauth2_port,
    auth_mode="installed_app",
    service_account_user=None,
):
    normalized_auth_mode = (auth_mode or "installed_app").strip().lower()
    if normalized_auth_mode == "service_account":
        creds = service_account.Credentials.from_service_account_file(
            credentials_file,
            scopes=scopes,
        )
        if service_account_user:
            creds = creds.with_subject(service_account_user)
        return creds
    if normalized_auth_mode != "installed_app":
        raise ValueError(
            f"Unsupported Gmail auth_mode '{auth_mode}'. "
            "Expected 'installed_app' or 'service_account'."
        )

    creds = None

    if Path(token_file).exists():
@@ -47,8 +70,17 @@ class GmailConnection(MailboxConnection):
        reports_folder: str,
        oauth2_port: int,
        paginate_messages: bool,
        auth_mode: str = "installed_app",
        service_account_user: str | None = None,
    ):
        creds = _get_creds(token_file, credentials_file, scopes, oauth2_port)
        creds = _get_creds(
            token_file,
            credentials_file,
            scopes,
            oauth2_port,
            auth_mode=auth_mode,
            service_account_user=service_account_user,
        )
        self.service = build("gmail", "v1", credentials=creds)
        self.include_spam_trash = include_spam_trash
        self.reports_label_id = self._find_label_id_for_label(reports_folder)
@@ -116,17 +148,17 @@ class GmailConnection(MailboxConnection):
        else:
            return [id for id in self._fetch_all_message_ids(reports_label_id)]

    def fetch_message(self, message_id):
    def fetch_message(self, message_id) -> str:
        msg = (
            self.service.users()
            .messages()
            .get(userId="me", id=message_id, format="raw")
            .execute()
        )
        return urlsafe_b64decode(msg["raw"])
        return urlsafe_b64decode(msg["raw"]).decode(errors="replace")

    def delete_message(self, message_id: str):
        self.service.users().messages().delete(userId="me", id=message_id)
        self.service.users().messages().delete(userId="me", id=message_id).execute()

    def move_message(self, message_id: str, folder_name: str):
        label_id = self._find_label_id_for_label(folder_name)

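The new `service_account` auth mode relies on Google Workspace domain-wide delegation: a service account has no Gmail mailbox of its own, so it must impersonate a real user via `with_subject()`, exactly as `_get_creds` does above. A hedged usage sketch with placeholder file path and user; the `gmail.modify` scope here is an assumption, not confirmed by this diff:

```python
# Sketch of the service-account path: impersonate a delegated mailbox and
# list its labels. "service-account.json" and the subject address are
# placeholders.
from google.oauth2 import service_account
from googleapiclient.discovery import build

SCOPES = ["https://www.googleapis.com/auth/gmail.modify"]

creds = service_account.Credentials.from_service_account_file(
    "service-account.json", scopes=SCOPES
)
# Without a subject, the service account cannot access any Gmail mailbox
creds = creds.with_subject("dmarc@example.net")

service = build("gmail", "v1", credentials=creds)
labels = service.users().labels().list(userId="me").execute()
print([label["name"] for label in labels.get("labels", [])])
```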
parsedmarc/mail/graph.py

@@ -6,29 +6,35 @@ from enum import Enum
from functools import lru_cache
from pathlib import Path
from time import sleep
from typing import List, Optional
from typing import Any, List, Optional, Union

from azure.identity import (
    UsernamePasswordCredential,
    DeviceCodeCredential,
    ClientSecretCredential,
    CertificateCredential,
    TokenCachePersistenceOptions,
    AuthenticationRecord,
)
from msgraph.core import GraphClient
from requests.exceptions import RequestException

from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection

GRAPH_REQUEST_RETRY_ATTEMPTS = 3
GRAPH_REQUEST_RETRY_DELAY_SECONDS = 5


class AuthMethod(Enum):
    DeviceCode = 1
    UsernamePassword = 2
    ClientSecret = 3
    Certificate = 4


def _get_cache_args(token_path: Path, allow_unencrypted_storage):
    cache_args = {
    cache_args: dict[str, Any] = {
        "cache_persistence_options": TokenCachePersistenceOptions(
            name="parsedmarc", allow_unencrypted_storage=allow_unencrypted_storage
        )
@@ -83,30 +89,55 @@ def _generate_credential(auth_method: str, token_path: Path, **kwargs):
            tenant_id=kwargs["tenant_id"],
            client_secret=kwargs["client_secret"],
        )
    elif auth_method == AuthMethod.Certificate.name:
        cert_path = kwargs.get("certificate_path")
        if not cert_path:
            raise ValueError(
                "certificate_path is required when auth_method is 'Certificate'"
            )
        credential = CertificateCredential(
            client_id=kwargs["client_id"],
            tenant_id=kwargs["tenant_id"],
            certificate_path=cert_path,
            password=kwargs.get("certificate_password"),
        )
    else:
        raise RuntimeError(f"Auth method {auth_method} not found")
    return credential


class MSGraphConnection(MailboxConnection):
    _WELL_KNOWN_FOLDERS = {
        "inbox": "inbox",
        "archive": "archive",
        "drafts": "drafts",
        "sentitems": "sentitems",
        "deleteditems": "deleteditems",
        "junkemail": "junkemail",
    }

    def __init__(
        self,
        auth_method: str,
        mailbox: str,
        graph_url: str,
        client_id: str,
        client_secret: str,
        username: str,
        password: str,
        client_secret: Optional[str],
        username: Optional[str],
        password: Optional[str],
        tenant_id: str,
        token_file: str,
        allow_unencrypted_storage: bool,
        certificate_path: Optional[str] = None,
        certificate_password: Optional[Union[str, bytes]] = None,
    ):
        token_path = Path(token_file)
        credential = _generate_credential(
            auth_method,
            client_id=client_id,
            client_secret=client_secret,
            certificate_path=certificate_path,
            certificate_password=certificate_password,
            username=username,
            password=password,
            tenant_id=tenant_id,
@@ -117,10 +148,10 @@ class MSGraphConnection(MailboxConnection):
            "credential": credential,
            "cloud": graph_url,
        }
        if not isinstance(credential, ClientSecretCredential):
        if not isinstance(credential, (ClientSecretCredential, CertificateCredential)):
            scopes = ["Mail.ReadWrite"]
            # Detect if mailbox is shared
            if mailbox and username != mailbox:
            if mailbox and username and username != mailbox:
                scopes = ["Mail.ReadWrite.Shared"]
            auth_record = credential.authenticate(scopes=scopes)
            _cache_auth_record(auth_record, token_path)
@@ -129,6 +160,23 @@ class MSGraphConnection(MailboxConnection):
        self._client = GraphClient(**client_params)
        self.mailbox_name = mailbox

    def _request_with_retries(self, method_name: str, *args, **kwargs):
        for attempt in range(1, GRAPH_REQUEST_RETRY_ATTEMPTS + 1):
            try:
                return getattr(self._client, method_name)(*args, **kwargs)
            except RequestException as error:
                if attempt == GRAPH_REQUEST_RETRY_ATTEMPTS:
                    raise
                logger.warning(
                    "Transient MS Graph %s error on attempt %s/%s: %s",
                    method_name.upper(),
                    attempt,
                    GRAPH_REQUEST_RETRY_ATTEMPTS,
                    error,
                )
                sleep(GRAPH_REQUEST_RETRY_DELAY_SECONDS)
        raise RuntimeError("no retry attempts configured")

    def create_folder(self, folder_name: str):
        sub_url = ""
        path_parts = folder_name.split("/")
@@ -143,7 +191,7 @@ class MSGraphConnection(MailboxConnection):

        request_body = {"displayName": folder_name}
        request_url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
        resp = self._client.post(request_url, json=request_body)
        resp = self._request_with_retries("post", request_url, json=request_body)
        if resp.status_code == 409:
            logger.debug(f"Folder {folder_name} already exists, skipping creation")
        elif resp.status_code == 201:
@@ -151,9 +199,9 @@ class MSGraphConnection(MailboxConnection):
        else:
            logger.warning(f"Unknown response {resp.status_code} {resp.json()}")

    def fetch_messages(self, folder_name: str, **kwargs) -> List[str]:
    def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
        """Returns a list of message UIDs in the specified folder"""
        folder_id = self._find_folder_id_from_folder_path(folder_name)
        folder_id = self._find_folder_id_from_folder_path(reports_folder)
        url = f"/users/{self.mailbox_name}/mailFolders/{folder_id}/messages"
        since = kwargs.get("since")
        if not since:
@@ -166,14 +214,14 @@ class MSGraphConnection(MailboxConnection):

    def _get_all_messages(self, url, batch_size, since):
        messages: list
        params = {"$select": "id"}
        params: dict[str, Union[str, int]] = {"$select": "id"}
        if since:
            params["$filter"] = f"receivedDateTime ge {since}"
        if batch_size and batch_size > 0:
            params["$top"] = batch_size
        else:
            params["$top"] = 100
        result = self._client.get(url, params=params)
        result = self._request_with_retries("get", url, params=params)
        if result.status_code != 200:
            raise RuntimeError(f"Failed to fetch messages {result.text}")
        messages = result.json()["value"]
@@ -181,7 +229,7 @@ class MSGraphConnection(MailboxConnection):
        while "@odata.nextLink" in result.json() and (
            since is not None or (batch_size == 0 or batch_size - len(messages) > 0)
        ):
            result = self._client.get(result.json()["@odata.nextLink"])
            result = self._request_with_retries("get", result.json()["@odata.nextLink"])
            if result.status_code != 200:
                raise RuntimeError(f"Failed to fetch messages {result.text}")
            messages.extend(result.json()["value"])
@@ -190,7 +238,7 @@ class MSGraphConnection(MailboxConnection):
    def mark_message_read(self, message_id: str):
        """Marks a message as read"""
        url = f"/users/{self.mailbox_name}/messages/{message_id}"
        resp = self._client.patch(url, json={"isRead": "true"})
        resp = self._request_with_retries("patch", url, json={"isRead": "true"})
        if resp.status_code != 200:
            raise RuntimeWarning(
                f"Failed to mark message read{resp.status_code}: {resp.json()}"
@@ -198,7 +246,7 @@ class MSGraphConnection(MailboxConnection):

    def fetch_message(self, message_id: str, **kwargs):
        url = f"/users/{self.mailbox_name}/messages/{message_id}/$value"
        result = self._client.get(url)
        result = self._request_with_retries("get", url)
        if result.status_code != 200:
            raise RuntimeWarning(
                f"Failed to fetch message{result.status_code}: {result.json()}"
@@ -210,7 +258,7 @@ class MSGraphConnection(MailboxConnection):

    def delete_message(self, message_id: str):
        url = f"/users/{self.mailbox_name}/messages/{message_id}"
        resp = self._client.delete(url)
        resp = self._request_with_retries("delete", url)
        if resp.status_code != 204:
            raise RuntimeWarning(
                f"Failed to delete message {resp.status_code}: {resp.json()}"
@@ -220,7 +268,7 @@ class MSGraphConnection(MailboxConnection):
        folder_id = self._find_folder_id_from_folder_path(folder_name)
        request_body = {"destinationId": folder_id}
        url = f"/users/{self.mailbox_name}/messages/{message_id}/move"
        resp = self._client.post(url, json=request_body)
        resp = self._request_with_retries("post", url, json=request_body)
        if resp.status_code != 201:
            raise RuntimeWarning(
                f"Failed to move message {resp.status_code}: {resp.json()}"
@@ -248,6 +296,19 @@ class MSGraphConnection(MailboxConnection):
        else:
            return self._find_folder_id_with_parent(folder_name, None)

    def _get_well_known_folder_id(self, folder_name: str) -> Optional[str]:
        folder_key = folder_name.lower().replace(" ", "").replace("-", "")
        alias = self._WELL_KNOWN_FOLDERS.get(folder_key)
        if alias is None:
            return None

        url = f"/users/{self.mailbox_name}/mailFolders/{alias}?$select=id,displayName"
        folder_resp = self._request_with_retries("get", url)
        if folder_resp.status_code != 200:
            return None
        payload = folder_resp.json()
        return payload.get("id")

    def _find_folder_id_with_parent(
        self, folder_name: str, parent_folder_id: Optional[str]
    ):
@@ -256,8 +317,12 @@ class MSGraphConnection(MailboxConnection):
            sub_url = f"/{parent_folder_id}/childFolders"
        url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
        filter = f"?$filter=displayName eq '{folder_name}'"
        folders_resp = self._client.get(url + filter)
        folders_resp = self._request_with_retries("get", url + filter)
        if folders_resp.status_code != 200:
            if parent_folder_id is None:
                well_known_folder_id = self._get_well_known_folder_id(folder_name)
                if well_known_folder_id:
                    return well_known_folder_id
            raise RuntimeWarning(f"Failed to list folders.{folders_resp.json()}")
        folders: list = folders_resp.json()["value"]
        matched_folders = [

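`_request_with_retries` above is a fixed-attempt, fixed-delay retry loop over the Graph client's HTTP verbs. The same pattern in a self-contained sketch, with a fake flaky client so it runs without MS Graph:

```python
# Sketch of the retry pattern: retry a callable a fixed number of times with
# a fixed delay, re-raising the last error. Names and constants here mirror
# the diff but the FlakyClient is invented for demonstration.
import time
from typing import Any, Callable

RETRY_ATTEMPTS = 3
RETRY_DELAY_SECONDS = 5


def request_with_retries(call: Callable[[], Any]) -> Any:
    for attempt in range(1, RETRY_ATTEMPTS + 1):
        try:
            return call()
        except ConnectionError as error:
            if attempt == RETRY_ATTEMPTS:
                raise  # out of attempts: surface the original error
            print(f"attempt {attempt}/{RETRY_ATTEMPTS} failed: {error}")
            time.sleep(RETRY_DELAY_SECONDS)


class FlakyClient:
    def __init__(self) -> None:
        self.calls = 0

    def get(self) -> str:
        self.calls += 1
        if self.calls < 3:
            raise ConnectionError("transient network error")
        return "ok"


client = FlakyClient()
print(request_with_retries(client.get))  # succeeds on the third call
```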
parsedmarc/mail/imap.py

@@ -2,7 +2,7 @@

from __future__ import annotations

from typing import Optional
from typing import cast

from time import sleep

@@ -17,15 +17,14 @@ from parsedmarc.mail.mailbox_connection import MailboxConnection
class IMAPConnection(MailboxConnection):
    def __init__(
        self,
        host: Optional[str] = None,
        *,
        user: Optional[str] = None,
        password: Optional[str] = None,
        port: Optional[str] = None,
        ssl: Optional[bool] = True,
        verify: Optional[bool] = True,
        timeout: Optional[int] = 30,
        max_retries: Optional[int] = 4,
        host: str,
        user: str,
        password: str,
        port: int = 993,
        ssl: bool = True,
        verify: bool = True,
        timeout: int = 30,
        max_retries: int = 4,
    ):
        self._username = user
        self._password = password
@@ -47,19 +46,37 @@ class IMAPConnection(MailboxConnection):
    def fetch_messages(self, reports_folder: str, **kwargs):
        self._client.select_folder(reports_folder)
        since = kwargs.get("since")
        if since:
            return self._client.search(["SINCE", since])
        if since is not None:
            return self._client.search(f"SINCE {since}")
        else:
            return self._client.search()

    def fetch_message(self, message_id: int):
        return self._client.fetch_message(message_id, parse=False)
        return cast(str, self._client.fetch_message(message_id, parse=False))

    def delete_message(self, message_id: int):
        self._client.delete_messages([message_id])
        try:
            self._client.delete_messages([message_id])
        except IMAPClientError as error:
            logger.warning(
                "IMAP delete fallback for message %s due to server error: %s",
                message_id,
                error,
            )
            self._client.add_flags([message_id], [r"\Deleted"], silent=True)
            self._client.expunge()

    def move_message(self, message_id: int, folder_name: str):
        self._client.move_messages([message_id], folder_name)
        try:
            self._client.move_messages([message_id], folder_name)
        except IMAPClientError as error:
            logger.warning(
                "IMAP move fallback for message %s due to server error: %s",
                message_id,
                error,
            )
            self._client.copy([message_id], folder_name)
            self.delete_message(message_id)

    def keepalive(self):
        self._client.noop()

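The fallbacks above follow a common IMAP pattern: attempt the atomic MOVE first, and fall back to COPY plus delete-and-expunge on servers that lack the MOVE capability (RFC 6851). A sketch of the same pattern using the `imapclient` package directly; host, credentials, and folder names are placeholders:

```python
# Sketch: move with a COPY+delete fallback using imapclient. DELETED is the
# package's \Deleted flag constant; host and credentials are placeholders.
from imapclient import DELETED, IMAPClient
from imapclient.exceptions import IMAPClientError

with IMAPClient("imap.example.net", ssl=True) as client:
    client.login("dmarc@example.net", "hunter2")
    client.select_folder("INBOX")
    for uid in client.search(["SINCE", "01-Jan-2024"]):
        try:
            client.move([uid], "Archive")
        except IMAPClientError:
            # Server without MOVE support: emulate it in three steps
            client.copy([uid], "Archive")
            client.add_flags([uid], [DELETED], silent=True)
            client.expunge()
```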
parsedmarc/mail/mailbox_connection.py

@@ -13,16 +13,16 @@ class MailboxConnection(ABC):
    def create_folder(self, folder_name: str):
        raise NotImplementedError

    def fetch_messages(self, reports_folder: str, **kwargs) -> list[str]:
    def fetch_messages(self, reports_folder: str, **kwargs):
        raise NotImplementedError

    def fetch_message(self, message_id) -> str:
        raise NotImplementedError

    def delete_message(self, message_id: str):
    def delete_message(self, message_id):
        raise NotImplementedError

    def move_message(self, message_id: str, folder_name: str):
    def move_message(self, message_id, folder_name: str):
        raise NotImplementedError

    def keepalive(self):

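Any backend only needs to satisfy this small interface. A toy in-memory implementation, useful for unit-testing the mailbox loop without a real server; this class is invented for illustration and is not part of parsedmarc:

```python
# Sketch: a minimal in-memory MailboxConnection, keyed by folder name.
from parsedmarc.mail.mailbox_connection import MailboxConnection


class MemoryConnection(MailboxConnection):
    def __init__(self, messages: dict[str, str]):
        # One dict of message_id -> raw RFC 822 text per folder
        self._folders: dict[str, dict[str, str]] = {"INBOX": dict(messages)}

    def create_folder(self, folder_name: str):
        self._folders.setdefault(folder_name, {})

    def fetch_messages(self, reports_folder: str, **kwargs):
        return list(self._folders.get(reports_folder, {}))

    def fetch_message(self, message_id) -> str:
        return self._folders["INBOX"][message_id]

    def delete_message(self, message_id):
        self._folders["INBOX"].pop(message_id, None)

    def move_message(self, message_id, folder_name: str):
        self.create_folder(folder_name)
        self._folders[folder_name][message_id] = self._folders["INBOX"].pop(message_id)

    def keepalive(self):
        pass
```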
parsedmarc/mail/maildir.py

@@ -2,21 +2,20 @@

from __future__ import annotations

from typing import Optional

import mailbox
import os
from time import sleep
from typing import Dict

from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection
import mailbox
import os


class MaildirConnection(MailboxConnection):
    def __init__(
        self,
        maildir_path: Optional[bool] = None,
        maildir_create: Optional[bool] = False,
        maildir_path: str,
        maildir_create: bool = False,
    ):
        self._maildir_path = maildir_path
        self._maildir_create = maildir_create
@@ -33,27 +32,31 @@ class MaildirConnection(MailboxConnection):
            )
            raise Exception(ex)
        self._client = mailbox.Maildir(maildir_path, create=maildir_create)
        self._subfolder_client = {}
        self._subfolder_client: Dict[str, mailbox.Maildir] = {}

    def create_folder(self, folder_name: str):
        self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
        self._client.add_folder(folder_name)

    def fetch_messages(self, reports_folder: str, **kwargs):
        return self._client.keys()

    def fetch_message(self, message_id: str):
        return self._client.get(message_id).as_string()
    def fetch_message(self, message_id: str) -> str:
        msg = self._client.get(message_id)
        if msg is not None:
            msg = msg.as_string()
        if msg is not None:
            return msg
        return ""

    def delete_message(self, message_id: str):
        self._client.remove(message_id)

    def move_message(self, message_id: str, folder_name: str):
        message_data = self._client.get(message_id)
        if folder_name not in self._subfolder_client.keys():
            self._subfolder_client = mailbox.Maildir(
                os.join(self.maildir_path, folder_name), create=self.maildir_create
            )
        if message_data is None:
            return
        if folder_name not in self._subfolder_client:
            self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
        self._subfolder_client[folder_name].add(message_data)
        self._client.remove(message_id)

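The corrected `move_message` reduces to a stdlib pattern: a Maildir subfolder is itself a `mailbox.Maildir`, and a "move" is add-then-remove. A runnable sketch with a throwaway path:

```python
# Sketch: move a message between Maildir folders with the standard library.
# /tmp/demo-maildir is a placeholder path created on the fly.
import mailbox

inbox = mailbox.Maildir("/tmp/demo-maildir", create=True)
key = inbox.add(b"Subject: test\n\nhello")

archive = inbox.add_folder("Archive")  # returns another mailbox.Maildir
message = inbox.get(key)               # None if the key vanished meanwhile
if message is not None:
    archive.add(message)
    inbox.remove(key)

print(list(archive.iterkeys()))
```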
parsedmarc/opensearch.py

@@ -2,30 +2,31 @@

from __future__ import annotations

from typing import Optional, Union, Any

from collections import OrderedDict
from typing import Any, Optional, Union

import boto3
from opensearchpy import (
    Q,
    connections,
    Object,
    AWSV4SignerAuth,
    Boolean,
    Date,
    Document,
    Index,
    Nested,
    InnerDoc,
    Integer,
    Text,
    Boolean,
    Ip,
    Date,
    Nested,
    Object,
    Q,
    RequestsHttpConnection,
    Search,
    Text,
    connections,
)
from opensearchpy.helpers import reindex

from parsedmarc import InvalidForensicReport
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime
from parsedmarc import InvalidForensicReport


class OpenSearchError(Exception):
@@ -104,7 +105,7 @@ class _AggregateReportDoc(Document):
    def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
        self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))

    def save(self, **kwargs):
    def save(self, **kwargs):  # pyright: ignore[reportIncompatibleMethodOverride]
        self.passed_dmarc = False
        self.passed_dmarc = self.spf_aligned or self.dkim_aligned

@@ -203,11 +204,11 @@ class _SMTPTLSPolicyDoc(InnerDoc):

    def add_failure_details(
        self,
        result_type: str,
        ip_address: str,
        receiving_ip: str,
        receiving_mx_helo: str,
        failed_session_count: int,
        result_type: Optional[str] = None,
        ip_address: Optional[str] = None,
        receiving_ip: Optional[str] = None,
        receiving_mx_helo: Optional[str] = None,
        failed_session_count: Optional[int] = None,
        sending_mta_ip: Optional[str] = None,
        receiving_mx_hostname: Optional[str] = None,
        additional_information_uri: Optional[str] = None,
@@ -274,6 +275,9 @@ def set_hosts(
    password: Optional[str] = None,
    api_key: Optional[str] = None,
    timeout: Optional[float] = 60.0,
    auth_type: str = "basic",
    aws_region: Optional[str] = None,
    aws_service: str = "es",
):
    """
    Sets the OpenSearch hosts to use
@@ -286,6 +290,9 @@ def set_hosts(
        password (str): The password to use for authentication
        api_key (str): The Base64 encoded API key to use for authentication
        timeout (float): Timeout in seconds
        auth_type (str): OpenSearch auth mode: basic (default) or awssigv4
        aws_region (str): AWS region for SigV4 auth (required for awssigv4)
        aws_service (str): AWS service for SigV4 signing (default: es)
    """
    if not isinstance(hosts, list):
        hosts = [hosts]
@@ -297,10 +304,32 @@ def set_hosts(
        conn_params["ca_certs"] = ssl_cert_path
    else:
        conn_params["verify_certs"] = False
    if username:
        conn_params["http_auth"] = username + ":" + password
    if api_key:
        conn_params["api_key"] = api_key
    normalized_auth_type = (auth_type or "basic").strip().lower()
    if normalized_auth_type == "awssigv4":
        if not aws_region:
            raise OpenSearchError(
                "OpenSearch AWS SigV4 auth requires 'aws_region' to be set"
            )
        session = boto3.Session()
        credentials = session.get_credentials()
        if credentials is None:
            raise OpenSearchError(
                "Unable to load AWS credentials for OpenSearch SigV4 authentication"
            )
        conn_params["http_auth"] = AWSV4SignerAuth(
            credentials, aws_region, aws_service
        )
        conn_params["connection_class"] = RequestsHttpConnection
    elif normalized_auth_type == "basic":
        if username and password:
            conn_params["http_auth"] = username + ":" + password
        if api_key:
            conn_params["api_key"] = api_key
    else:
        raise OpenSearchError(
            f"Unsupported OpenSearch auth_type '{auth_type}'. "
            "Expected 'basic' or 'awssigv4'."
        )
    connections.create_connection(**conn_params)


@@ -376,19 +405,19 @@ def migrate_indexes(
        pass


def save_aggregate_report_to_elasticsearch(
    aggregate_report: OrderedDict[str, Any],
def save_aggregate_report_to_opensearch(
    aggregate_report: dict[str, Any],
    index_suffix: Optional[str] = None,
    index_prefix: Optional[str] = None,
    monthly_indexes: Optional[bool] = False,
    number_of_shards: Optional[int] = 1,
    number_of_replicas: Optional[int] = 0,
    monthly_indexes: bool = False,
    number_of_shards: int = 1,
    number_of_replicas: int = 0,
):
    """
    Saves a parsed DMARC aggregate report to OpenSearch

    Args:
        aggregate_report (OrderedDict): A parsed forensic report
        aggregate_report (dict): A parsed forensic report
        index_suffix (str): The suffix of the name of the index to save to
        index_prefix (str): The prefix of the name of the index to save to
        monthly_indexes (bool): Use monthly indexes instead of daily indexes
@@ -415,8 +444,8 @@
    org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
    report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
    domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
    begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
    end_date_query = Q(dict(match=dict(date_end=end_date)))
    begin_date_query = Q(dict(range=dict(date_begin=dict(gte=begin_date))))
    end_date_query = Q(dict(range=dict(date_end=dict(lte=end_date))))

    if index_suffix is not None:
        search_index = "dmarc_aggregate_{0}*".format(index_suffix)
@@ -428,13 +457,12 @@
    query = org_name_query & report_id_query & domain_query
    query = query & begin_date_query & end_date_query
    search.query = query
    begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
    end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")

    try:
        existing = search.execute()
    except Exception as error_:
    begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
    end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")

        raise OpenSearchError(
            "OpenSearch's search for existing report \
            error: {}".format(error_.__str__())
@@ -538,11 +566,11 @@
    raise OpenSearchError("OpenSearch error: {0}".format(e.__str__()))


def save_forensic_report_to_elasticsearch(
    forensic_report: OrderedDict[str, Any],
    index_suffix: Optional[any] = None,
def save_forensic_report_to_opensearch(
    forensic_report: dict[str, Any],
    index_suffix: Optional[str] = None,
    index_prefix: Optional[str] = None,
    monthly_indexes: Optional[bool] = False,
    monthly_indexes: bool = False,
    number_of_shards: int = 1,
    number_of_replicas: int = 0,
):
@@ -550,7 +578,7 @@ def save_forensic_report_to_elasticsearch(
    Saves a parsed DMARC forensic report to OpenSearch

    Args:
        forensic_report (OrderedDict): A parsed forensic report
        forensic_report (dict): A parsed forensic report
        index_suffix (str): The suffix of the name of the index to save to
        index_prefix (str): The prefix of the name of the index to save to
        monthly_indexes (bool): Use monthly indexes instead of daily
@@ -570,7 +598,7 @@ def save_forensic_report_to_elasticsearch(
    sample_date = forensic_report["parsed_sample"]["date"]
    sample_date = human_timestamp_to_datetime(sample_date)
    original_headers = forensic_report["parsed_sample"]["headers"]
    headers = OrderedDict()
    headers: dict[str, Any] = {}
    for original_header in original_headers:
        headers[original_header.lower()] = original_headers[original_header]

@@ -705,19 +733,19 @@ def save_forensic_report_to_elasticsearch(
    )


def save_smtp_tls_report_to_elasticsearch(
    report: OrderedDict[str, Any],
    index_suffix: str = None,
    index_prefix: str = None,
    monthly_indexes: Optional[bool] = False,
    number_of_shards: Optional[int] = 1,
    number_of_replicas: Optional[int] = 0,
def save_smtp_tls_report_to_opensearch(
    report: dict[str, Any],
    index_suffix: Optional[str] = None,
    index_prefix: Optional[str] = None,
    monthly_indexes: bool = False,
    number_of_shards: int = 1,
    number_of_replicas: int = 0,
):
    """
    Saves a parsed SMTP TLS report to OpenSearch

    Args:
        report (OrderedDict): A parsed SMTP TLS report
        report (dict): A parsed SMTP TLS report
        index_suffix (str): The suffix of the name of the index to save to
        index_prefix (str): The prefix of the name of the index to save to
        monthly_indexes (bool): Use monthly indexes instead of daily indexes

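A sketch of how the new `awssigv4` mode might be invoked; the host and region are placeholders, and credentials come from the default boto3 chain (environment variables, a profile, or an instance role). Per opensearch-py, the SigV4 service name would be `aoss` for OpenSearch Serverless:

```python
# Sketch: point parsedmarc at an Amazon OpenSearch Service domain using the
# SigV4 path added above. Only parameters visible in the diff are used; the
# endpoint is a placeholder.
from parsedmarc import opensearch

opensearch.set_hosts(
    "https://search-dmarc-xxxxxxxx.us-east-1.es.amazonaws.com:443",
    auth_type="awssigv4",
    aws_region="us-east-1",
    aws_service="es",  # "aoss" for OpenSearch Serverless
)
```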
parsedmarc/resources/maps/base_reverse_dns_map.csv

@@ -132,6 +132,7 @@ asu-vei.ru,ASU-VEI,Industrial
atextelecom.com.br,ATEX Telecom,ISP
atmailcloud.com,atmail,Email Provider
ats.ca,ATS Healthcare,Healthcare
att.net,AT&T,ISP
atw.ne.jp,ATW,Web Host
au-net.ne.jp,KDDI,ISP
au.com,au,ISP
@@ -242,6 +243,7 @@ carandainet.com.br,CN Internet,ISP
cardhealth.com,Cardinal Health,Healthcare
cardinal.com,Cardinal Health,Healthcare
cardinalhealth.com,Cardinal Health,Healthcare
cardinalscriptnet.com,Cardinal Health,Healthcare
carecentrix.com,CareCentrix,Healthcare
carleton.edu,Carlton College,Education
carrierzone.com,carrierzone,Email Security
@@ -697,6 +699,7 @@ hdsupply-email.com,HD Supply,Retail
healthall.com,UC Health,Healthcare
healthcaresupplypros.com,Healthcare Supply Pros,Healthcare
healthproductsforyou.com,Health Products For You,Healthcare
healthtouch.com,Cardinal Health,Healthcare
helloserver6.com,1st Source Web,Marketing
helpforcb.com,InterServer,Web Host
helpscout.net,Help Scout,SaaS
@@ -753,6 +756,8 @@ hostwindsdns.com,Hostwinds,Web Host
hotnet.net.il,Hot Net Internet Services,ISP
hp.com,HP,Technology
hringdu.is,Hringdu,ISP
hslda.net,Home School Legal Defense Association (HSLDA),Education
hslda.org,Home School Legal Defense Association (HSLDA),Education
hspherefilter.com,"DynamicNet, Inc. (DNI)",Web Host
htc.net,HTC,ISP
htmlservices.it,HTMLServices.it,MSP
@@ -763,6 +768,7 @@ hughston.com,Hughston Clinic,Healthcare
hvvc.us,Hivelocity,Web Host
i2ts.ne.jp,i2ts,Web Host
i4i.com,i4i,Technology
ibindley.com,Cardinal Health,Healthcare
ice.co.cr,Grupo ICE,Industrial
icehosting.nl,IceHosting,Web Host
icewarpcloud.in,IceWrap,Email Provider
@@ -832,6 +838,7 @@ ip-5-196-151.eu,OVH,Web Host
ip-51-161-36.net,OVH,Web Host
ip-51-195-53.eu,OVH,Web Host
ip-51-254-53.eu,OVH,Web Host
ip-51-38-67.eu,OVH,Web Host
ip-51-77-42.eu,OVH,Web Host
ip-51-83-140.eu,OVH,Web Host
ip-51-89-240.eu,OVH,Web Host
@@ -1217,6 +1224,7 @@ nettoday.co.th,Net Today,Web Host
netventure.pl,Netventure,MSP
netvigator.com,HKT,ISP
netvision.net.il,013 Netvision,ISP
network-tech.com,Network Technologies International (NTI),SaaS
network.kz,network.kz,ISP
network80.com,Network80,Web Host
neubox.net,Neubox,Web Host

parsedmarc/s3.py

@@ -2,13 +2,11 @@

from __future__ import annotations

import json
from typing import Any

import json
import boto3

from collections import OrderedDict

from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime

@@ -53,18 +51,18 @@ class S3Client(object):
            aws_access_key_id=access_key_id,
            aws_secret_access_key=secret_access_key,
        )
        self.bucket = self.s3.Bucket(self.bucket_name)
        self.bucket = self.s3.Bucket(self.bucket_name)  # type: ignore

    def save_aggregate_report_to_s3(self, report: OrderedDict[str, Any]):
    def save_aggregate_report_to_s3(self, report: dict[str, Any]):
        self.save_report_to_s3(report, "aggregate")

    def save_forensic_report_to_s3(self, report: OrderedDict[str, Any]):
    def save_forensic_report_to_s3(self, report: dict[str, Any]):
        self.save_report_to_s3(report, "forensic")

    def save_smtp_tls_report_to_s3(self, report: OrderedDict[str, Any]):
    def save_smtp_tls_report_to_s3(self, report: dict[str, Any]):
        self.save_report_to_s3(report, "smtp_tls")

    def save_report_to_s3(self, report: OrderedDict[str, Any], report_type: str):
    def save_report_to_s3(self, report: dict[str, Any], report_type: str):
        if report_type == "smtp_tls":
            report_date = report["begin_date"]
            report_id = report["report_id"]

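`save_report_to_s3` amounts to serializing the parsed report and putting it under a key derived from the report type, date, and ID. A rough boto3 sketch; the bucket name and key scheme are illustrative, not parsedmarc's exact layout:

```python
# Sketch: write one parsed report as a JSON object to S3. The bucket and key
# format are invented for demonstration.
import json

import boto3

s3 = boto3.resource("s3")
bucket = s3.Bucket("dmarc-reports")

report = {"report_id": "abc123", "begin_date": "2024-01-01 00:00:00"}
key = "smtp_tls/2024/01/01/abc123.json"  # type/date/ID-derived key
bucket.put_object(Key=key, Body=json.dumps(report).encode("utf-8"))
```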
parsedmarc/splunk.py

@@ -2,16 +2,13 @@

from __future__ import annotations

from typing import Any

from collections import OrderedDict

from urllib.parse import urlparse
import socket
import json
import socket
from typing import Any, Union
from urllib.parse import urlparse

import urllib3
import requests
import urllib3

from parsedmarc.constants import USER_AGENT
from parsedmarc.log import logger
@@ -35,7 +32,7 @@ class HECClient(object):
        url: str,
        access_token: str,
        index: str,
        source: bool = "parsedmarc",
        source: str = "parsedmarc",
        verify=True,
        timeout=60,
    ):
@@ -51,9 +48,9 @@ class HECClient(object):
            timeout (float): Number of seconds to wait for the server to send
                data before giving up
        """
        url = urlparse(url)
        parsed_url = urlparse(url)
        self.url = "{0}://{1}/services/collector/event/1.0".format(
            url.scheme, url.netloc
            parsed_url.scheme, parsed_url.netloc
        )
        self.access_token = access_token.lstrip("Splunk ")
        self.index = index
@@ -62,7 +59,9 @@ class HECClient(object):
        self.session = requests.Session()
        self.timeout = timeout
        self.session.verify = verify
        self._common_data = dict(host=self.host, source=self.source, index=self.index)
        self._common_data: dict[str, Union[str, int, float, dict]] = dict(
            host=self.host, source=self.source, index=self.index
        )

        self.session.headers = {
            "User-Agent": USER_AGENT,
@@ -70,7 +69,8 @@ class HECClient(object):
        }

    def save_aggregate_reports_to_splunk(
        self, aggregate_reports: list[OrderedDict[str, Any]]
        self,
        aggregate_reports: Union[list[dict[str, Any]], dict[str, Any]],
    ):
        """
        Saves aggregate DMARC reports to Splunk
@@ -91,7 +91,7 @@ class HECClient(object):
        json_str = ""
        for report in aggregate_reports:
            for record in report["records"]:
                new_report = dict()
                new_report: dict[str, Union[str, int, float, dict]] = dict()
                for metadata in report["report_metadata"]:
                    new_report[metadata] = report["report_metadata"][metadata]
                new_report["interval_begin"] = record["interval_begin"]
@@ -135,7 +135,8 @@ class HECClient(object):
            raise SplunkError(response["text"])

    def save_forensic_reports_to_splunk(
        self, forensic_reports: list[OrderedDict[str, Any]]
        self,
        forensic_reports: Union[list[dict[str, Any]], dict[str, Any]],
    ):
        """
        Saves forensic DMARC reports to Splunk
@@ -170,7 +171,9 @@ class HECClient(object):
        if response["code"] != 0:
            raise SplunkError(response["text"])

    def save_smtp_tls_reports_to_splunk(self, reports: OrderedDict[str, Any]):
    def save_smtp_tls_reports_to_splunk(
        self, reports: Union[list[dict[str, Any]], dict[str, Any]]
    ):
        """
        Saves aggregate DMARC reports to Splunk

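The HEC client above builds one JSON envelope per event and shares `host`/`source`/`index` across them via `_common_data`. A hedged sketch of that payload shape; the token, URL, and `sourcetype` value are placeholders rather than parsedmarc's exact strings:

```python
# Sketch of a Splunk HTTP Event Collector batch: HEC accepts several JSON
# event objects concatenated in a single POST body.
import json

import requests

url = "https://splunk.example.net:8088/services/collector/event/1.0"
headers = {"Authorization": "Splunk 00000000-0000-0000-0000-000000000000"}
common = {"host": "parser01", "source": "parsedmarc", "index": "email"}

rows = [{"org_name": "example.net", "report_id": "abc123"}]
payload = "".join(
    json.dumps({**common, "sourcetype": "dmarc:aggregate", "event": row})
    for row in rows
)
response = requests.post(url, data=payload, headers=headers, timeout=60)
response.raise_for_status()
```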
parsedmarc/syslog.py

@@ -3,14 +3,13 @@

from __future__ import annotations

import json
import logging
import logging.handlers

from typing import Any

from collections import OrderedDict

import json
import socket
import ssl
import time
from typing import Any, Optional

from parsedmarc import (
    parsed_aggregate_reports_to_csv_rows,
@@ -22,37 +21,161 @@ from parsedmarc import (
class SyslogClient(object):
    """A client for Syslog"""

    def __init__(self, server_name: str, server_port: int):
    def __init__(
        self,
        server_name: str,
        server_port: int,
        protocol: str = "udp",
        cafile_path: Optional[str] = None,
        certfile_path: Optional[str] = None,
        keyfile_path: Optional[str] = None,
        timeout: float = 5.0,
        retry_attempts: int = 3,
        retry_delay: int = 5,
    ):
        """
        Initializes the SyslogClient
        Args:
            server_name (str): The Syslog server
            server_port (int): The Syslog UDP port
            server_port (int): The Syslog port
            protocol (str): The protocol to use: "udp", "tcp", or "tls" (Default: "udp")
            cafile_path (str): Path to CA certificate file for TLS server verification (Optional)
            certfile_path (str): Path to client certificate file for TLS authentication (Optional)
            keyfile_path (str): Path to client private key file for TLS authentication (Optional)
            timeout (float): Connection timeout in seconds for TCP/TLS (Default: 5.0)
            retry_attempts (int): Number of retry attempts for failed connections (Default: 3)
            retry_delay (int): Delay in seconds between retry attempts (Default: 5)
        """
        self.server_name = server_name
        self.server_port = server_port
        self.protocol = protocol.lower()
        self.timeout = timeout
        self.retry_attempts = retry_attempts
        self.retry_delay = retry_delay

        self.logger = logging.getLogger("parsedmarc_syslog")
        self.logger.setLevel(logging.INFO)
        log_handler = logging.handlers.SysLogHandler(address=(server_name, server_port))

        # Create the appropriate syslog handler based on protocol
        log_handler = self._create_syslog_handler(
            server_name,
            server_port,
            self.protocol,
            cafile_path,
            certfile_path,
            keyfile_path,
            timeout,
            retry_attempts,
            retry_delay,
        )

        self.logger.addHandler(log_handler)

    def save_aggregate_report_to_syslog(
        self, aggregate_reports: list[OrderedDict[str, Any]]
    ):
    def _create_syslog_handler(
        self,
        server_name: str,
        server_port: int,
        protocol: str,
        cafile_path: Optional[str],
        certfile_path: Optional[str],
        keyfile_path: Optional[str],
        timeout: float,
        retry_attempts: int,
        retry_delay: int,
    ) -> logging.handlers.SysLogHandler:
        """
        Creates a SysLogHandler with the specified protocol and TLS settings
        """
        if protocol == "udp":
            # UDP protocol (default, backward compatible)
            return logging.handlers.SysLogHandler(
                address=(server_name, server_port),
                socktype=socket.SOCK_DGRAM,
            )
        elif protocol in ["tcp", "tls"]:
            # TCP or TLS protocol with retry logic
            for attempt in range(1, retry_attempts + 1):
                try:
                    if protocol == "tcp":
                        # TCP without TLS
                        handler = logging.handlers.SysLogHandler(
                            address=(server_name, server_port),
                            socktype=socket.SOCK_STREAM,
                        )
                        # Set timeout on the socket
                        if hasattr(handler, "socket") and handler.socket:
                            handler.socket.settimeout(timeout)
                        return handler
                    else:
                        # TLS protocol
                        # Create SSL context with secure defaults
                        ssl_context = ssl.create_default_context()

                        # Explicitly set minimum TLS version to 1.2 for security
                        ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2

                        # Configure server certificate verification
                        if cafile_path:
                            ssl_context.load_verify_locations(cafile=cafile_path)

                        # Configure client certificate authentication
                        if certfile_path and keyfile_path:
                            ssl_context.load_cert_chain(
                                certfile=certfile_path,
                                keyfile=keyfile_path,
                            )
                        elif certfile_path or keyfile_path:
                            # Warn if only one of the two required parameters is provided
                            self.logger.warning(
                                "Both certfile_path and keyfile_path are required for "
                                "client certificate authentication. Client authentication "
                                "will not be used."
                            )

                        # Create TCP handler first
                        handler = logging.handlers.SysLogHandler(
                            address=(server_name, server_port),
                            socktype=socket.SOCK_STREAM,
                        )

                        # Wrap socket with TLS
                        if hasattr(handler, "socket") and handler.socket:
                            handler.socket = ssl_context.wrap_socket(
                                handler.socket,
                                server_hostname=server_name,
                            )
                            handler.socket.settimeout(timeout)

                        return handler

                except Exception as e:
                    if attempt < retry_attempts:
                        self.logger.warning(
                            f"Syslog connection attempt {attempt}/{retry_attempts} failed: {e}. "
                            f"Retrying in {retry_delay} seconds..."
                        )
                        time.sleep(retry_delay)
                    else:
                        self.logger.error(
                            f"Syslog connection failed after {retry_attempts} attempts: {e}"
                        )
                        raise
        else:
            raise ValueError(
                f"Invalid protocol '{protocol}'. Must be 'udp', 'tcp', or 'tls'."
            )

    def save_aggregate_report_to_syslog(self, aggregate_reports: list[dict[str, Any]]):
        rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
        for row in rows:
            self.logger.info(json.dumps(row))

    def save_forensic_report_to_syslog(
        self, forensic_reports: list[OrderedDict[str, Any]]
    ):
    def save_forensic_report_to_syslog(self, forensic_reports: list[dict[str, Any]]):
        rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
        for row in rows:
            self.logger.info(json.dumps(row))

    def save_smtp_tls_report_to_syslog(
        self, smtp_tls_reports: list[OrderedDict[str, Any]]
    ):
    def save_smtp_tls_report_to_syslog(self, smtp_tls_reports: list[dict[str, Any]]):
        rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
        for row in rows:
            self.logger.info(json.dumps(row))

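Constructing the extended client for syslog over TLS, using only parameters from the new signature above; the host and certificate path are placeholders:

```python
# Sketch: syslog over TLS with the new SyslogClient options. 6514 is the
# conventional syslog-over-TLS port; paths and hostname are placeholders.
from parsedmarc.syslog import SyslogClient

client = SyslogClient(
    server_name="syslog.example.net",
    server_port=6514,
    protocol="tls",
    cafile_path="/etc/ssl/certs/syslog-ca.pem",
    timeout=5.0,
    retry_attempts=3,
    retry_delay=5,
)
client.save_aggregate_report_to_syslog([])  # no-op on an empty report list
```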
parsedmarc/types.py (new file, 220 lines)

@@ -0,0 +1,220 @@

from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional, TypedDict, Union

# NOTE: This module is intentionally Python 3.10 compatible.
# - No PEP 604 unions (A | B)
# - No typing.NotRequired / Required (3.11+) to avoid an extra dependency.
# For optional keys, use total=False TypedDicts.


ReportType = Literal["aggregate", "forensic", "smtp_tls"]


class AggregateReportMetadata(TypedDict):
    org_name: str
    org_email: str
    org_extra_contact_info: Optional[str]
    report_id: str
    begin_date: str
    end_date: str
    timespan_requires_normalization: bool
    original_timespan_seconds: int
    errors: List[str]


class AggregatePolicyPublished(TypedDict):
    domain: str
    adkim: str
    aspf: str
    p: str
    sp: str
    pct: str
    fo: str


class IPSourceInfo(TypedDict):
    ip_address: str
    country: Optional[str]
    reverse_dns: Optional[str]
    base_domain: Optional[str]
    name: Optional[str]
    type: Optional[str]


class AggregateAlignment(TypedDict):
    spf: bool
    dkim: bool
    dmarc: bool


class AggregateIdentifiers(TypedDict):
    header_from: str
    envelope_from: Optional[str]
    envelope_to: Optional[str]


class AggregatePolicyOverrideReason(TypedDict):
    type: Optional[str]
    comment: Optional[str]


class AggregateAuthResultDKIM(TypedDict):
    domain: str
    result: str
    selector: str


class AggregateAuthResultSPF(TypedDict):
    domain: str
    result: str
    scope: str


class AggregateAuthResults(TypedDict):
    dkim: List[AggregateAuthResultDKIM]
    spf: List[AggregateAuthResultSPF]


class AggregatePolicyEvaluated(TypedDict):
    disposition: str
    dkim: str
    spf: str
    policy_override_reasons: List[AggregatePolicyOverrideReason]


class AggregateRecord(TypedDict):
    interval_begin: str
    interval_end: str
    source: IPSourceInfo
    count: int
    alignment: AggregateAlignment
    policy_evaluated: AggregatePolicyEvaluated
    disposition: str
    identifiers: AggregateIdentifiers
    auth_results: AggregateAuthResults


class AggregateReport(TypedDict):
    xml_schema: str
    report_metadata: AggregateReportMetadata
    policy_published: AggregatePolicyPublished
    records: List[AggregateRecord]


class EmailAddress(TypedDict):
    display_name: Optional[str]
    address: str
    local: Optional[str]
    domain: Optional[str]


class EmailAttachment(TypedDict, total=False):
    filename: Optional[str]
    mail_content_type: Optional[str]
    sha256: Optional[str]


ParsedEmail = TypedDict(
    "ParsedEmail",
    {
        # This is a lightly-specified version of mailsuite/mailparser JSON.
        # It focuses on the fields parsedmarc uses in forensic handling.
        "headers": Dict[str, Any],
        "subject": Optional[str],
        "filename_safe_subject": Optional[str],
        "date": Optional[str],
        "from": EmailAddress,
        "to": List[EmailAddress],
        "cc": List[EmailAddress],
        "bcc": List[EmailAddress],
        "attachments": List[EmailAttachment],
        "body": Optional[str],
        "has_defects": bool,
        "defects": Any,
        "defects_categories": Any,
    },
    total=False,
)


class ForensicReport(TypedDict):
    feedback_type: Optional[str]
    user_agent: Optional[str]
    version: Optional[str]
    original_envelope_id: Optional[str]
    original_mail_from: Optional[str]
    original_rcpt_to: Optional[str]
    arrival_date: str
    arrival_date_utc: str
    authentication_results: Optional[str]
    delivery_result: Optional[str]
    auth_failure: List[str]
    authentication_mechanisms: List[str]
    dkim_domain: Optional[str]
    reported_domain: str
    sample_headers_only: bool
    source: IPSourceInfo
    sample: str
    parsed_sample: ParsedEmail


class SMTPTLSFailureDetails(TypedDict):
    result_type: str
    failed_session_count: int


class SMTPTLSFailureDetailsOptional(SMTPTLSFailureDetails, total=False):
    sending_mta_ip: str
    receiving_ip: str
    receiving_mx_hostname: str
    receiving_mx_helo: str
    additional_info_uri: str
    failure_reason_code: str
    ip_address: str


class SMTPTLSPolicySummary(TypedDict):
    policy_domain: str
    policy_type: str
    successful_session_count: int
    failed_session_count: int


class SMTPTLSPolicy(SMTPTLSPolicySummary, total=False):
    policy_strings: List[str]
    mx_host_patterns: List[str]
    failure_details: List[SMTPTLSFailureDetailsOptional]


class SMTPTLSReport(TypedDict):
    organization_name: str
    begin_date: str
    end_date: str
    contact_info: Union[str, List[str]]
    report_id: str
    policies: List[SMTPTLSPolicy]


class AggregateParsedReport(TypedDict):
    report_type: Literal["aggregate"]
    report: AggregateReport


class ForensicParsedReport(TypedDict):
    report_type: Literal["forensic"]
    report: ForensicReport


class SMTPTLSParsedReport(TypedDict):
    report_type: Literal["smtp_tls"]
    report: SMTPTLSReport


ParsedReport = Union[AggregateParsedReport, ForensicParsedReport, SMTPTLSParsedReport]


class ParsingResults(TypedDict):
    aggregate_reports: List[AggregateReport]
    forensic_reports: List[ForensicReport]
    smtp_tls_reports: List[SMTPTLSReport]

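Because each `*ParsedReport` variant carries a `report_type` literal, type checkers such as pyright and mypy can narrow the `ParsedReport` union by comparing that key. A small sketch of how downstream code benefits:

```python
# Sketch: tagged-union narrowing over the ParsedReport union defined above.
from parsedmarc.types import ParsedReport


def summarize(parsed: ParsedReport) -> str:
    if parsed["report_type"] == "aggregate":
        # Narrowed to AggregateParsedReport here
        meta = parsed["report"]["report_metadata"]
        return f"aggregate {meta['report_id']} from {meta['org_name']}"
    elif parsed["report_type"] == "forensic":
        return f"forensic report for {parsed['report']['reported_domain']}"
    else:
        return f"smtp_tls report from {parsed['report']['organization_name']}"
```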
@@ -4,26 +4,23 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from datetime import timezone
|
||||
from datetime import timedelta
|
||||
from collections import OrderedDict
|
||||
from expiringdict import ExpiringDict
|
||||
import tempfile
|
||||
import subprocess
|
||||
import shutil
|
||||
import mailparser
|
||||
import json
|
||||
import hashlib
|
||||
import base64
|
||||
import mailbox
|
||||
import re
|
||||
import csv
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import mailbox
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional, TypedDict, Union, cast
|
||||
|
||||
import mailparser
|
||||
from expiringdict import ExpiringDict
|
||||
|
||||
try:
|
||||
from importlib.resources import files
|
||||
@@ -32,19 +29,19 @@ except ImportError:
|
||||
from importlib.resources import files
|
||||
|
||||
|
||||
from dateutil.parser import parse as parse_date
|
||||
import dns.reversename
|
||||
import dns.resolver
|
||||
import dns.exception
|
||||
import dns.resolver
|
||||
import dns.reversename
|
||||
import geoip2.database
|
||||
import geoip2.errors
|
||||
import publicsuffixlist
|
||||
import requests
|
||||
from dateutil.parser import parse as parse_date
|
||||
|
||||
from parsedmarc.log import logger
|
||||
import parsedmarc.resources.dbip
|
||||
import parsedmarc.resources.maps
|
||||
from parsedmarc.constants import USER_AGENT
|
||||
from parsedmarc.log import logger
|
||||
|
||||
parenthesis_regex = re.compile(r"\s*\(.*\)\s*")
|
||||
|
||||
@@ -67,6 +64,23 @@ class DownloadError(RuntimeError):
|
||||
"""Raised when an error occurs when downloading a file"""
|
||||
|
||||
|
||||
class ReverseDNSService(TypedDict):
|
||||
name: str
|
||||
type: Optional[str]
|
||||
|
||||
|
||||
ReverseDNSMap = dict[str, ReverseDNSService]
|
||||
|
||||
|
||||
class IPAddressInfo(TypedDict):
|
||||
ip_address: str
|
||||
reverse_dns: Optional[str]
|
||||
country: Optional[str]
|
||||
base_domain: Optional[str]
|
||||
name: Optional[str]
|
||||
type: Optional[str]
|
||||
|
||||
|
||||
def decode_base64(data: str) -> bytes:
|
||||
"""
|
||||
Decodes a base64 string, with padding being optional
|
||||
@@ -78,14 +92,14 @@ def decode_base64(data: str) -> bytes:
|
||||
bytes: The decoded bytes
|
||||
|
||||
"""
|
||||
data = bytes(data, encoding="ascii")
|
||||
missing_padding = len(data) % 4
|
||||
data_bytes = bytes(data, encoding="ascii")
|
||||
missing_padding = len(data_bytes) % 4
|
||||
if missing_padding != 0:
|
||||
data += b"=" * (4 - missing_padding)
|
||||
return base64.b64decode(data)
|
||||
data_bytes += b"=" * (4 - missing_padding)
|
||||
return base64.b64decode(data_bytes)
|
||||
|
||||
|
||||
def get_base_domain(domain: str) -> str:
|
||||
def get_base_domain(domain: str) -> Optional[str]:
|
||||
"""
|
||||
Gets the base domain name for the given domain
|
||||
|
||||
@@ -114,8 +128,8 @@ def query_dns(
|
||||
record_type: str,
|
||||
*,
|
||||
cache: Optional[ExpiringDict] = None,
|
||||
nameservers: list[str] = None,
|
||||
timeout: int = 2.0,
|
||||
nameservers: Optional[list[str]] = None,
|
||||
timeout: float = 2.0,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Queries DNS
|
||||
@@ -135,9 +149,9 @@ def query_dns(
     record_type = record_type.upper()
     cache_key = "{0}_{1}".format(domain, record_type)
     if cache:
-        records = cache.get(cache_key, None)
-        if records:
-            return records
+        cached_records = cache.get(cache_key, None)
+        if isinstance(cached_records, list):
+            return cast(list[str], cached_records)

     resolver = dns.resolver.Resolver()
     timeout = float(timeout)

@@ -151,26 +165,12 @@ def query_dns(
     resolver.nameservers = nameservers
     resolver.timeout = timeout
     resolver.lifetime = timeout
-    if record_type == "TXT":
-        resource_records = list(
-            map(
-                lambda r: r.strings,
-                resolver.resolve(domain, record_type, lifetime=timeout),
-            )
-        )
-        _resource_record = [
-            resource_record[0][:0].join(resource_record)
-            for resource_record in resource_records
-            if resource_record
-        ]
-        records = [r.decode() for r in _resource_record]
-    else:
-        records = list(
-            map(
-                lambda r: r.to_text().replace('"', "").rstrip("."),
-                resolver.resolve(domain, record_type, lifetime=timeout),
-            )
-        )
+    records = list(
+        map(
+            lambda r: r.to_text().replace('"', "").rstrip("."),
+            resolver.resolve(domain, record_type, lifetime=timeout),
+        )
+    )
     if cache:
         cache[cache_key] = records

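The deleted TXT branch joined the raw `rdata.strings` fragments directly, while the surviving branch renders each answer with `to_text()` and strips the quote characters. For multi-fragment TXT records the two flattenings can differ by a separating space, which a local dnspython sketch (no network involved) makes visible:

```python
import dns.rdata

# Build a two-fragment TXT rdata locally to compare both flattenings
rdata = dns.rdata.from_text("IN", "TXT", '"v=spf1 " "include:example.com -all"')

joined = b"".join(rdata.strings).decode()                 # old behavior
flattened = rdata.to_text().replace('"', "").rstrip(".")  # new behavior

print(joined)     # v=spf1 include:example.com -all
print(flattened)  # v=spf1  include:example.com -all  (extra space between fragments)
```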
@@ -181,9 +181,9 @@ def get_reverse_dns(
     ip_address,
     *,
     cache: Optional[ExpiringDict] = None,
-    nameservers: list[str] = None,
-    timeout: int = 2.0,
-) -> str:
+    nameservers: Optional[list[str]] = None,
+    timeout: float = 2.0,
+) -> Optional[str]:
     """
     Resolves an IP address to a hostname using a reverse DNS query

@@ -201,7 +201,7 @@ def get_reverse_dns(
     try:
         address = dns.reversename.from_address(ip_address)
         hostname = query_dns(
-            address, "PTR", cache=cache, nameservers=nameservers, timeout=timeout
+            str(address), "PTR", cache=cache, nameservers=nameservers, timeout=timeout
         )[0]

     except dns.exception.DNSException as e:

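The `str(address)` cast exists because `dns.reversename.from_address()` returns a `dns.name.Name`, not a `str`, while `query_dns()` is now annotated to accept a string. A quick illustration:

```python
import dns.reversename

address = dns.reversename.from_address("8.8.8.8")
print(type(address).__name__)  # Name
print(str(address))            # 8.8.8.8.in-addr.arpa.
```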
@@ -238,7 +238,7 @@ def timestamp_to_human(timestamp: int) -> str:


 def human_timestamp_to_datetime(
-    human_timestamp: str, *, to_utc: Optional[bool] = False
+    human_timestamp: str, *, to_utc: bool = False
 ) -> datetime:
     """
     Converts a human-readable timestamp into a Python ``datetime`` object

@@ -269,10 +269,12 @@ def human_timestamp_to_unix_timestamp(human_timestamp: str) -> int:
         float: The converted timestamp
     """
     human_timestamp = human_timestamp.replace("T", " ")
-    return human_timestamp_to_datetime(human_timestamp).timestamp()
+    return int(human_timestamp_to_datetime(human_timestamp).timestamp())


-def get_ip_address_country(ip_address: str, *, db_path: Optional[str] = None) -> str:
+def get_ip_address_country(
+    ip_address: str, *, db_path: Optional[str] = None
+) -> Optional[str]:
     """
     Returns the ISO code for the country associated
     with the given IPv4 or IPv6 address

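Wrapping the result in `int()` makes the body agree with the function's `-> int` annotation, truncating the fractional seconds. A standalone sketch of the same conversion with dateutil (like the original, a naive timestamp is interpreted in the local timezone):

```python
from dateutil.parser import parse as parse_date

human_timestamp = "2024-01-02T03:04:05".replace("T", " ")  # normalize the "T"
unix_timestamp = int(parse_date(human_timestamp).timestamp())
print(unix_timestamp)  # POSIX seconds, truncated to an int
```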
@@ -337,12 +339,12 @@ def get_ip_address_country(ip_address: str, *, db_path: Optional[str] = None) -> str:
 def get_service_from_reverse_dns_base_domain(
     base_domain,
     *,
-    always_use_local_file: Optional[bool] = False,
-    local_file_path: Optional[bool] = None,
-    url: Optional[bool] = None,
-    offline: Optional[bool] = False,
-    reverse_dns_map: Optional[bool] = None,
-) -> str:
+    always_use_local_file: bool = False,
+    local_file_path: Optional[str] = None,
+    url: Optional[str] = None,
+    offline: bool = False,
+    reverse_dns_map: Optional[ReverseDNSMap] = None,
+) -> ReverseDNSService:
     """
     Returns the service name of a given base domain name from reverse DNS.

@@ -359,12 +361,6 @@ def get_service_from_reverse_dns_base_domain(
         the supplied reverse_dns_base_domain and the type will be None
     """

-    def load_csv(_csv_file):
-        reader = csv.DictReader(_csv_file)
-        for row in reader:
-            key = row["base_reverse_dns"].lower().strip()
-            reverse_dns_map[key] = dict(name=row["name"], type=row["type"])
-
     base_domain = base_domain.lower().strip()
     if url is None:
         url = (

@@ -372,11 +368,24 @@ def get_service_from_reverse_dns_base_domain(
             "/parsedmarc/master/parsedmarc/"
             "resources/maps/base_reverse_dns_map.csv"
         )
+    reverse_dns_map_value: ReverseDNSMap
     if reverse_dns_map is None:
-        reverse_dns_map = dict()
+        reverse_dns_map_value = {}
+    else:
+        reverse_dns_map_value = reverse_dns_map
+
+    def load_csv(_csv_file):
+        reader = csv.DictReader(_csv_file)
+        for row in reader:
+            key = row["base_reverse_dns"].lower().strip()
+            reverse_dns_map_value[key] = {
+                "name": row["name"],
+                "type": row["type"],
+            }

     csv_file = io.StringIO()

-    if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
+    if not (offline or always_use_local_file) and len(reverse_dns_map_value) == 0:
         try:
             logger.debug(f"Trying to fetch reverse DNS map from {url}...")
             headers = {"User-Agent": USER_AGENT}

@@ -393,7 +402,7 @@ def get_service_from_reverse_dns_base_domain(
             logging.debug("Response body:")
             logger.debug(csv_file.read())

-    if len(reverse_dns_map) == 0:
+    if len(reverse_dns_map_value) == 0:
         logger.info("Loading included reverse DNS map...")
         path = str(
             files(parsedmarc.resources.maps).joinpath("base_reverse_dns_map.csv")

@@ -402,10 +411,11 @@ def get_service_from_reverse_dns_base_domain(
         path = local_file_path
     with open(path) as csv_file:
         load_csv(csv_file)
+    service: ReverseDNSService
     try:
-        service = reverse_dns_map[base_domain]
+        service = reverse_dns_map_value[base_domain]
     except KeyError:
-        service = dict(name=base_domain, type=None)
+        service = {"name": base_domain, "type": None}

     return service

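The `KeyError` fallback guarantees every lookup yields a complete `ReverseDNSService`, echoing the base domain back as the name when it is unknown. A minimal re-implementation of just that fallback, against a hypothetical in-memory map instead of the bundled CSV:

```python
from typing import Optional

# Hypothetical pre-populated map; real data comes from base_reverse_dns_map.csv
reverse_dns_map: dict[str, dict[str, Optional[str]]] = {
    "amazonses.com": {"name": "Amazon SES", "type": "Email Provider"},
}


def lookup(base_domain: str) -> dict[str, Optional[str]]:
    try:
        return reverse_dns_map[base_domain.lower().strip()]
    except KeyError:
        return {"name": base_domain, "type": None}  # fallback shape from the diff


print(lookup("amazonses.com"))    # {'name': 'Amazon SES', 'type': 'Email Provider'}
print(lookup("unknown.example"))  # {'name': 'unknown.example', 'type': None}
```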
@@ -415,14 +425,14 @@ def get_ip_address_info(
     *,
     ip_db_path: Optional[str] = None,
     reverse_dns_map_path: Optional[str] = None,
-    always_use_local_files: Optional[bool] = False,
-    reverse_dns_map_url: Optional[bool] = None,
+    always_use_local_files: bool = False,
+    reverse_dns_map_url: Optional[str] = None,
     cache: Optional[ExpiringDict] = None,
-    reverse_dns_map: Optional[bool] = None,
-    offline: Optional[bool] = False,
+    reverse_dns_map: Optional[ReverseDNSMap] = None,
+    offline: bool = False,
     nameservers: Optional[list[str]] = None,
-    timeout: Optional[float] = 2.0,
-) -> OrderedDict[str, str]:
+    timeout: float = 2.0,
+) -> IPAddressInfo:
     """
     Returns reverse DNS and country information for the given IP address

@@ -440,17 +450,27 @@ def get_ip_address_info(
         timeout (float): Sets the DNS timeout in seconds

     Returns:
-        OrderedDict: ``ip_address``, ``reverse_dns``, ``country``
+        dict: ``ip_address``, ``reverse_dns``, ``country``

     """
     ip_address = ip_address.lower()
     if cache is not None:
-        info = cache.get(ip_address, None)
-        if info:
+        cached_info = cache.get(ip_address, None)
+        if (
+            cached_info
+            and isinstance(cached_info, dict)
+            and "ip_address" in cached_info
+        ):
             logger.debug(f"IP address {ip_address} was found in cache")
-            return info
-    info = OrderedDict()
-    info["ip_address"] = ip_address
+            return cast(IPAddressInfo, cached_info)
+    info: IPAddressInfo = {
+        "ip_address": ip_address,
+        "reverse_dns": None,
+        "country": None,
+        "base_domain": None,
+        "name": None,
+        "type": None,
+    }
     if offline:
         reverse_dns = None
     else:

@@ -460,9 +480,6 @@ def get_ip_address_info(
     country = get_ip_address_country(ip_address, db_path=ip_db_path)
     info["country"] = country
     info["reverse_dns"] = reverse_dns
-    info["base_domain"] = None
-    info["name"] = None
-    info["type"] = None
     if reverse_dns is not None:
         base_domain = get_base_domain(reverse_dns)
         if base_domain is not None:

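Initializing every `IPAddressInfo` key up front (instead of bolting `base_domain`, `name`, and `type` on later) means callers can index any field without a `KeyError`, even for offline runs or unresolvable addresses. A sketch of the default shape, using a documentation-range address:

```python
# All keys are present from the start; enrichment only fills values in
info = {
    "ip_address": "203.0.113.7",  # RFC 5737 documentation address
    "reverse_dns": None,
    "country": None,
    "base_domain": None,
    "name": None,
    "type": None,
}
print([key for key, value in info.items() if value is None])
# ['reverse_dns', 'country', 'base_domain', 'name', 'type']
```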
@@ -487,7 +504,7 @@ def get_ip_address_info(
     return info


-def parse_email_address(original_address: str) -> OrderedDict[str, str]:
+def parse_email_address(original_address: str) -> dict[str, Optional[str]]:
     if original_address[0] == "":
         display_name = None
     else:

@@ -500,14 +517,12 @@ def parse_email_address(original_address: str) -> OrderedDict[str, str]:
     local = address_parts[0].lower()
     domain = address_parts[-1].lower()

-    return OrderedDict(
-        [
-            ("display_name", display_name),
-            ("address", address),
-            ("local", local),
-            ("domain", domain),
-        ]
-    )
+    return {
+        "display_name": display_name,
+        "address": address,
+        "local": local,
+        "domain": domain,
+    }


 def get_filename_safe_string(string: str) -> str:
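The return value is now a plain `dict`, which preserves insertion order on every supported Python version, so dropping `OrderedDict` loses nothing. Despite the `str` annotation, the body indexes `original_address[0]`, so the input behaves like the `(display_name, address)` pair that `email.utils.parseaddr` produces; a sketch under that assumption:

```python
from email.utils import parseaddr

display_name, address = parseaddr("Alice Example <alice@example.com>")
address_parts = address.split("@")
print({
    "display_name": display_name or None,
    "address": address,
    "local": address_parts[0].lower(),
    "domain": address_parts[-1].lower(),
})
# {'display_name': 'Alice Example', 'address': 'alice@example.com',
#  'local': 'alice', 'domain': 'example.com'}
```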
@@ -568,7 +583,7 @@ def is_outlook_msg(content) -> bool:
     )


-def convert_outlook_msg(msg_bytes: bytes) -> str:
+def convert_outlook_msg(msg_bytes: bytes) -> bytes:
     """
     Uses the ``msgconvert`` Perl utility to convert an Outlook MS file to
     standard RFC 822 format

@@ -577,7 +592,7 @@ def convert_outlook_msg(msg_bytes: bytes) -> str:
         msg_bytes (bytes): the content of the .msg file

     Returns:
-        A RFC 822 string
+        A RFC 822 bytes payload
     """
     if not is_outlook_msg(msg_bytes):
         raise ValueError("The supplied bytes are not an Outlook MSG file")

@@ -605,8 +620,8 @@ def convert_outlook_msg(msg_bytes: bytes) -> str:


 def parse_email(
-    data: Union[bytes, str], *, strip_attachment_payloads: Optional[bool] = False
-):
+    data: Union[bytes, str], *, strip_attachment_payloads: bool = False
+) -> dict:
     """
     A simplified email parser

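With `convert_outlook_msg()` now returning `bytes`, callers that previously received a decoded `str` must decode explicitly. A minimal sketch (the utf-8/`errors="replace"` policy is an assumption for illustration, not something this diff specifies):

```python
rfc822_bytes = b"Subject: test\r\n\r\nbody"  # what convert_outlook_msg now yields
rfc822_text = rfc822_bytes.decode("utf-8", errors="replace")
print(rfc822_text.splitlines()[0])  # Subject: test
```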
@@ -4,8 +4,6 @@ from __future__ import annotations

 from typing import Any, Optional, Union

-from collections import OrderedDict
-
 import requests

 from parsedmarc import logger

@@ -40,19 +38,19 @@ class WebhookClient(object):
             "Content-Type": "application/json",
         }

-    def save_forensic_report_to_webhook(self, report: OrderedDict[str, Any]):
+    def save_forensic_report_to_webhook(self, report: str):
         try:
             self._send_to_webhook(self.forensic_url, report)
         except Exception as error_:
             logger.error("Webhook Error: {0}".format(error_.__str__()))

-    def save_smtp_tls_report_to_webhook(self, report: OrderedDict[str, Any]):
+    def save_smtp_tls_report_to_webhook(self, report: str):
         try:
             self._send_to_webhook(self.smtp_tls_url, report)
         except Exception as error_:
             logger.error("Webhook Error: {0}".format(error_.__str__()))

-    def save_aggregate_report_to_webhook(self, report: OrderedDict[str, Any]):
+    def save_aggregate_report_to_webhook(self, report: str):
         try:
             self._send_to_webhook(self.aggregate_url, report)
         except Exception as error_:

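The `report` parameters are now annotated as `str`: the webhook payloads arrive pre-serialized as JSON strings rather than as `OrderedDict` objects. A sketch of preparing such a payload (the keys shown are placeholders, not the real report schema):

```python
import json

report = json.dumps({"report_metadata": {}, "records": []})  # placeholder keys
assert isinstance(report, str)  # what save_*_report_to_webhook now expects
```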
@@ -2,6 +2,7 @@
 requires = [
     "hatchling>=1.27.0",
 ]
+requires_python = ">=3.10,<3.15"
 build-backend = "hatchling.build"

 [project]

@@ -28,7 +29,7 @@ classifiers = [
     "Operating System :: OS Independent",
     "Programming Language :: Python :: 3"
 ]
-requires-python = ">= 3.9"
+requires-python = ">=3.10"
 dependencies = [
     "azure-identity>=1.8.0",
     "azure-monitor-ingestion>=1.0.0",

@@ -44,10 +45,10 @@ dependencies = [
     "google-auth-httplib2>=0.1.0",
     "google-auth-oauthlib>=0.4.6",
     "google-auth>=2.3.3",
-    "imapclient>=2.1.0",
+    "imapclient>=3.1.0",
     "kafka-python-ng>=2.2.2",
     "lxml>=4.4.0",
-    "mailsuite>=1.9.18",
+    "mailsuite>=1.11.2",
     "msgraph-core==0.2.2",
     "opensearch-py>=2.4.2,<=3.0.0",
     "publicsuffixlist>=0.10.0",
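With this change the build metadata and the `[project]` table agree on a Python 3.10 floor, consistent with the modern typing (builtin generics such as `dict[str, ...]`) used in the diffs above. A trivial runtime guard along the same lines:

```python
import sys

assert sys.version_info >= (3, 10), "parsedmarc now requires Python 3.10+"
```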