mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-03-04 22:06:26 +00:00
Compare commits
5 Commits
master
...
copilot/fi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eb2218b6fc | ||
|
|
3f2fc5f727 | ||
|
|
f94c28c770 | ||
|
|
c0f05b81b8 | ||
|
|
9c9ef2fa50 |
2
.github/workflows/python-tests.yml
vendored
2
.github/workflows/python-tests.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v5
|
||||||
|
|||||||
64
AGENTS.md
64
AGENTS.md
@@ -1,64 +0,0 @@
|
|||||||
# AGENTS.md
|
|
||||||
|
|
||||||
This file provides guidance to AI agents when working with code in this repository.
|
|
||||||
|
|
||||||
## Project Overview
|
|
||||||
|
|
||||||
parsedmarc is a Python module and CLI utility for parsing DMARC aggregate (RUA), forensic (RUF), and SMTP TLS reports. It reads reports from IMAP, Microsoft Graph, Gmail API, Maildir, mbox files, or direct file paths, and outputs to JSON/CSV, Elasticsearch, OpenSearch, Splunk, Kafka, S3, Azure Log Analytics, syslog, or webhooks.
|
|
||||||
|
|
||||||
## Common Commands
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Install with dev/build dependencies
|
|
||||||
pip install .[build]
|
|
||||||
|
|
||||||
# Run all tests with coverage
|
|
||||||
pytest --cov --cov-report=xml tests.py
|
|
||||||
|
|
||||||
# Run a single test
|
|
||||||
pytest tests.py::Test::testAggregateSamples
|
|
||||||
|
|
||||||
# Lint and format
|
|
||||||
ruff check .
|
|
||||||
ruff format .
|
|
||||||
|
|
||||||
# Test CLI with sample reports
|
|
||||||
parsedmarc --debug -c ci.ini samples/aggregate/*
|
|
||||||
parsedmarc --debug -c ci.ini samples/forensic/*
|
|
||||||
|
|
||||||
# Build docs
|
|
||||||
cd docs && make html
|
|
||||||
|
|
||||||
# Build distribution
|
|
||||||
hatch build
|
|
||||||
```
|
|
||||||
|
|
||||||
To skip DNS lookups during testing, set `GITHUB_ACTIONS=true`.
|
|
||||||
|
|
||||||
## Architecture
|
|
||||||
|
|
||||||
**Data flow:** Input sources → CLI (`cli.py:_main`) → Parse (`__init__.py`) → Enrich (DNS/GeoIP via `utils.py`) → Output integrations
|
|
||||||
|
|
||||||
### Key modules
|
|
||||||
|
|
||||||
- `parsedmarc/__init__.py` — Core parsing logic. Main functions: `parse_report_file()`, `parse_report_email()`, `parse_aggregate_report_xml()`, `parse_forensic_report()`, `parse_smtp_tls_report_json()`, `get_dmarc_reports_from_mailbox()`, `watch_inbox()`
|
|
||||||
- `parsedmarc/cli.py` — CLI entry point (`_main`), config file parsing, output orchestration
|
|
||||||
- `parsedmarc/types.py` — TypedDict definitions for all report types (`AggregateReport`, `ForensicReport`, `SMTPTLSReport`, `ParsingResults`)
|
|
||||||
- `parsedmarc/utils.py` — IP/DNS/GeoIP enrichment, base64 decoding, compression handling
|
|
||||||
- `parsedmarc/mail/` — Polymorphic mail connections: `IMAPConnection`, `GmailConnection`, `MSGraphConnection`, `MaildirConnection`
|
|
||||||
- `parsedmarc/{elastic,opensearch,splunk,kafkaclient,loganalytics,syslog,s3,webhook,gelf}.py` — Output integrations
|
|
||||||
|
|
||||||
### Report type system
|
|
||||||
|
|
||||||
`ReportType = Literal["aggregate", "forensic", "smtp_tls"]`. Exception hierarchy: `ParserError` → `InvalidDMARCReport` → `InvalidAggregateReport`/`InvalidForensicReport`, and `InvalidSMTPTLSReport`.
|
|
||||||
|
|
||||||
### Caching
|
|
||||||
|
|
||||||
IP address info cached for 4 hours, seen aggregate report IDs cached for 1 hour (via `ExpiringDict`).
|
|
||||||
|
|
||||||
## Code Style
|
|
||||||
|
|
||||||
- Ruff for formatting and linting (configured in `.vscode/settings.json`)
|
|
||||||
- TypedDict for structured data, type hints throughout
|
|
||||||
- Python ≥3.10 required
|
|
||||||
- Tests are in a single `tests.py` file using unittest; sample reports live in `samples/`
|
|
||||||
28
CHANGELOG.md
28
CHANGELOG.md
@@ -1,33 +1,5 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
## 9.1.1
|
|
||||||
|
|
||||||
### Fixes
|
|
||||||
|
|
||||||
- Fix the use of Elasticsearch and OpenSearch API keys (PR #660 fixes issue #653)
|
|
||||||
|
|
||||||
### Changes
|
|
||||||
|
|
||||||
- Drop support for Python 3.9 (PR #661)
|
|
||||||
|
|
||||||
## 9.1.0
|
|
||||||
|
|
||||||
### Enhancements
|
|
||||||
|
|
||||||
- Add TCP and TLS support for syslog output. (#656)
|
|
||||||
- Skip DNS lookups in GitHub Actions to prevent DNS timeouts during tests. (#657)
|
|
||||||
- Remove microseconds from DMARC aggregate report time ranges before parsing them.
|
|
||||||
|
|
||||||
## 9.0.10
|
|
||||||
|
|
||||||
- Support Python 3.14+
|
|
||||||
|
|
||||||
## 9.0.9
|
|
||||||
|
|
||||||
### Fixes
|
|
||||||
|
|
||||||
- Validate that a string is base64-encoded before trying to base64 decode it. (PRs #648 and #649)
|
|
||||||
|
|
||||||
## 9.0.8
|
## 9.0.8
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|||||||
@@ -56,9 +56,9 @@ for RHEL or Debian.
|
|||||||
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
|
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
|
||||||
| 3.7 | ❌ | End of Life (EOL) |
|
| 3.7 | ❌ | End of Life (EOL) |
|
||||||
| 3.8 | ❌ | End of Life (EOL) |
|
| 3.8 | ❌ | End of Life (EOL) |
|
||||||
| 3.9 | ❌ | Used in Debian 11 and RHEL 9, but not supported by project dependencies |
|
| 3.9 | ✅ | Supported until August 2026 (Debian 11); May 2032 (RHEL 9) |
|
||||||
| 3.10 | ✅ | Actively maintained |
|
| 3.10 | ✅ | Actively maintained |
|
||||||
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
||||||
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
||||||
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
||||||
| 3.14 | ✅ | Supported (requires `imapclient>=3.1.0`) |
|
| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618)|
|
||||||
|
|||||||
1
ci.ini
1
ci.ini
@@ -3,7 +3,6 @@ save_aggregate = True
|
|||||||
save_forensic = True
|
save_forensic = True
|
||||||
save_smtp_tls = True
|
save_smtp_tls = True
|
||||||
debug = True
|
debug = True
|
||||||
offline = True
|
|
||||||
|
|
||||||
[elasticsearch]
|
[elasticsearch]
|
||||||
hosts = http://localhost:9200
|
hosts = http://localhost:9200
|
||||||
|
|||||||
@@ -29,3 +29,14 @@ token_file = /etc/example/token.json
|
|||||||
include_spam_trash = True
|
include_spam_trash = True
|
||||||
paginate_messages = True
|
paginate_messages = True
|
||||||
scopes = https://www.googleapis.com/auth/gmail.modify
|
scopes = https://www.googleapis.com/auth/gmail.modify
|
||||||
|
|
||||||
|
[msgraph]
|
||||||
|
auth_method = ClientSecret
|
||||||
|
client_id = 12345678-90ab-cdef-1234-567890abcdef
|
||||||
|
client_secret = your-client-secret-here
|
||||||
|
tenant_id = 12345678-90ab-cdef-1234-567890abcdef
|
||||||
|
mailbox = dmarc-reports@example.com
|
||||||
|
# Use standard folder names - they work across all locales
|
||||||
|
# and avoid "Default folder Root not found" errors
|
||||||
|
reports_folder = Inbox
|
||||||
|
archive_folder = Archive
|
||||||
|
|||||||
@@ -56,12 +56,12 @@ for RHEL or Debian.
|
|||||||
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
|
| 3.6 | ❌ | Used in RHEL 8, but not supported by project dependencies |
|
||||||
| 3.7 | ❌ | End of Life (EOL) |
|
| 3.7 | ❌ | End of Life (EOL) |
|
||||||
| 3.8 | ❌ | End of Life (EOL) |
|
| 3.8 | ❌ | End of Life (EOL) |
|
||||||
| 3.9 | ❌ | Used in Debian 11 and RHEL 9, but not supported by project dependencies |
|
| 3.9 | ✅ | Supported until August 2026 (Debian 11); May 2032 (RHEL 9) |
|
||||||
| 3.10 | ✅ | Actively maintained |
|
| 3.10 | ✅ | Actively maintained |
|
||||||
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
||||||
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
||||||
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
||||||
| 3.14 | ✅ | Supported (requires `imapclient>=3.1.0`) |
|
| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618)|
|
||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
:caption: 'Contents'
|
:caption: 'Contents'
|
||||||
|
|||||||
@@ -162,10 +162,10 @@ sudo -u parsedmarc virtualenv /opt/parsedmarc/venv
|
|||||||
```
|
```
|
||||||
|
|
||||||
CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems
|
CentOS/RHEL 8 systems use Python 3.6 by default, so on those systems
|
||||||
explicitly tell `virtualenv` to use `python3.10` instead
|
explicitly tell `virtualenv` to use `python3.9` instead
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
sudo -u parsedmarc virtualenv -p python3.10 /opt/parsedmarc/venv
|
sudo -u parsedmarc virtualenv -p python3.9 /opt/parsedmarc/venv
|
||||||
```
|
```
|
||||||
|
|
||||||
Activate the virtualenv
|
Activate the virtualenv
|
||||||
|
|||||||
@@ -229,6 +229,18 @@ The full set of configuration options are:
|
|||||||
username, you must grant the app `Mail.ReadWrite.Shared`.
|
username, you must grant the app `Mail.ReadWrite.Shared`.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
:::{tip}
|
||||||
|
When configuring folder names (e.g., `reports_folder`, `archive_folder`),
|
||||||
|
you can use standard folder names like `Inbox`, `Archive`, `Sent Items`, etc.
|
||||||
|
These will be automatically mapped to Microsoft Graph's well-known folder names,
|
||||||
|
which works reliably across different mailbox locales and avoids issues with
|
||||||
|
uninitialized or shared mailboxes. Supported folder names include:
|
||||||
|
- English: Inbox, Sent Items, Deleted Items, Drafts, Junk Email, Archive, Outbox
|
||||||
|
- German: Posteingang, Gesendete Elemente, Gelöschte Elemente, Entwürfe, Junk-E-Mail, Archiv
|
||||||
|
- French: Boîte de réception, Éléments envoyés, Éléments supprimés, Brouillons, Courrier indésirable, Archives
|
||||||
|
- Spanish: Bandeja de entrada, Elementos enviados, Elementos eliminados, Borradores, Correo no deseado
|
||||||
|
:::
|
||||||
|
|
||||||
:::{warning}
|
:::{warning}
|
||||||
If you are using the `ClientSecret` auth method, you need to
|
If you are using the `ClientSecret` auth method, you need to
|
||||||
grant the `Mail.ReadWrite` (application) permission to the
|
grant the `Mail.ReadWrite` (application) permission to the
|
||||||
@@ -336,59 +348,7 @@ The full set of configuration options are:
|
|||||||
- `secret_access_key` - str: The secret access key (Optional)
|
- `secret_access_key` - str: The secret access key (Optional)
|
||||||
- `syslog`
|
- `syslog`
|
||||||
- `server` - str: The Syslog server name or IP address
|
- `server` - str: The Syslog server name or IP address
|
||||||
- `port` - int: The port to use (Default: `514`)
|
- `port` - int: The UDP port to use (Default: `514`)
|
||||||
- `protocol` - str: The protocol to use: `udp`, `tcp`, or `tls` (Default: `udp`)
|
|
||||||
- `cafile_path` - str: Path to CA certificate file for TLS server verification (Optional)
|
|
||||||
- `certfile_path` - str: Path to client certificate file for TLS authentication (Optional)
|
|
||||||
- `keyfile_path` - str: Path to client private key file for TLS authentication (Optional)
|
|
||||||
- `timeout` - float: Connection timeout in seconds for TCP/TLS (Default: `5.0`)
|
|
||||||
- `retry_attempts` - int: Number of retry attempts for failed connections (Default: `3`)
|
|
||||||
- `retry_delay` - int: Delay in seconds between retry attempts (Default: `5`)
|
|
||||||
|
|
||||||
**Example UDP configuration (default):**
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[syslog]
|
|
||||||
server = syslog.example.com
|
|
||||||
port = 514
|
|
||||||
```
|
|
||||||
|
|
||||||
**Example TCP configuration:**
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[syslog]
|
|
||||||
server = syslog.example.com
|
|
||||||
port = 6514
|
|
||||||
protocol = tcp
|
|
||||||
timeout = 10.0
|
|
||||||
retry_attempts = 5
|
|
||||||
```
|
|
||||||
|
|
||||||
**Example TLS configuration with server verification:**
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[syslog]
|
|
||||||
server = syslog.example.com
|
|
||||||
port = 6514
|
|
||||||
protocol = tls
|
|
||||||
cafile_path = /path/to/ca-cert.pem
|
|
||||||
timeout = 10.0
|
|
||||||
```
|
|
||||||
|
|
||||||
**Example TLS configuration with mutual authentication:**
|
|
||||||
|
|
||||||
```ini
|
|
||||||
[syslog]
|
|
||||||
server = syslog.example.com
|
|
||||||
port = 6514
|
|
||||||
protocol = tls
|
|
||||||
cafile_path = /path/to/ca-cert.pem
|
|
||||||
certfile_path = /path/to/client-cert.pem
|
|
||||||
keyfile_path = /path/to/client-key.pem
|
|
||||||
timeout = 10.0
|
|
||||||
retry_attempts = 3
|
|
||||||
retry_delay = 5
|
|
||||||
```
|
|
||||||
- `gmail_api`
|
- `gmail_api`
|
||||||
- `credentials_file` - str: Path to file containing the
|
- `credentials_file` - str: Path to file containing the
|
||||||
credentials, None to disable (Default: `None`)
|
credentials, None to disable (Default: `None`)
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -751,8 +751,8 @@ def parse_aggregate_report_xml(
|
|||||||
new_report_metadata["report_id"] = report_id
|
new_report_metadata["report_id"] = report_id
|
||||||
date_range = report["report_metadata"]["date_range"]
|
date_range = report["report_metadata"]["date_range"]
|
||||||
|
|
||||||
begin_ts = int(date_range["begin"].split(".")[0])
|
begin_ts = int(date_range["begin"])
|
||||||
end_ts = int(date_range["end"].split(".")[0])
|
end_ts = int(date_range["end"])
|
||||||
span_seconds = end_ts - begin_ts
|
span_seconds = end_ts - begin_ts
|
||||||
|
|
||||||
normalize_timespan = span_seconds > normalize_timespan_threshold_hours * 3600
|
normalize_timespan = span_seconds > normalize_timespan_threshold_hours * 3600
|
||||||
@@ -892,11 +892,7 @@ def extract_report(content: Union[bytes, str, BinaryIO]) -> str:
|
|||||||
try:
|
try:
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
try:
|
try:
|
||||||
file_object = BytesIO(
|
file_object = BytesIO(b64decode(content))
|
||||||
b64decode(
|
|
||||||
content.replace("\n", "").replace("\r", ""), validate=True
|
|
||||||
)
|
|
||||||
)
|
|
||||||
except binascii.Error:
|
except binascii.Error:
|
||||||
return content
|
return content
|
||||||
header = file_object.read(6)
|
header = file_object.read(6)
|
||||||
|
|||||||
@@ -697,13 +697,6 @@ def _main():
|
|||||||
s3_secret_access_key=None,
|
s3_secret_access_key=None,
|
||||||
syslog_server=None,
|
syslog_server=None,
|
||||||
syslog_port=None,
|
syslog_port=None,
|
||||||
syslog_protocol=None,
|
|
||||||
syslog_cafile_path=None,
|
|
||||||
syslog_certfile_path=None,
|
|
||||||
syslog_keyfile_path=None,
|
|
||||||
syslog_timeout=None,
|
|
||||||
syslog_retry_attempts=None,
|
|
||||||
syslog_retry_delay=None,
|
|
||||||
gmail_api_credentials_file=None,
|
gmail_api_credentials_file=None,
|
||||||
gmail_api_token_file=None,
|
gmail_api_token_file=None,
|
||||||
gmail_api_include_spam_trash=False,
|
gmail_api_include_spam_trash=False,
|
||||||
@@ -1058,10 +1051,10 @@ def _main():
|
|||||||
opts.elasticsearch_password = elasticsearch_config["password"]
|
opts.elasticsearch_password = elasticsearch_config["password"]
|
||||||
# Until 8.20
|
# Until 8.20
|
||||||
if "apiKey" in elasticsearch_config:
|
if "apiKey" in elasticsearch_config:
|
||||||
opts.elasticsearch_api_key = elasticsearch_config["apiKey"]
|
opts.elasticsearch_apiKey = elasticsearch_config["apiKey"]
|
||||||
# Since 8.20
|
# Since 8.20
|
||||||
if "api_key" in elasticsearch_config:
|
if "api_key" in elasticsearch_config:
|
||||||
opts.elasticsearch_api_key = elasticsearch_config["api_key"]
|
opts.elasticsearch_apiKey = elasticsearch_config["api_key"]
|
||||||
|
|
||||||
if "opensearch" in config:
|
if "opensearch" in config:
|
||||||
opensearch_config = config["opensearch"]
|
opensearch_config = config["opensearch"]
|
||||||
@@ -1098,10 +1091,10 @@ def _main():
|
|||||||
opts.opensearch_password = opensearch_config["password"]
|
opts.opensearch_password = opensearch_config["password"]
|
||||||
# Until 8.20
|
# Until 8.20
|
||||||
if "apiKey" in opensearch_config:
|
if "apiKey" in opensearch_config:
|
||||||
opts.opensearch_api_key = opensearch_config["apiKey"]
|
opts.opensearch_apiKey = opensearch_config["apiKey"]
|
||||||
# Since 8.20
|
# Since 8.20
|
||||||
if "api_key" in opensearch_config:
|
if "api_key" in opensearch_config:
|
||||||
opts.opensearch_api_key = opensearch_config["api_key"]
|
opts.opensearch_apiKey = opensearch_config["api_key"]
|
||||||
|
|
||||||
if "splunk_hec" in config.sections():
|
if "splunk_hec" in config.sections():
|
||||||
hec_config = config["splunk_hec"]
|
hec_config = config["splunk_hec"]
|
||||||
@@ -1246,28 +1239,6 @@ def _main():
|
|||||||
opts.syslog_port = syslog_config["port"]
|
opts.syslog_port = syslog_config["port"]
|
||||||
else:
|
else:
|
||||||
opts.syslog_port = 514
|
opts.syslog_port = 514
|
||||||
if "protocol" in syslog_config:
|
|
||||||
opts.syslog_protocol = syslog_config["protocol"]
|
|
||||||
else:
|
|
||||||
opts.syslog_protocol = "udp"
|
|
||||||
if "cafile_path" in syslog_config:
|
|
||||||
opts.syslog_cafile_path = syslog_config["cafile_path"]
|
|
||||||
if "certfile_path" in syslog_config:
|
|
||||||
opts.syslog_certfile_path = syslog_config["certfile_path"]
|
|
||||||
if "keyfile_path" in syslog_config:
|
|
||||||
opts.syslog_keyfile_path = syslog_config["keyfile_path"]
|
|
||||||
if "timeout" in syslog_config:
|
|
||||||
opts.syslog_timeout = float(syslog_config["timeout"])
|
|
||||||
else:
|
|
||||||
opts.syslog_timeout = 5.0
|
|
||||||
if "retry_attempts" in syslog_config:
|
|
||||||
opts.syslog_retry_attempts = int(syslog_config["retry_attempts"])
|
|
||||||
else:
|
|
||||||
opts.syslog_retry_attempts = 3
|
|
||||||
if "retry_delay" in syslog_config:
|
|
||||||
opts.syslog_retry_delay = int(syslog_config["retry_delay"])
|
|
||||||
else:
|
|
||||||
opts.syslog_retry_delay = 5
|
|
||||||
|
|
||||||
if "gmail_api" in config.sections():
|
if "gmail_api" in config.sections():
|
||||||
gmail_api_config = config["gmail_api"]
|
gmail_api_config = config["gmail_api"]
|
||||||
@@ -1465,17 +1436,6 @@ def _main():
|
|||||||
syslog_client = syslog.SyslogClient(
|
syslog_client = syslog.SyslogClient(
|
||||||
server_name=opts.syslog_server,
|
server_name=opts.syslog_server,
|
||||||
server_port=int(opts.syslog_port),
|
server_port=int(opts.syslog_port),
|
||||||
protocol=opts.syslog_protocol or "udp",
|
|
||||||
cafile_path=opts.syslog_cafile_path,
|
|
||||||
certfile_path=opts.syslog_certfile_path,
|
|
||||||
keyfile_path=opts.syslog_keyfile_path,
|
|
||||||
timeout=opts.syslog_timeout if opts.syslog_timeout is not None else 5.0,
|
|
||||||
retry_attempts=opts.syslog_retry_attempts
|
|
||||||
if opts.syslog_retry_attempts is not None
|
|
||||||
else 3,
|
|
||||||
retry_delay=opts.syslog_retry_delay
|
|
||||||
if opts.syslog_retry_delay is not None
|
|
||||||
else 5,
|
|
||||||
)
|
)
|
||||||
except Exception as error_:
|
except Exception as error_:
|
||||||
logger.error("Syslog Error: {0}".format(error_.__str__()))
|
logger.error("Syslog Error: {0}".format(error_.__str__()))
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
__version__ = "9.1.1"
|
__version__ = "9.0.8"
|
||||||
|
|
||||||
USER_AGENT = f"parsedmarc/{__version__}"
|
USER_AGENT = f"parsedmarc/{__version__}"
|
||||||
|
|||||||
@@ -20,6 +20,59 @@ from msgraph.core import GraphClient
|
|||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
from parsedmarc.mail.mailbox_connection import MailboxConnection
|
from parsedmarc.mail.mailbox_connection import MailboxConnection
|
||||||
|
|
||||||
|
# Mapping of common folder names to Microsoft Graph well-known folder names
|
||||||
|
# This avoids the "Default folder Root not found" error on uninitialized mailboxes
|
||||||
|
WELL_KNOWN_FOLDER_MAP = {
|
||||||
|
# English names
|
||||||
|
"inbox": "inbox",
|
||||||
|
"sent items": "sentitems",
|
||||||
|
"sent": "sentitems",
|
||||||
|
"sentitems": "sentitems",
|
||||||
|
"deleted items": "deleteditems",
|
||||||
|
"deleted": "deleteditems",
|
||||||
|
"deleteditems": "deleteditems",
|
||||||
|
"trash": "deleteditems",
|
||||||
|
"drafts": "drafts",
|
||||||
|
"junk email": "junkemail",
|
||||||
|
"junk": "junkemail",
|
||||||
|
"junkemail": "junkemail",
|
||||||
|
"spam": "junkemail",
|
||||||
|
"archive": "archive",
|
||||||
|
"outbox": "outbox",
|
||||||
|
"conversation history": "conversationhistory",
|
||||||
|
"conversationhistory": "conversationhistory",
|
||||||
|
# German names
|
||||||
|
"posteingang": "inbox",
|
||||||
|
"gesendete elemente": "sentitems",
|
||||||
|
"gesendet": "sentitems",
|
||||||
|
"gelöschte elemente": "deleteditems",
|
||||||
|
"gelöscht": "deleteditems",
|
||||||
|
"entwürfe": "drafts",
|
||||||
|
"junk-e-mail": "junkemail",
|
||||||
|
"archiv": "archive",
|
||||||
|
"postausgang": "outbox",
|
||||||
|
# French names
|
||||||
|
"boîte de réception": "inbox",
|
||||||
|
"éléments envoyés": "sentitems",
|
||||||
|
"envoyés": "sentitems",
|
||||||
|
"éléments supprimés": "deleteditems",
|
||||||
|
"supprimés": "deleteditems",
|
||||||
|
"brouillons": "drafts",
|
||||||
|
"courrier indésirable": "junkemail",
|
||||||
|
"archives": "archive",
|
||||||
|
"boîte d'envoi": "outbox",
|
||||||
|
# Spanish names
|
||||||
|
"bandeja de entrada": "inbox",
|
||||||
|
"elementos enviados": "sentitems",
|
||||||
|
"enviados": "sentitems",
|
||||||
|
"elementos eliminados": "deleteditems",
|
||||||
|
"eliminados": "deleteditems",
|
||||||
|
"borradores": "drafts",
|
||||||
|
"correo no deseado": "junkemail",
|
||||||
|
"archivar": "archive",
|
||||||
|
"bandeja de salida": "outbox",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class AuthMethod(Enum):
|
class AuthMethod(Enum):
|
||||||
DeviceCode = 1
|
DeviceCode = 1
|
||||||
@@ -130,6 +183,13 @@ class MSGraphConnection(MailboxConnection):
|
|||||||
self.mailbox_name = mailbox
|
self.mailbox_name = mailbox
|
||||||
|
|
||||||
def create_folder(self, folder_name: str):
|
def create_folder(self, folder_name: str):
|
||||||
|
# Check if this is a well-known folder - they already exist and cannot be created
|
||||||
|
if "/" not in folder_name:
|
||||||
|
well_known_name = WELL_KNOWN_FOLDER_MAP.get(folder_name.lower())
|
||||||
|
if well_known_name:
|
||||||
|
logger.debug(f"Folder '{folder_name}' is a well-known folder, skipping creation")
|
||||||
|
return
|
||||||
|
|
||||||
sub_url = ""
|
sub_url = ""
|
||||||
path_parts = folder_name.split("/")
|
path_parts = folder_name.split("/")
|
||||||
if len(path_parts) > 1: # Folder is a subFolder
|
if len(path_parts) > 1: # Folder is a subFolder
|
||||||
@@ -246,6 +306,12 @@ class MSGraphConnection(MailboxConnection):
|
|||||||
parent_folder_id = folder_id
|
parent_folder_id = folder_id
|
||||||
return self._find_folder_id_with_parent(path_parts[-1], parent_folder_id)
|
return self._find_folder_id_with_parent(path_parts[-1], parent_folder_id)
|
||||||
else:
|
else:
|
||||||
|
# Check if this is a well-known folder name (case-insensitive)
|
||||||
|
well_known_name = WELL_KNOWN_FOLDER_MAP.get(folder_name.lower())
|
||||||
|
if well_known_name:
|
||||||
|
# Use well-known folder name directly to avoid querying uninitialized mailboxes
|
||||||
|
logger.debug(f"Using well-known folder name '{well_known_name}' for '{folder_name}'")
|
||||||
|
return well_known_name
|
||||||
return self._find_folder_id_with_parent(folder_name, None)
|
return self._find_folder_id_with_parent(folder_name, None)
|
||||||
|
|
||||||
def _find_folder_id_with_parent(
|
def _find_folder_id_with_parent(
|
||||||
|
|||||||
@@ -6,10 +6,7 @@ from __future__ import annotations
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import logging.handlers
|
import logging.handlers
|
||||||
import socket
|
from typing import Any
|
||||||
import ssl
|
|
||||||
import time
|
|
||||||
from typing import Any, Optional
|
|
||||||
|
|
||||||
from parsedmarc import (
|
from parsedmarc import (
|
||||||
parsed_aggregate_reports_to_csv_rows,
|
parsed_aggregate_reports_to_csv_rows,
|
||||||
@@ -21,150 +18,20 @@ from parsedmarc import (
|
|||||||
class SyslogClient(object):
|
class SyslogClient(object):
|
||||||
"""A client for Syslog"""
|
"""A client for Syslog"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, server_name: str, server_port: int):
|
||||||
self,
|
|
||||||
server_name: str,
|
|
||||||
server_port: int,
|
|
||||||
protocol: str = "udp",
|
|
||||||
cafile_path: Optional[str] = None,
|
|
||||||
certfile_path: Optional[str] = None,
|
|
||||||
keyfile_path: Optional[str] = None,
|
|
||||||
timeout: float = 5.0,
|
|
||||||
retry_attempts: int = 3,
|
|
||||||
retry_delay: int = 5,
|
|
||||||
):
|
|
||||||
"""
|
"""
|
||||||
Initializes the SyslogClient
|
Initializes the SyslogClient
|
||||||
Args:
|
Args:
|
||||||
server_name (str): The Syslog server
|
server_name (str): The Syslog server
|
||||||
server_port (int): The Syslog port
|
server_port (int): The Syslog UDP port
|
||||||
protocol (str): The protocol to use: "udp", "tcp", or "tls" (Default: "udp")
|
|
||||||
cafile_path (str): Path to CA certificate file for TLS server verification (Optional)
|
|
||||||
certfile_path (str): Path to client certificate file for TLS authentication (Optional)
|
|
||||||
keyfile_path (str): Path to client private key file for TLS authentication (Optional)
|
|
||||||
timeout (float): Connection timeout in seconds for TCP/TLS (Default: 5.0)
|
|
||||||
retry_attempts (int): Number of retry attempts for failed connections (Default: 3)
|
|
||||||
retry_delay (int): Delay in seconds between retry attempts (Default: 5)
|
|
||||||
"""
|
"""
|
||||||
self.server_name = server_name
|
self.server_name = server_name
|
||||||
self.server_port = server_port
|
self.server_port = server_port
|
||||||
self.protocol = protocol.lower()
|
|
||||||
self.timeout = timeout
|
|
||||||
self.retry_attempts = retry_attempts
|
|
||||||
self.retry_delay = retry_delay
|
|
||||||
|
|
||||||
self.logger = logging.getLogger("parsedmarc_syslog")
|
self.logger = logging.getLogger("parsedmarc_syslog")
|
||||||
self.logger.setLevel(logging.INFO)
|
self.logger.setLevel(logging.INFO)
|
||||||
|
log_handler = logging.handlers.SysLogHandler(address=(server_name, server_port))
|
||||||
# Create the appropriate syslog handler based on protocol
|
|
||||||
log_handler = self._create_syslog_handler(
|
|
||||||
server_name,
|
|
||||||
server_port,
|
|
||||||
self.protocol,
|
|
||||||
cafile_path,
|
|
||||||
certfile_path,
|
|
||||||
keyfile_path,
|
|
||||||
timeout,
|
|
||||||
retry_attempts,
|
|
||||||
retry_delay,
|
|
||||||
)
|
|
||||||
|
|
||||||
self.logger.addHandler(log_handler)
|
self.logger.addHandler(log_handler)
|
||||||
|
|
||||||
def _create_syslog_handler(
|
|
||||||
self,
|
|
||||||
server_name: str,
|
|
||||||
server_port: int,
|
|
||||||
protocol: str,
|
|
||||||
cafile_path: Optional[str],
|
|
||||||
certfile_path: Optional[str],
|
|
||||||
keyfile_path: Optional[str],
|
|
||||||
timeout: float,
|
|
||||||
retry_attempts: int,
|
|
||||||
retry_delay: int,
|
|
||||||
) -> logging.handlers.SysLogHandler:
|
|
||||||
"""
|
|
||||||
Creates a SysLogHandler with the specified protocol and TLS settings
|
|
||||||
"""
|
|
||||||
if protocol == "udp":
|
|
||||||
# UDP protocol (default, backward compatible)
|
|
||||||
return logging.handlers.SysLogHandler(
|
|
||||||
address=(server_name, server_port),
|
|
||||||
socktype=socket.SOCK_DGRAM,
|
|
||||||
)
|
|
||||||
elif protocol in ["tcp", "tls"]:
|
|
||||||
# TCP or TLS protocol with retry logic
|
|
||||||
for attempt in range(1, retry_attempts + 1):
|
|
||||||
try:
|
|
||||||
if protocol == "tcp":
|
|
||||||
# TCP without TLS
|
|
||||||
handler = logging.handlers.SysLogHandler(
|
|
||||||
address=(server_name, server_port),
|
|
||||||
socktype=socket.SOCK_STREAM,
|
|
||||||
)
|
|
||||||
# Set timeout on the socket
|
|
||||||
if hasattr(handler, "socket") and handler.socket:
|
|
||||||
handler.socket.settimeout(timeout)
|
|
||||||
return handler
|
|
||||||
else:
|
|
||||||
# TLS protocol
|
|
||||||
# Create SSL context with secure defaults
|
|
||||||
ssl_context = ssl.create_default_context()
|
|
||||||
|
|
||||||
# Explicitly set minimum TLS version to 1.2 for security
|
|
||||||
ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
|
|
||||||
|
|
||||||
# Configure server certificate verification
|
|
||||||
if cafile_path:
|
|
||||||
ssl_context.load_verify_locations(cafile=cafile_path)
|
|
||||||
|
|
||||||
# Configure client certificate authentication
|
|
||||||
if certfile_path and keyfile_path:
|
|
||||||
ssl_context.load_cert_chain(
|
|
||||||
certfile=certfile_path,
|
|
||||||
keyfile=keyfile_path,
|
|
||||||
)
|
|
||||||
elif certfile_path or keyfile_path:
|
|
||||||
# Warn if only one of the two required parameters is provided
|
|
||||||
self.logger.warning(
|
|
||||||
"Both certfile_path and keyfile_path are required for "
|
|
||||||
"client certificate authentication. Client authentication "
|
|
||||||
"will not be used."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Create TCP handler first
|
|
||||||
handler = logging.handlers.SysLogHandler(
|
|
||||||
address=(server_name, server_port),
|
|
||||||
socktype=socket.SOCK_STREAM,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Wrap socket with TLS
|
|
||||||
if hasattr(handler, "socket") and handler.socket:
|
|
||||||
handler.socket = ssl_context.wrap_socket(
|
|
||||||
handler.socket,
|
|
||||||
server_hostname=server_name,
|
|
||||||
)
|
|
||||||
handler.socket.settimeout(timeout)
|
|
||||||
|
|
||||||
return handler
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
if attempt < retry_attempts:
|
|
||||||
self.logger.warning(
|
|
||||||
f"Syslog connection attempt {attempt}/{retry_attempts} failed: {e}. "
|
|
||||||
f"Retrying in {retry_delay} seconds..."
|
|
||||||
)
|
|
||||||
time.sleep(retry_delay)
|
|
||||||
else:
|
|
||||||
self.logger.error(
|
|
||||||
f"Syslog connection failed after {retry_attempts} attempts: {e}"
|
|
||||||
)
|
|
||||||
raise
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
f"Invalid protocol '{protocol}'. Must be 'udp', 'tcp', or 'tls'."
|
|
||||||
)
|
|
||||||
|
|
||||||
def save_aggregate_report_to_syslog(self, aggregate_reports: list[dict[str, Any]]):
|
def save_aggregate_report_to_syslog(self, aggregate_reports: list[dict[str, Any]]):
|
||||||
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
|
||||||
for row in rows:
|
for row in rows:
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
|
from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
|
||||||
|
|
||||||
# NOTE: This module is intentionally Python 3.10 compatible.
|
# NOTE: This module is intentionally Python 3.9 compatible.
|
||||||
# - No PEP 604 unions (A | B)
|
# - No PEP 604 unions (A | B)
|
||||||
# - No typing.NotRequired / Required (3.11+) to avoid an extra dependency.
|
# - No typing.NotRequired / Required (3.11+) to avoid an extra dependency.
|
||||||
# For optional keys, use total=False TypedDicts.
|
# For optional keys, use total=False TypedDicts.
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
requires = [
|
requires = [
|
||||||
"hatchling>=1.27.0",
|
"hatchling>=1.27.0",
|
||||||
]
|
]
|
||||||
requires_python = ">=3.10,<3.15"
|
requires_python = ">=3.9,<3.14"
|
||||||
build-backend = "hatchling.build"
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
[project]
|
[project]
|
||||||
@@ -29,7 +29,7 @@ classifiers = [
|
|||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
"Programming Language :: Python :: 3"
|
"Programming Language :: Python :: 3"
|
||||||
]
|
]
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.9, <3.14"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"azure-identity>=1.8.0",
|
"azure-identity>=1.8.0",
|
||||||
"azure-monitor-ingestion>=1.0.0",
|
"azure-monitor-ingestion>=1.0.0",
|
||||||
@@ -45,10 +45,10 @@ dependencies = [
|
|||||||
"google-auth-httplib2>=0.1.0",
|
"google-auth-httplib2>=0.1.0",
|
||||||
"google-auth-oauthlib>=0.4.6",
|
"google-auth-oauthlib>=0.4.6",
|
||||||
"google-auth>=2.3.3",
|
"google-auth>=2.3.3",
|
||||||
"imapclient>=3.1.0",
|
"imapclient>=2.1.0",
|
||||||
"kafka-python-ng>=2.2.2",
|
"kafka-python-ng>=2.2.2",
|
||||||
"lxml>=4.4.0",
|
"lxml>=4.4.0",
|
||||||
"mailsuite>=1.11.2",
|
"mailsuite>=1.11.1",
|
||||||
"msgraph-core==0.2.2",
|
"msgraph-core==0.2.2",
|
||||||
"opensearch-py>=2.4.2,<=3.0.0",
|
"opensearch-py>=2.4.2,<=3.0.0",
|
||||||
"publicsuffixlist>=0.10.0",
|
"publicsuffixlist>=0.10.0",
|
||||||
|
|||||||
45
tests.py
45
tests.py
@@ -12,9 +12,6 @@ from lxml import etree
|
|||||||
import parsedmarc
|
import parsedmarc
|
||||||
import parsedmarc.utils
|
import parsedmarc.utils
|
||||||
|
|
||||||
# Detect if running in GitHub Actions to skip DNS lookups
|
|
||||||
OFFLINE_MODE = os.environ.get("GITHUB_ACTIONS", "false").lower() == "true"
|
|
||||||
|
|
||||||
|
|
||||||
def minify_xml(xml_string):
|
def minify_xml(xml_string):
|
||||||
parser = etree.XMLParser(remove_blank_text=True)
|
parser = etree.XMLParser(remove_blank_text=True)
|
||||||
@@ -124,7 +121,7 @@ class Test(unittest.TestCase):
|
|||||||
continue
|
continue
|
||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
parsed_report = parsedmarc.parse_report_file(
|
parsed_report = parsedmarc.parse_report_file(
|
||||||
sample_path, always_use_local_files=True, offline=OFFLINE_MODE
|
sample_path, always_use_local_files=True
|
||||||
)["report"]
|
)["report"]
|
||||||
parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
|
parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
@@ -132,7 +129,7 @@ class Test(unittest.TestCase):
|
|||||||
def testEmptySample(self):
|
def testEmptySample(self):
|
||||||
"""Test empty/unparasable report"""
|
"""Test empty/unparasable report"""
|
||||||
with self.assertRaises(parsedmarc.ParserError):
|
with self.assertRaises(parsedmarc.ParserError):
|
||||||
parsedmarc.parse_report_file("samples/empty.xml", offline=OFFLINE_MODE)
|
parsedmarc.parse_report_file("samples/empty.xml")
|
||||||
|
|
||||||
def testForensicSamples(self):
|
def testForensicSamples(self):
|
||||||
"""Test sample forensic/ruf/failure DMARC reports"""
|
"""Test sample forensic/ruf/failure DMARC reports"""
|
||||||
@@ -142,12 +139,8 @@ class Test(unittest.TestCase):
|
|||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
with open(sample_path) as sample_file:
|
with open(sample_path) as sample_file:
|
||||||
sample_content = sample_file.read()
|
sample_content = sample_file.read()
|
||||||
parsed_report = parsedmarc.parse_report_email(
|
parsed_report = parsedmarc.parse_report_email(sample_content)["report"]
|
||||||
sample_content, offline=OFFLINE_MODE
|
parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
|
||||||
)["report"]
|
|
||||||
parsed_report = parsedmarc.parse_report_file(
|
|
||||||
sample_path, offline=OFFLINE_MODE
|
|
||||||
)["report"]
|
|
||||||
parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
|
parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
@@ -159,12 +152,36 @@ class Test(unittest.TestCase):
|
|||||||
if os.path.isdir(sample_path):
|
if os.path.isdir(sample_path):
|
||||||
continue
|
continue
|
||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
parsed_report = parsedmarc.parse_report_file(
|
parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
|
||||||
sample_path, offline=OFFLINE_MODE
|
|
||||||
)["report"]
|
|
||||||
parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
|
parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
|
def testMSGraphWellKnownFolders(self):
|
||||||
|
"""Test MSGraph well-known folder name mapping"""
|
||||||
|
from parsedmarc.mail.graph import WELL_KNOWN_FOLDER_MAP
|
||||||
|
|
||||||
|
# Test English folder names
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("inbox") == "inbox"
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("sent items") == "sentitems"
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("deleted items") == "deleteditems"
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("archive") == "archive"
|
||||||
|
|
||||||
|
# Test case insensitivity - simulating how the code actually uses it
|
||||||
|
# This is what happens when user config has "reports_folder = Inbox"
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("inbox") == "inbox"
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("Inbox".lower()) == "inbox" # User's exact config
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("INBOX".lower()) == "inbox"
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("Archive".lower()) == "archive"
|
||||||
|
|
||||||
|
# Test German folder names
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("posteingang") == "inbox"
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("Posteingang".lower()) == "inbox" # Capitalized
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("archiv") == "archive"
|
||||||
|
|
||||||
|
# Test that custom folders don't match
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("custom_folder") is None
|
||||||
|
assert WELL_KNOWN_FOLDER_MAP.get("my_reports") is None
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main(verbosity=2)
|
unittest.main(verbosity=2)
|
||||||
|
|||||||
Reference in New Issue
Block a user