Mirror of https://github.com/domainaware/parsedmarc.git (synced 2026-02-20 16:26:24 +00:00)

Compare commits (35 commits)
copilot/re... ... copilot/cr...
| SHA1 |
|---|
| 3dbf21f072 |
| 2d2e2bc261 |
| f830418381 |
| 4d97bd25aa |
| 17a612df0c |
| 221bc332ef |
| a2a75f7a81 |
| 50fcb51577 |
| dd9ef90773 |
| 0e3a4b0f06 |
| 343b53ef18 |
| 792079a3e8 |
| 1f3a1fc843 |
| 34fa0c145d |
| 6719a06388 |
| eafa435868 |
| 5d772c3b36 |
| 72cabbef23 |
| 3d74cd6ac0 |
| d1ac59a016 |
| 7fdd53008f |
| 35331d4b84 |
| de9edd3590 |
| abf4bdba13 |
| 7b842740f5 |
| ebe3ccf40a |
| 808285658f |
| bc1dae29bd |
| 4b904444e5 |
| 3608bce344 |
| fe809c4c3f |
| a76c2f9621 |
| bb8f4002bf |
| b5773c6b4a |
| b99bd67225 |
2  .github/workflows/python-tests.yml (vendored)
@@ -30,7 +30,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]

     steps:
       - uses: actions/checkout@v5
292  .vscode/settings.json (vendored)
@@ -13,148 +13,154 @@
   "MD024": false
 },
 "cSpell.words": [
   "adkim",
   "akamaiedge",
   "amsmath",
   "andrewmcgilvray",
   "arcname",
   "aspf",
   "autoclass",
   "automodule",
   "backported",
   "bellsouth",
   "boto",
   "brakhane",
   "Brightmail",
   "CEST",
   "CHACHA",
   "checkdmarc",
   "Codecov",
   "confnew",
   "dateparser",
   "dateutil",
   "Davmail",
   "DBIP",
   "dearmor",
   "deflist",
   "devel",
   "DMARC",
   "Dmarcian",
   "dnspython",
   "dollarmath",
   "dpkg",
   "exampleuser",
   "expiringdict",
   "fieldlist",
   "GELF",
   "genindex",
   "geoip",
   "geoipupdate",
   "Geolite",
   "geolocation",
   "githubpages",
   "Grafana",
   "hostnames",
   "htpasswd",
   "httpasswd",
   "httplib",
+  "ifhost",
   "IMAP",
   "imapclient",
   "infile",
   "Interaktive",
   "IPDB",
   "journalctl",
+  "kafkaclient",
   "keepalive",
   "keyout",
   "keyrings",
   "Leeman",
   "libemail",
   "linkify",
   "LISTSERV",
+  "loganalytics",
   "lxml",
   "mailparser",
   "mailrelay",
   "mailsuite",
   "maxdepth",
   "MAXHEADERS",
   "maxmind",
   "mbox",
   "mfrom",
+  "mhdw",
   "michaeldavie",
   "mikesiegel",
   "Mimecast",
   "mitigations",
   "MMDB",
   "modindex",
   "msgconvert",
   "msgraph",
   "MSSP",
   "multiprocess",
   "Munge",
   "ndjson",
   "newkey",
   "Nhcm",
   "nojekyll",
   "nondigest",
   "nosecureimap",
   "nosniff",
   "nwettbewerb",
   "opensearch",
   "opensearchpy",
   "parsedmarc",
   "passsword",
+  "pbar",
   "Postorius",
   "premade",
+  "privatesuffix",
   "procs",
   "publicsuffix",
   "publicsuffixlist",
   "publixsuffix",
   "pygelf",
   "pypy",
   "pytest",
   "quickstart",
   "Reindex",
   "replyto",
   "reversename",
   "Rollup",
   "Rpdm",
   "SAMEORIGIN",
   "sdist",
   "Servernameone",
   "setuptools",
   "smartquotes",
   "SMTPTLS",
   "sortlists",
   "sortmaps",
   "sourcetype",
   "STARTTLS",
   "tasklist",
   "timespan",
   "tlsa",
   "tlsrpt",
   "toctree",
   "TQDDM",
   "tqdm",
   "truststore",
   "Übersicht",
   "uids",
   "Uncategorized",
   "unparasable",
   "uper",
   "urllib",
   "Valimail",
   "venv",
   "Vhcw",
   "viewcode",
   "virtualenv",
   "WBITS",
   "webmail",
   "Wettbewerber",
   "Whalen",
   "whitespaces",
   "xennn",
   "xmltodict",
   "xpack",
   "zscholl"
 ],
 }
41  CHANGELOG.md
@@ -1,5 +1,46 @@
# Changelog

## 9.0.10

- Support Python 3.14+

## 9.0.9

### Fixes

- Validate that a string is base64-encoded before trying to base64 decode it. (PRs #648 and #649)
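The 9.0.9 fix hinges on checking that input really is base64 before decoding it. A minimal sketch of one way to perform that check in Python; the helper name and heuristics are illustrative, not the project's actual implementation:

```python
import base64
import binascii


def looks_like_base64(data: str) -> bool:
    """Return True only if data round-trips through strict base64 decoding."""
    stripped = "".join(data.split())  # ignore whitespace/newlines in MIME bodies
    if not stripped or len(stripped) % 4 != 0:
        return False
    try:
        decoded = base64.b64decode(stripped, validate=True)
    except (binascii.Error, ValueError):
        return False
    # Re-encoding must reproduce the input, which rules out most plain text
    return base64.b64encode(decoded).decode("ascii") == stripped
```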
## 9.0.8

### Fixes

- Fix logging configuration not propagating to child parser processes (#646).
- Update the `mailsuite` dependency to `>=1.11.1` to solve issues with iCloud IMAP (#493).
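The child-process logging fix (#646) reflects a general multiprocessing behavior: handlers attached in the parent are not automatically available in a worker started with the spawn method, so each worker reconfigures logging itself. A minimal illustrative sketch; the logger name and format are assumptions, not the project's exact setup:

```python
import logging
from multiprocessing import Process


def worker(log_level: int) -> None:
    # Runs in the child: attach a handler here, because the parent's handlers
    # are not inherited under the "spawn" start method.
    logger = logging.getLogger("parsedmarc")
    if not logger.handlers:
        logger.addHandler(logging.StreamHandler())
    logger.setLevel(log_level)
    logger.warning("logging configured inside the child process")


if __name__ == "__main__":
    p = Process(target=worker, args=(logging.DEBUG,))
    p.start()
    p.join()
```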
## 9.0.7

### Fixes

- Fix the IMAP `since` option (PR #645; closes issues #581 and #643).

## 9.0.6

### Fixes

- Fix #638.
- Fix/clarify report extraction and parsing behavior for multiple input types (bytes, base64 strings, and file-like objects).
- Fix type mismatches that could cause runtime issues in SMTP emailing and CLI option handling.

### Improvements

- Improve type hints across the library (Pylance/Pyright friendliness) and reduce false-positive linter errors.
- Emails in Microsoft 365 are now marked as read as they are read. This provides consistency with other mailbox types and gives an indication of progress as emails are processed in batches. (Closes #625)

### Compatibility / Dependencies

- Set the Python requirement to `>=3.9,<3.14`.
- Bump the `mailsuite` requirement to `>=1.11.0`.

## 9.0.5

### Fixes
@@ -61,4 +61,4 @@ for RHEL or Debian.
 | 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
 | 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
 | 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
-| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618) |
+| 3.14 | ✅ | Actively maintained |
1  ci.ini
@@ -3,6 +3,7 @@ save_aggregate = True
save_forensic = True
save_smtp_tls = True
debug = True
offline = True

[elasticsearch]
hosts = http://localhost:9200
@@ -28,6 +28,13 @@
   :members:
```

## parsedmarc.types

```{eval-rst}
.. automodule:: parsedmarc.types
   :members:
```

## parsedmarc.utils

```{eval-rst}
@@ -61,7 +61,7 @@ for RHEL or Debian.
 | 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
 | 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
 | 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
-| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618) |
+| 3.14 | ✅ | Actively maintained |

```{toctree}
:caption: 'Contents'
@@ -171,8 +171,8 @@ The full set of configuration options are:
- `check_timeout` - int: Number of seconds to wait for an IMAP
  IDLE response or the number of seconds until the next
  mail check (Default: `30`)
- `since` - str: Search for messages since a certain time. (Examples: `5m|3h|2d|1w`)
  Acceptable units - {"m": "minutes", "h": "hours", "d": "days", "w": "weeks"}.
  Defaults to `1d` if an incorrect value is provided.
- `imap`
  - `host` - str: The IMAP server hostname or IP address
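The `since` value is a compact duration string. A rough sketch of how such a value could be converted to a `timedelta`, illustrating the documented format and default rather than parsedmarc's actual parser:

```python
from datetime import timedelta

# Units documented for the `since` option: minutes, hours, days, weeks
_UNITS = {"m": "minutes", "h": "hours", "d": "days", "w": "weeks"}


def since_to_timedelta(since: str, default: str = "1d") -> timedelta:
    """Convert a string like '5m', '3h', '2d', or '1w' into a timedelta."""
    value, unit = since[:-1], since[-1:]
    if not value.isdigit() or unit not in _UNITS:
        value, unit = default[:-1], default[-1:]  # fall back to the documented default
    return timedelta(**{_UNITS[unit]: int(value)})


print(since_to_timedelta("2d"))   # 2 days, 0:00:00
print(since_to_timedelta("bad"))  # falls back to 1 day
```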
@@ -240,7 +240,7 @@ The full set of configuration options are:
  group and use that as the group id.

  ```powershell
  New-ApplicationAccessPolicy -AccessRight RestrictAccess `
  -AppId "<CLIENT_ID>" -PolicyScopeGroupId "<MAILBOX>" `
  -Description "Restrict access to dmarc reports mailbox."
  ```
@@ -336,13 +336,65 @@ The full set of configuration options are:
  - `secret_access_key` - str: The secret access key (Optional)
- `syslog`
  - `server` - str: The Syslog server name or IP address
  - `port` - int: The port to use (Default: `514`)
  - `protocol` - str: The protocol to use: `udp`, `tcp`, or `tls` (Default: `udp`)
  - `cafile_path` - str: Path to CA certificate file for TLS server verification (Optional)
  - `certfile_path` - str: Path to client certificate file for TLS authentication (Optional)
  - `keyfile_path` - str: Path to client private key file for TLS authentication (Optional)
  - `timeout` - float: Connection timeout in seconds for TCP/TLS (Default: `5.0`)
  - `retry_attempts` - int: Number of retry attempts for failed connections (Default: `3`)
  - `retry_delay` - int: Delay in seconds between retry attempts (Default: `5`)

  **Example UDP configuration (default):**

  ```ini
  [syslog]
  server = syslog.example.com
  port = 514
  ```

  **Example TCP configuration:**

  ```ini
  [syslog]
  server = syslog.example.com
  port = 6514
  protocol = tcp
  timeout = 10.0
  retry_attempts = 5
  ```

  **Example TLS configuration with server verification:**

  ```ini
  [syslog]
  server = syslog.example.com
  port = 6514
  protocol = tls
  cafile_path = /path/to/ca-cert.pem
  timeout = 10.0
  ```

  **Example TLS configuration with mutual authentication:**

  ```ini
  [syslog]
  server = syslog.example.com
  port = 6514
  protocol = tls
  cafile_path = /path/to/ca-cert.pem
  certfile_path = /path/to/client-cert.pem
  keyfile_path = /path/to/client-key.pem
  timeout = 10.0
  retry_attempts = 3
  retry_delay = 5
  ```
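  For the TLS transport, the `cafile_path`, `certfile_path`, and `keyfile_path` options map onto a standard `ssl` context wrapped around a TCP socket. A minimal illustrative sketch of that wiring in Python; this is not the actual `parsedmarc.syslog` implementation, and the server name, port, and paths are placeholders:

  ```python
  import socket
  import ssl

  SERVER, PORT, TIMEOUT = "syslog.example.com", 6514, 10.0

  # Verify the server against a private CA and present a client certificate
  context = ssl.create_default_context(cafile="/path/to/ca-cert.pem")
  context.load_cert_chain(
      certfile="/path/to/client-cert.pem", keyfile="/path/to/client-key.pem"
  )

  with socket.create_connection((SERVER, PORT), timeout=TIMEOUT) as raw_sock:
      with context.wrap_socket(raw_sock, server_hostname=SERVER) as tls_sock:
          # Framing is up to the sender; newline-delimited JSON shown here
          tls_sock.sendall(b'{"report_id": "example"}\n')
  ```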
- `gmail_api`
  - `credentials_file` - str: Path to the file containing the
    credentials, None to disable (Default: `None`)
  - `token_file` - str: Path to save the token file
    (Default: `.token`)

:::{note}
`credentials_file` and `token_file` can be obtained by following the [quickstart](https://developers.google.com/gmail/api/quickstart/python). Please change the scope to `https://www.googleapis.com/auth/gmail.modify`.
:::
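A condensed sketch of that quickstart flow with the modified scope, using Google's published `google-auth-oauthlib` API. The `credentials.json` and `.token` file names, and whether this token format matches what parsedmarc expects, are assumptions taken from the quickstart rather than from parsedmarc itself:

```python
from google_auth_oauthlib.flow import InstalledAppFlow

# parsedmarc needs modify access so it can mark and move processed reports
SCOPES = ["https://www.googleapis.com/auth/gmail.modify"]

flow = InstalledAppFlow.from_client_secrets_file("credentials.json", SCOPES)
creds = flow.run_local_server(port=0)

# Persist the token; the path mirrors the `token_file` default documented above
with open(".token", "w") as token_file:
    token_file.write(creds.to_json())
```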
@@ -442,7 +494,7 @@ Update the limit to 2k per example:
PUT _cluster/settings
{
  "persistent" : {
    "cluster.max_shards_per_node" : 2000
  }
}
```
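The same setting can be applied from Python with the official Elasticsearch client. A small sketch, assuming an `elasticsearch` 8.x client and a local cluster; the host URL and limit are illustrative:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")

# Raise the per-node shard limit to 2000, mirroring the PUT _cluster/settings call
es.cluster.put_settings(persistent={"cluster.max_shards_per_node": 2000})
print(es.cluster.get_settings(include_defaults=False))
```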
132  google_secops_parser/README.md (new file)

@@ -0,0 +1,132 @@
# Google SecOps Parser for parsedmarc

A [Google Security Operations (Chronicle)](https://cloud.google.com/security/products/security-operations) custom parser for ingesting [parsedmarc](https://domainaware.github.io/parsedmarc/) syslog events into the Unified Data Model (UDM).

## Overview

parsedmarc sends DMARC aggregate reports, forensic reports, and SMTP TLS reports as JSON-formatted syslog messages. This parser transforms those JSON events into Google SecOps UDM events for threat detection and investigation.

### Supported Report Types

| Report Type | UDM Event Type | Description |
|---|---|---|
| DMARC Aggregate | `EMAIL_TRANSACTION` | Aggregate DMARC authentication results from reporting organizations |
| DMARC Forensic | `EMAIL_TRANSACTION` | Individual email authentication failure reports |
| SMTP TLS | `GENERIC_EVENT` | SMTP TLS session success/failure reports (RFC 8460) |

## UDM Field Mappings

### DMARC Aggregate Reports

| parsedmarc Field | UDM Field | Notes |
|---|---|---|
| `source_ip_address` | `principal.ip` | IP address of the email source |
| `source_reverse_dns` | `principal.hostname` | Reverse DNS of source |
| `source_country` | `principal.location.country_or_region` | GeoIP country of source |
| `header_from` | `network.email.from` | From header domain |
| `envelope_from` | `network.email.mail_from` | Envelope sender |
| `envelope_to` | `network.email.to` | Envelope recipient |
| `domain` | `target.hostname` | Domain the report is about |
| `report_id` | `metadata.product_log_id` | Report identifier |
| `disposition` | `security_result.action` | `none`→`ALLOW`, `quarantine`→`QUARANTINE`, `reject`→`BLOCK` |
| `dmarc_aligned` | `additional.fields` | Whether DMARC passed |
| `spf_aligned` | `additional.fields` | Whether SPF was aligned |
| `dkim_aligned` | `additional.fields` | Whether DKIM was aligned |
| `org_name` | `additional.fields` | Reporting organization name |
| `count` | `additional.fields` | Number of messages |
| `p`, `sp`, `pct` | `additional.fields` | DMARC policy settings |
| `dkim_domains`, `dkim_results` | `additional.fields` | DKIM authentication details |
| `spf_domains`, `spf_results` | `additional.fields` | SPF authentication details |
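The aggregate-report mapping above is essentially a field-renaming step. A rough Python sketch of the idea, covering only a subset of the fields; the helper is illustrative and is not the Chronicle parser itself, which lives in `parsedmarc.conf`:

```python
import json

_DISPOSITION_TO_ACTION = {"none": "ALLOW", "quarantine": "QUARANTINE", "reject": "BLOCK"}


def aggregate_to_udm(raw_syslog_json: str) -> dict:
    """Map a parsedmarc aggregate event onto a few of the UDM fields above."""
    event = json.loads(raw_syslog_json)
    return {
        "metadata": {
            "event_type": "EMAIL_TRANSACTION",
            "product_log_id": event.get("report_id"),
        },
        "principal": {
            "ip": event.get("source_ip_address"),
            "hostname": event.get("source_reverse_dns"),
        },
        "network": {
            "email": {
                "from": event.get("header_from"),
                "mail_from": event.get("envelope_from"),
            }
        },
        "target": {"hostname": event.get("domain")},
        "security_result": {
            "action": _DISPOSITION_TO_ACTION.get(event.get("disposition", ""), "UNKNOWN_ACTION")
        },
    }
```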
### DMARC Forensic Reports

| parsedmarc Field | UDM Field | Notes |
|---|---|---|
| `source_ip_address` | `principal.ip` | IP address of the email source |
| `source_reverse_dns` | `principal.hostname` | Reverse DNS of source |
| `source_country` | `principal.location.country_or_region` | GeoIP country of source |
| `original_mail_from` | `network.email.from` | Original sender |
| `original_rcpt_to` | `network.email.to` | Original recipient |
| `subject` | `network.email.subject` | Email subject |
| `reported_domain` | `target.hostname` | Reported domain |
| `message_id` | `metadata.product_log_id` | Email message ID |
| `arrival_date_utc` | `metadata.event_timestamp` | Arrival timestamp (UTC) |
| `auth_failure` | `security_result.description` | Type of authentication failure |
| `feedback_type` | `additional.fields` | Feedback report type |
| `authentication_results` | `additional.fields` | Full authentication results string |
| `delivery_result` | `additional.fields` | Email delivery outcome |

### SMTP TLS Reports

| parsedmarc Field | UDM Field | Notes |
|---|---|---|
| `sending_mta_ip` | `principal.ip` | Sending MTA IP address |
| `receiving_ip` | `target.ip` | Receiving MTA IP address |
| `receiving_mx_hostname` | `target.hostname` | Receiving MX hostname |
| `report_id` | `metadata.product_log_id` | Report identifier |
| `organization_name` | `additional.fields` | Reporting organization |
| `policy_domain` | `additional.fields` | Policy domain |
| `policy_type` | `additional.fields` | TLS policy type |
| `successful_session_count` | `additional.fields` | Count of successful TLS sessions |
| `failed_session_count` | `additional.fields` | Count of failed TLS sessions |
| `result_type` | `additional.fields` | Failure result type |
| `failure_reason_code` | `additional.fields` | Failure reason code |

## Installation

### Prerequisites

- A Google Security Operations (Chronicle) tenant
- parsedmarc configured to send syslog output (see the [parsedmarc documentation](https://domainaware.github.io/parsedmarc/))

### Steps

1. **Configure parsedmarc syslog output** in your `parsedmarc.ini`:

   ```ini
   [syslog]
   server = your-chronicle-forwarder.example.com
   port = 514
   ```

2. **Create the log source** in Google SecOps:
   - Navigate to **Settings** → **Feeds** → **Add New**
   - Select **Syslog** as the source type
   - Configure it to listen for parsedmarc syslog messages

3. **Upload the custom parser**:
   - Navigate to **Settings** → **Parsers**
   - Click **Create Custom Parser**
   - Set the **Log Type** to match your feed configuration
   - Paste the contents of `parsedmarc.conf`
   - Click **Submit**

4. **Validate** the parser using the Chronicle parser validation tool with sample parsedmarc JSON events.

## Sample Log Events

### Aggregate Report

```json
{"xml_schema": "1.0", "org_name": "Example Inc", "org_email": "noreply@example.net", "report_id": "abc123", "begin_date": "2024-01-01 00:00:00", "end_date": "2024-01-01 23:59:59", "domain": "example.com", "adkim": "r", "aspf": "r", "p": "reject", "sp": "reject", "pct": "100", "fo": "0", "source_ip_address": "203.0.113.1", "source_country": "United States", "source_reverse_dns": "mail.example.org", "source_base_domain": "example.org", "count": 42, "spf_aligned": true, "dkim_aligned": true, "dmarc_aligned": true, "disposition": "none", "header_from": "example.com", "envelope_from": "example.com", "envelope_to": null, "dkim_domains": "example.com", "dkim_selectors": "selector1", "dkim_results": "pass", "spf_domains": "example.com", "spf_scopes": "mfrom", "spf_results": "pass"}
```

### Forensic Report

```json
{"feedback_type": "auth-failure", "user_agent": "Lua/1.0", "version": "1.0", "original_mail_from": "sender@example.com", "original_rcpt_to": "recipient@example.org", "arrival_date": "Mon, 01 Jan 2024 12:00:00 +0000", "arrival_date_utc": "2024-01-01 12:00:00", "source_ip_address": "198.51.100.1", "source_country": "Germany", "source_reverse_dns": "mail.example.com", "source_base_domain": "example.com", "subject": "Test Email", "message_id": "<abc@example.com>", "authentication_results": "dmarc=fail (p=reject; dis=reject) header.from=example.com", "dkim_domain": "example.com", "delivery_result": "reject", "auth_failure": "dmarc", "reported_domain": "example.com", "authentication_mechanisms": "dmarc"}
```

### SMTP TLS Report

```json
{"organization_name": "Example Inc", "begin_date": "2024-01-01 00:00:00", "end_date": "2024-01-01 23:59:59", "report_id": "tls-123", "policy_domain": "example.com", "policy_type": "sts", "policy_strings": "version: STSv1; mode: enforce", "mx_host_patterns": "*.mail.example.com", "successful_session_count": 1000, "failed_session_count": 5, "result_type": "certificate-expired", "sending_mta_ip": "203.0.113.10", "receiving_ip": "198.51.100.20", "receiving_mx_hostname": "mx.example.com", "receiving_mx_helo": "mx.example.com", "failure_reason_code": "X509_V_ERR_CERT_HAS_EXPIRED"}
```

## UDM Reference

For the complete list of UDM fields, see the [Google SecOps UDM field list](https://cloud.google.com/chronicle/docs/reference/udm-field-list).

## License

This parser is part of the [parsedmarc](https://github.com/domainaware/parsedmarc) project and is distributed under the same license.
1052  google_secops_parser/parsedmarc.conf (new file)

File diff suppressed because it is too large.
File diff suppressed because one or more lines are too long.
@@ -3,53 +3,55 @@

"""A CLI for parsing DMARC reports"""

from argparse import Namespace, ArgumentParser
import http.client
import json
import logging
import os
import sys
from argparse import ArgumentParser, Namespace
from configparser import ConfigParser
from glob import glob
import logging
import math
import yaml
import json
from ssl import CERT_NONE, create_default_context
from multiprocessing import Pipe, Process
import sys
import http.client
from ssl import CERT_NONE, create_default_context

import yaml
from tqdm import tqdm

from parsedmarc import (
    get_dmarc_reports_from_mailbox,
    watch_inbox,
    parse_report_file,
    get_dmarc_reports_from_mbox,
    elastic,
    opensearch,
    kafkaclient,
    splunk,
    save_output,
    email_results,
    SEEN_AGGREGATE_REPORT_IDS,
    InvalidDMARCReport,
    ParserError,
    __version__,
    InvalidDMARCReport,
    s3,
    syslog,
    loganalytics,
    elastic,
    email_results,
    gelf,
    get_dmarc_reports_from_mailbox,
    get_dmarc_reports_from_mbox,
    kafkaclient,
    loganalytics,
    opensearch,
    parse_report_file,
    s3,
    save_output,
    splunk,
    syslog,
    watch_inbox,
    webhook,
)
from parsedmarc.log import logger
from parsedmarc.mail import (
    IMAPConnection,
    MSGraphConnection,
    GmailConnection,
    IMAPConnection,
    MaildirConnection,
    MSGraphConnection,
)
from parsedmarc.mail.graph import AuthMethod
from parsedmarc.types import ParsingResults
from parsedmarc.utils import get_base_domain, get_reverse_dns, is_mbox

from parsedmarc.log import logger
from parsedmarc.utils import is_mbox, get_reverse_dns, get_base_domain
from parsedmarc import SEEN_AGGREGATE_REPORT_IDS

http.client._MAXHEADERS = 200  # pylint:disable=protected-access
# Increase the max header limit for very large emails. `_MAXHEADERS` is a
# private stdlib attribute and may not exist in type stubs.
setattr(http.client, "_MAXHEADERS", 200)

formatter = logging.Formatter(
    fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
@@ -66,6 +68,48 @@ def _str_to_list(s):
    return list(map(lambda i: i.lstrip(), _list))


def _configure_logging(log_level, log_file=None):
    """
    Configure logging for the current process.
    This is needed for child processes to properly log messages.

    Args:
        log_level: The logging level (e.g., logging.DEBUG, logging.WARNING)
        log_file: Optional path to log file
    """
    # Get the logger
    from parsedmarc.log import logger

    # Set the log level
    logger.setLevel(log_level)

    # Add StreamHandler with formatter if not already present
    # Check if we already have a StreamHandler to avoid duplicates
    # Use exact type check to distinguish from FileHandler subclass
    has_stream_handler = any(type(h) is logging.StreamHandler for h in logger.handlers)

    if not has_stream_handler:
        formatter = logging.Formatter(
            fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
            datefmt="%Y-%m-%d:%H:%M:%S",
        )
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    # Add FileHandler if log_file is specified
    if log_file:
        try:
            fh = logging.FileHandler(log_file, "a")
            formatter = logging.Formatter(
                "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
            )
            fh.setFormatter(formatter)
            logger.addHandler(fh)
        except (IOError, OSError, PermissionError) as error:
            logger.warning("Unable to write to log file: {}".format(error))


def cli_parse(
    file_path,
    sa,
@@ -78,8 +122,29 @@
    reverse_dns_map_url,
    normalize_timespan_threshold_hours,
    conn,
    log_level=logging.ERROR,
    log_file=None,
):
    """Separated this function for multiprocessing

    Args:
        file_path: Path to the report file
        sa: Strip attachment payloads flag
        nameservers: List of nameservers
        dns_timeout: DNS timeout
        ip_db_path: Path to IP database
        offline: Offline mode flag
        always_use_local_files: Always use local files flag
        reverse_dns_map_path: Path to reverse DNS map
        reverse_dns_map_url: URL to reverse DNS map
        normalize_timespan_threshold_hours: Timespan threshold
        conn: Pipe connection for IPC
        log_level: Logging level for this process
        log_file: Optional path to log file
    """
    # Configure logging in this child process
    _configure_logging(log_level, log_file)

    try:
        file_results = parse_report_file(
            file_path,
@@ -104,6 +169,7 @@ def _main():
|
||||
"""Called when the module is executed"""
|
||||
|
||||
def get_index_prefix(report):
|
||||
domain = None
|
||||
if index_prefix_domain_map is None:
|
||||
return None
|
||||
if "policy_published" in report:
|
||||
@@ -137,7 +203,7 @@ def _main():
|
||||
print(output_str)
|
||||
if opts.output:
|
||||
save_output(
|
||||
results,
|
||||
reports_,
|
||||
output_directory=opts.output,
|
||||
aggregate_json_filename=opts.aggregate_json_filename,
|
||||
forensic_json_filename=opts.forensic_json_filename,
|
||||
@@ -631,6 +697,13 @@ def _main():
|
||||
s3_secret_access_key=None,
|
||||
syslog_server=None,
|
||||
syslog_port=None,
|
||||
syslog_protocol=None,
|
||||
syslog_cafile_path=None,
|
||||
syslog_certfile_path=None,
|
||||
syslog_keyfile_path=None,
|
||||
syslog_timeout=None,
|
||||
syslog_retry_attempts=None,
|
||||
syslog_retry_delay=None,
|
||||
gmail_api_credentials_file=None,
|
||||
gmail_api_token_file=None,
|
||||
gmail_api_include_spam_trash=False,
|
||||
@@ -676,7 +749,7 @@ def _main():
|
||||
if "general" in config.sections():
|
||||
general_config = config["general"]
|
||||
if "silent" in general_config:
|
||||
opts.silent = general_config.getboolean("silent")
|
||||
opts.silent = bool(general_config.getboolean("silent"))
|
||||
if "normalize_timespan_threshold_hours" in general_config:
|
||||
opts.normalize_timespan_threshold_hours = general_config.getfloat(
|
||||
"normalize_timespan_threshold_hours"
|
||||
@@ -685,10 +758,10 @@ def _main():
|
||||
with open(general_config["index_prefix_domain_map"]) as f:
|
||||
index_prefix_domain_map = yaml.safe_load(f)
|
||||
if "offline" in general_config:
|
||||
opts.offline = general_config.getboolean("offline")
|
||||
opts.offline = bool(general_config.getboolean("offline"))
|
||||
if "strip_attachment_payloads" in general_config:
|
||||
opts.strip_attachment_payloads = general_config.getboolean(
|
||||
"strip_attachment_payloads"
|
||||
opts.strip_attachment_payloads = bool(
|
||||
general_config.getboolean("strip_attachment_payloads")
|
||||
)
|
||||
if "output" in general_config:
|
||||
opts.output = general_config["output"]
|
||||
@@ -706,6 +779,8 @@ def _main():
|
||||
opts.smtp_tls_csv_filename = general_config["smtp_tls_csv_filename"]
|
||||
if "dns_timeout" in general_config:
|
||||
opts.dns_timeout = general_config.getfloat("dns_timeout")
|
||||
if opts.dns_timeout is None:
|
||||
opts.dns_timeout = 2
|
||||
if "dns_test_address" in general_config:
|
||||
opts.dns_test_address = general_config["dns_test_address"]
|
||||
if "nameservers" in general_config:
|
||||
@@ -728,19 +803,19 @@ def _main():
|
||||
)
|
||||
exit(-1)
|
||||
if "save_aggregate" in general_config:
|
||||
opts.save_aggregate = general_config.getboolean("save_aggregate")
|
||||
opts.save_aggregate = bool(general_config.getboolean("save_aggregate"))
|
||||
if "save_forensic" in general_config:
|
||||
opts.save_forensic = general_config.getboolean("save_forensic")
|
||||
opts.save_forensic = bool(general_config.getboolean("save_forensic"))
|
||||
if "save_smtp_tls" in general_config:
|
||||
opts.save_smtp_tls = general_config.getboolean("save_smtp_tls")
|
||||
opts.save_smtp_tls = bool(general_config.getboolean("save_smtp_tls"))
|
||||
if "debug" in general_config:
|
||||
opts.debug = general_config.getboolean("debug")
|
||||
opts.debug = bool(general_config.getboolean("debug"))
|
||||
if "verbose" in general_config:
|
||||
opts.verbose = general_config.getboolean("verbose")
|
||||
opts.verbose = bool(general_config.getboolean("verbose"))
|
||||
if "silent" in general_config:
|
||||
opts.silent = general_config.getboolean("silent")
|
||||
opts.silent = bool(general_config.getboolean("silent"))
|
||||
if "warnings" in general_config:
|
||||
opts.warnings = general_config.getboolean("warnings")
|
||||
opts.warnings = bool(general_config.getboolean("warnings"))
|
||||
if "log_file" in general_config:
|
||||
opts.log_file = general_config["log_file"]
|
||||
if "n_procs" in general_config:
|
||||
@@ -750,15 +825,15 @@ def _main():
|
||||
else:
|
||||
opts.ip_db_path = None
|
||||
if "always_use_local_files" in general_config:
|
||||
opts.always_use_local_files = general_config.getboolean(
|
||||
"always_use_local_files"
|
||||
opts.always_use_local_files = bool(
|
||||
general_config.getboolean("always_use_local_files")
|
||||
)
|
||||
if "reverse_dns_map_path" in general_config:
|
||||
opts.reverse_dns_map_path = general_config["reverse_dns_path"]
|
||||
if "reverse_dns_map_url" in general_config:
|
||||
opts.reverse_dns_map_url = general_config["reverse_dns_url"]
|
||||
if "prettify_json" in general_config:
|
||||
opts.prettify_json = general_config.getboolean("prettify_json")
|
||||
opts.prettify_json = bool(general_config.getboolean("prettify_json"))
|
||||
|
||||
if "mailbox" in config.sections():
|
||||
mailbox_config = config["mailbox"]
|
||||
@@ -769,11 +844,11 @@ def _main():
|
||||
if "archive_folder" in mailbox_config:
|
||||
opts.mailbox_archive_folder = mailbox_config["archive_folder"]
|
||||
if "watch" in mailbox_config:
|
||||
opts.mailbox_watch = mailbox_config.getboolean("watch")
|
||||
opts.mailbox_watch = bool(mailbox_config.getboolean("watch"))
|
||||
if "delete" in mailbox_config:
|
||||
opts.mailbox_delete = mailbox_config.getboolean("delete")
|
||||
opts.mailbox_delete = bool(mailbox_config.getboolean("delete"))
|
||||
if "test" in mailbox_config:
|
||||
opts.mailbox_test = mailbox_config.getboolean("test")
|
||||
opts.mailbox_test = bool(mailbox_config.getboolean("test"))
|
||||
if "batch_size" in mailbox_config:
|
||||
opts.mailbox_batch_size = mailbox_config.getint("batch_size")
|
||||
if "check_timeout" in mailbox_config:
|
||||
@@ -797,14 +872,14 @@ def _main():
|
||||
if "port" in imap_config:
|
||||
opts.imap_port = imap_config.getint("port")
|
||||
if "timeout" in imap_config:
|
||||
opts.imap_timeout = imap_config.getfloat("timeout")
|
||||
opts.imap_timeout = imap_config.getint("timeout")
|
||||
if "max_retries" in imap_config:
|
||||
opts.imap_max_retries = imap_config.getint("max_retries")
|
||||
if "ssl" in imap_config:
|
||||
opts.imap_ssl = imap_config.getboolean("ssl")
|
||||
opts.imap_ssl = bool(imap_config.getboolean("ssl"))
|
||||
if "skip_certificate_verification" in imap_config:
|
||||
opts.imap_skip_certificate_verification = imap_config.getboolean(
|
||||
"skip_certificate_verification"
|
||||
opts.imap_skip_certificate_verification = bool(
|
||||
imap_config.getboolean("skip_certificate_verification")
|
||||
)
|
||||
if "user" in imap_config:
|
||||
opts.imap_user = imap_config["user"]
|
||||
@@ -833,7 +908,7 @@ def _main():
|
||||
"section instead."
|
||||
)
|
||||
if "watch" in imap_config:
|
||||
opts.mailbox_watch = imap_config.getboolean("watch")
|
||||
opts.mailbox_watch = bool(imap_config.getboolean("watch"))
|
||||
logger.warning(
|
||||
"Use of the watch option in the imap "
|
||||
"configuration section has been deprecated. "
|
||||
@@ -848,7 +923,7 @@ def _main():
|
||||
"section instead."
|
||||
)
|
||||
if "test" in imap_config:
|
||||
opts.mailbox_test = imap_config.getboolean("test")
|
||||
opts.mailbox_test = bool(imap_config.getboolean("test"))
|
||||
logger.warning(
|
||||
"Use of the test option in the imap "
|
||||
"configuration section has been deprecated. "
|
||||
@@ -942,8 +1017,8 @@ def _main():
|
||||
opts.graph_url = graph_config["graph_url"]
|
||||
|
||||
if "allow_unencrypted_storage" in graph_config:
|
||||
opts.graph_allow_unencrypted_storage = graph_config.getboolean(
|
||||
"allow_unencrypted_storage"
|
||||
opts.graph_allow_unencrypted_storage = bool(
|
||||
graph_config.getboolean("allow_unencrypted_storage")
|
||||
)
|
||||
|
||||
if "elasticsearch" in config:
|
||||
@@ -971,10 +1046,10 @@ def _main():
|
||||
if "index_prefix" in elasticsearch_config:
|
||||
opts.elasticsearch_index_prefix = elasticsearch_config["index_prefix"]
|
||||
if "monthly_indexes" in elasticsearch_config:
|
||||
monthly = elasticsearch_config.getboolean("monthly_indexes")
|
||||
monthly = bool(elasticsearch_config.getboolean("monthly_indexes"))
|
||||
opts.elasticsearch_monthly_indexes = monthly
|
||||
if "ssl" in elasticsearch_config:
|
||||
opts.elasticsearch_ssl = elasticsearch_config.getboolean("ssl")
|
||||
opts.elasticsearch_ssl = bool(elasticsearch_config.getboolean("ssl"))
|
||||
if "cert_path" in elasticsearch_config:
|
||||
opts.elasticsearch_ssl_cert_path = elasticsearch_config["cert_path"]
|
||||
if "user" in elasticsearch_config:
|
||||
@@ -1011,10 +1086,10 @@ def _main():
|
||||
if "index_prefix" in opensearch_config:
|
||||
opts.opensearch_index_prefix = opensearch_config["index_prefix"]
|
||||
if "monthly_indexes" in opensearch_config:
|
||||
monthly = opensearch_config.getboolean("monthly_indexes")
|
||||
monthly = bool(opensearch_config.getboolean("monthly_indexes"))
|
||||
opts.opensearch_monthly_indexes = monthly
|
||||
if "ssl" in opensearch_config:
|
||||
opts.opensearch_ssl = opensearch_config.getboolean("ssl")
|
||||
opts.opensearch_ssl = bool(opensearch_config.getboolean("ssl"))
|
||||
if "cert_path" in opensearch_config:
|
||||
opts.opensearch_ssl_cert_path = opensearch_config["cert_path"]
|
||||
if "user" in opensearch_config:
|
||||
@@ -1068,9 +1143,11 @@ def _main():
|
||||
if "password" in kafka_config:
|
||||
opts.kafka_password = kafka_config["password"]
|
||||
if "ssl" in kafka_config:
|
||||
opts.kafka_ssl = kafka_config.getboolean("ssl")
|
||||
opts.kafka_ssl = bool(kafka_config.getboolean("ssl"))
|
||||
if "skip_certificate_verification" in kafka_config:
|
||||
kafka_verify = kafka_config.getboolean("skip_certificate_verification")
|
||||
kafka_verify = bool(
|
||||
kafka_config.getboolean("skip_certificate_verification")
|
||||
)
|
||||
opts.kafka_skip_certificate_verification = kafka_verify
|
||||
if "aggregate_topic" in kafka_config:
|
||||
opts.kafka_aggregate_topic = kafka_config["aggregate_topic"]
|
||||
@@ -1102,9 +1179,11 @@ def _main():
|
||||
if "port" in smtp_config:
|
||||
opts.smtp_port = smtp_config.getint("port")
|
||||
if "ssl" in smtp_config:
|
||||
opts.smtp_ssl = smtp_config.getboolean("ssl")
|
||||
opts.smtp_ssl = bool(smtp_config.getboolean("ssl"))
|
||||
if "skip_certificate_verification" in smtp_config:
|
||||
smtp_verify = smtp_config.getboolean("skip_certificate_verification")
|
||||
smtp_verify = bool(
|
||||
smtp_config.getboolean("skip_certificate_verification")
|
||||
)
|
||||
opts.smtp_skip_certificate_verification = smtp_verify
|
||||
if "user" in smtp_config:
|
||||
opts.smtp_user = smtp_config["user"]
|
||||
@@ -1167,16 +1246,38 @@ def _main():
|
||||
opts.syslog_port = syslog_config["port"]
|
||||
else:
|
||||
opts.syslog_port = 514
|
||||
if "protocol" in syslog_config:
|
||||
opts.syslog_protocol = syslog_config["protocol"]
|
||||
else:
|
||||
opts.syslog_protocol = "udp"
|
||||
if "cafile_path" in syslog_config:
|
||||
opts.syslog_cafile_path = syslog_config["cafile_path"]
|
||||
if "certfile_path" in syslog_config:
|
||||
opts.syslog_certfile_path = syslog_config["certfile_path"]
|
||||
if "keyfile_path" in syslog_config:
|
||||
opts.syslog_keyfile_path = syslog_config["keyfile_path"]
|
||||
if "timeout" in syslog_config:
|
||||
opts.syslog_timeout = float(syslog_config["timeout"])
|
||||
else:
|
||||
opts.syslog_timeout = 5.0
|
||||
if "retry_attempts" in syslog_config:
|
||||
opts.syslog_retry_attempts = int(syslog_config["retry_attempts"])
|
||||
else:
|
||||
opts.syslog_retry_attempts = 3
|
||||
if "retry_delay" in syslog_config:
|
||||
opts.syslog_retry_delay = int(syslog_config["retry_delay"])
|
||||
else:
|
||||
opts.syslog_retry_delay = 5
|
||||
|
||||
if "gmail_api" in config.sections():
|
||||
gmail_api_config = config["gmail_api"]
|
||||
opts.gmail_api_credentials_file = gmail_api_config.get("credentials_file")
|
||||
opts.gmail_api_token_file = gmail_api_config.get("token_file", ".token")
|
||||
opts.gmail_api_include_spam_trash = gmail_api_config.getboolean(
|
||||
"include_spam_trash", False
|
||||
opts.gmail_api_include_spam_trash = bool(
|
||||
gmail_api_config.getboolean("include_spam_trash", False)
|
||||
)
|
||||
opts.gmail_api_paginate_messages = gmail_api_config.getboolean(
|
||||
"paginate_messages", True
|
||||
opts.gmail_api_paginate_messages = bool(
|
||||
gmail_api_config.getboolean("paginate_messages", True)
|
||||
)
|
||||
opts.gmail_api_scopes = gmail_api_config.get(
|
||||
"scopes", default_gmail_api_scope
|
||||
@@ -1190,7 +1291,9 @@ def _main():
|
||||
if "maildir" in config.sections():
|
||||
maildir_api_config = config["maildir"]
|
||||
opts.maildir_path = maildir_api_config.get("maildir_path")
|
||||
opts.maildir_create = maildir_api_config.get("maildir_create")
|
||||
opts.maildir_create = bool(
|
||||
maildir_api_config.getboolean("maildir_create", fallback=False)
|
||||
)
|
||||
|
||||
if "log_analytics" in config.sections():
|
||||
log_analytics_config = config["log_analytics"]
|
||||
@@ -1285,6 +1388,11 @@ def _main():
|
||||
es_aggregate_index = "{0}{1}".format(prefix, es_aggregate_index)
|
||||
es_forensic_index = "{0}{1}".format(prefix, es_forensic_index)
|
||||
es_smtp_tls_index = "{0}{1}".format(prefix, es_smtp_tls_index)
|
||||
elastic_timeout_value = (
|
||||
float(opts.elasticsearch_timeout)
|
||||
if opts.elasticsearch_timeout is not None
|
||||
else 60.0
|
||||
)
|
||||
elastic.set_hosts(
|
||||
opts.elasticsearch_hosts,
|
||||
use_ssl=opts.elasticsearch_ssl,
|
||||
@@ -1292,7 +1400,7 @@ def _main():
|
||||
username=opts.elasticsearch_username,
|
||||
password=opts.elasticsearch_password,
|
||||
api_key=opts.elasticsearch_api_key,
|
||||
timeout=opts.elasticsearch_timeout,
|
||||
timeout=elastic_timeout_value,
|
||||
)
|
||||
elastic.migrate_indexes(
|
||||
aggregate_indexes=[es_aggregate_index],
|
||||
@@ -1317,6 +1425,11 @@ def _main():
|
||||
os_aggregate_index = "{0}{1}".format(prefix, os_aggregate_index)
|
||||
os_forensic_index = "{0}{1}".format(prefix, os_forensic_index)
|
||||
os_smtp_tls_index = "{0}{1}".format(prefix, os_smtp_tls_index)
|
||||
opensearch_timeout_value = (
|
||||
float(opts.opensearch_timeout)
|
||||
if opts.opensearch_timeout is not None
|
||||
else 60.0
|
||||
)
|
||||
opensearch.set_hosts(
|
||||
opts.opensearch_hosts,
|
||||
use_ssl=opts.opensearch_ssl,
|
||||
@@ -1324,7 +1437,7 @@ def _main():
|
||||
username=opts.opensearch_username,
|
||||
password=opts.opensearch_password,
|
||||
api_key=opts.opensearch_api_key,
|
||||
timeout=opts.opensearch_timeout,
|
||||
timeout=opensearch_timeout_value,
|
||||
)
|
||||
opensearch.migrate_indexes(
|
||||
aggregate_indexes=[os_aggregate_index],
|
||||
@@ -1352,6 +1465,13 @@ def _main():
|
||||
syslog_client = syslog.SyslogClient(
|
||||
server_name=opts.syslog_server,
|
||||
server_port=int(opts.syslog_port),
|
||||
protocol=opts.syslog_protocol or "udp",
|
||||
cafile_path=opts.syslog_cafile_path,
|
||||
certfile_path=opts.syslog_certfile_path,
|
||||
keyfile_path=opts.syslog_keyfile_path,
|
||||
timeout=opts.syslog_timeout if opts.syslog_timeout is not None else 5.0,
|
||||
retry_attempts=opts.syslog_retry_attempts if opts.syslog_retry_attempts is not None else 3,
|
||||
retry_delay=opts.syslog_retry_delay if opts.syslog_retry_delay is not None else 5,
|
||||
)
|
||||
except Exception as error_:
|
||||
logger.error("Syslog Error: {0}".format(error_.__str__()))
|
||||
@@ -1433,16 +1553,23 @@

    results = []

    pbar = None
    if sys.stdout.isatty():
        pbar = tqdm(total=len(file_paths))

    for batch_index in range(math.ceil(len(file_paths) / opts.n_procs)):
    n_procs = int(opts.n_procs or 1)
    if n_procs < 1:
        n_procs = 1

    # Capture the current log level to pass to child processes
    current_log_level = logger.level
    current_log_file = opts.log_file

    for batch_index in range((len(file_paths) + n_procs - 1) // n_procs):
        processes = []
        connections = []

        for proc_index in range(
            opts.n_procs * batch_index, opts.n_procs * (batch_index + 1)
        ):
        for proc_index in range(n_procs * batch_index, n_procs * (batch_index + 1)):
            if proc_index >= len(file_paths):
                break
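The rewritten loop replaces `math.ceil(len(file_paths) / n_procs)` with `(len(file_paths) + n_procs - 1) // n_procs`, the standard integer ceiling-division idiom, which avoids the float round-trip and the `math` import. A quick check of the equivalence:

```python
import math

for total in range(0, 50):
    for n_procs in range(1, 8):
        assert (total + n_procs - 1) // n_procs == math.ceil(total / n_procs)
print("integer ceiling division matches math.ceil for these inputs")
```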
@@ -1463,6 +1590,8 @@ def _main():
|
||||
opts.reverse_dns_map_url,
|
||||
opts.normalize_timespan_threshold_hours,
|
||||
child_conn,
|
||||
current_log_level,
|
||||
current_log_file,
|
||||
),
|
||||
)
|
||||
processes.append(process)
|
||||
@@ -1475,9 +1604,12 @@

        for proc in processes:
            proc.join()
            if sys.stdout.isatty():
            if pbar is not None:
                counter += 1
                pbar.update(counter - pbar.n)
                pbar.update(1)

    if pbar is not None:
        pbar.close()

    for result in results:
        if isinstance(result[0], ParserError) or result[0] is None:
@@ -1501,6 +1633,11 @@ def _main():
|
||||
smtp_tls_reports.append(result[0]["report"])
|
||||
|
||||
for mbox_path in mbox_paths:
|
||||
normalize_timespan_threshold_hours_value = (
|
||||
float(opts.normalize_timespan_threshold_hours)
|
||||
if opts.normalize_timespan_threshold_hours is not None
|
||||
else 24.0
|
||||
)
|
||||
strip = opts.strip_attachment_payloads
|
||||
reports = get_dmarc_reports_from_mbox(
|
||||
mbox_path,
|
||||
@@ -1512,13 +1649,17 @@ def _main():
|
||||
reverse_dns_map_path=opts.reverse_dns_map_path,
|
||||
reverse_dns_map_url=opts.reverse_dns_map_url,
|
||||
offline=opts.offline,
|
||||
normalize_timespan_threshold_hours=opts.normalize_timespan_threshold_hours,
|
||||
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours_value,
|
||||
)
|
||||
aggregate_reports += reports["aggregate_reports"]
|
||||
forensic_reports += reports["forensic_reports"]
|
||||
smtp_tls_reports += reports["smtp_tls_reports"]
|
||||
|
||||
mailbox_connection = None
|
||||
mailbox_batch_size_value = 10
|
||||
mailbox_check_timeout_value = 30
|
||||
normalize_timespan_threshold_hours_value = 24.0
|
||||
|
||||
if opts.imap_host:
|
||||
try:
|
||||
if opts.imap_user is None or opts.imap_password is None:
|
||||
@@ -1534,13 +1675,20 @@ def _main():
|
||||
if not opts.imap_ssl:
|
||||
ssl = False
|
||||
|
||||
imap_timeout = (
|
||||
int(opts.imap_timeout) if opts.imap_timeout is not None else 30
|
||||
)
|
||||
imap_max_retries = (
|
||||
int(opts.imap_max_retries) if opts.imap_max_retries is not None else 4
|
||||
)
|
||||
imap_port_value = int(opts.imap_port) if opts.imap_port is not None else 993
|
||||
mailbox_connection = IMAPConnection(
|
||||
host=opts.imap_host,
|
||||
port=opts.imap_port,
|
||||
port=imap_port_value,
|
||||
ssl=ssl,
|
||||
verify=verify,
|
||||
timeout=opts.imap_timeout,
|
||||
max_retries=opts.imap_max_retries,
|
||||
timeout=imap_timeout,
|
||||
max_retries=imap_max_retries,
|
||||
user=opts.imap_user,
|
||||
password=opts.imap_password,
|
||||
)
|
||||
@@ -1561,7 +1709,7 @@ def _main():
|
||||
username=opts.graph_user,
|
||||
password=opts.graph_password,
|
||||
token_file=opts.graph_token_file,
|
||||
allow_unencrypted_storage=opts.graph_allow_unencrypted_storage,
|
||||
allow_unencrypted_storage=bool(opts.graph_allow_unencrypted_storage),
|
||||
graph_url=opts.graph_url,
|
||||
)
|
||||
|
||||
@@ -1606,11 +1754,24 @@ def _main():
|
||||
exit(1)
|
||||
|
||||
if mailbox_connection:
|
||||
mailbox_batch_size_value = (
|
||||
int(opts.mailbox_batch_size) if opts.mailbox_batch_size is not None else 10
|
||||
)
|
||||
mailbox_check_timeout_value = (
|
||||
int(opts.mailbox_check_timeout)
|
||||
if opts.mailbox_check_timeout is not None
|
||||
else 30
|
||||
)
|
||||
normalize_timespan_threshold_hours_value = (
|
||||
float(opts.normalize_timespan_threshold_hours)
|
||||
if opts.normalize_timespan_threshold_hours is not None
|
||||
else 24.0
|
||||
)
|
||||
try:
|
||||
reports = get_dmarc_reports_from_mailbox(
|
||||
connection=mailbox_connection,
|
||||
delete=opts.mailbox_delete,
|
||||
batch_size=opts.mailbox_batch_size,
|
||||
batch_size=mailbox_batch_size_value,
|
||||
reports_folder=opts.mailbox_reports_folder,
|
||||
archive_folder=opts.mailbox_archive_folder,
|
||||
ip_db_path=opts.ip_db_path,
|
||||
@@ -1622,7 +1783,7 @@ def _main():
|
||||
test=opts.mailbox_test,
|
||||
strip_attachment_payloads=opts.strip_attachment_payloads,
|
||||
since=opts.mailbox_since,
|
||||
normalize_timespan_threshold_hours=opts.normalize_timespan_threshold_hours,
|
||||
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours_value,
|
||||
)
|
||||
|
||||
aggregate_reports += reports["aggregate_reports"]
|
||||
@@ -1633,27 +1794,31 @@ def _main():
|
||||
logger.exception("Mailbox Error")
|
||||
exit(1)
|
||||
|
||||
results = dict(
|
||||
[
|
||||
("aggregate_reports", aggregate_reports),
|
||||
("forensic_reports", forensic_reports),
|
||||
("smtp_tls_reports", smtp_tls_reports),
|
||||
]
|
||||
)
|
||||
parsing_results: ParsingResults = {
|
||||
"aggregate_reports": aggregate_reports,
|
||||
"forensic_reports": forensic_reports,
|
||||
"smtp_tls_reports": smtp_tls_reports,
|
||||
}
|
||||
|
||||
process_reports(results)
|
||||
process_reports(parsing_results)
|
||||
|
||||
if opts.smtp_host:
|
||||
try:
|
||||
verify = True
|
||||
if opts.smtp_skip_certificate_verification:
|
||||
verify = False
|
||||
smtp_port_value = int(opts.smtp_port) if opts.smtp_port is not None else 25
|
||||
smtp_to_value = (
|
||||
list(opts.smtp_to)
|
||||
if isinstance(opts.smtp_to, list)
|
||||
else _str_to_list(str(opts.smtp_to))
|
||||
)
|
||||
email_results(
|
||||
results,
|
||||
parsing_results,
|
||||
opts.smtp_host,
|
||||
opts.smtp_from,
|
||||
opts.smtp_to,
|
||||
port=opts.smtp_port,
|
||||
smtp_to_value,
|
||||
port=smtp_port_value,
|
||||
verify=verify,
|
||||
username=opts.smtp_user,
|
||||
password=opts.smtp_password,
|
||||
@@ -1675,17 +1840,17 @@ def _main():
|
||||
archive_folder=opts.mailbox_archive_folder,
|
||||
delete=opts.mailbox_delete,
|
||||
test=opts.mailbox_test,
|
||||
check_timeout=opts.mailbox_check_timeout,
|
||||
check_timeout=mailbox_check_timeout_value,
|
||||
nameservers=opts.nameservers,
|
||||
dns_timeout=opts.dns_timeout,
|
||||
strip_attachment_payloads=opts.strip_attachment_payloads,
|
||||
batch_size=opts.mailbox_batch_size,
|
||||
batch_size=mailbox_batch_size_value,
|
||||
ip_db_path=opts.ip_db_path,
|
||||
always_use_local_files=opts.always_use_local_files,
|
||||
reverse_dns_map_path=opts.reverse_dns_map_path,
|
||||
reverse_dns_map_url=opts.reverse_dns_map_url,
|
||||
offline=opts.offline,
|
||||
normalize_timespan_threshold_hours=opts.normalize_timespan_threshold_hours,
|
||||
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours_value,
|
||||
)
|
||||
except FileExistsError as error:
|
||||
logger.error("{0}".format(error.__str__()))
|
||||
|
||||
@@ -1,3 +1,3 @@
-__version__ = "9.0.5"
+__version__ = "9.0.10"

 USER_AGENT = f"parsedmarc/{__version__}"
@@ -2,29 +2,28 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional, Union, Any
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
|
||||
from elasticsearch_dsl.search import Q
|
||||
from elasticsearch.helpers import reindex
|
||||
from elasticsearch_dsl import (
|
||||
connections,
|
||||
Object,
|
||||
Boolean,
|
||||
Date,
|
||||
Document,
|
||||
Index,
|
||||
Nested,
|
||||
InnerDoc,
|
||||
Integer,
|
||||
Text,
|
||||
Boolean,
|
||||
Ip,
|
||||
Date,
|
||||
Nested,
|
||||
Object,
|
||||
Search,
|
||||
Text,
|
||||
connections,
|
||||
)
|
||||
from elasticsearch.helpers import reindex
|
||||
from elasticsearch_dsl.search import Q
|
||||
|
||||
from parsedmarc import InvalidForensicReport
|
||||
from parsedmarc.log import logger
|
||||
from parsedmarc.utils import human_timestamp_to_datetime
|
||||
from parsedmarc import InvalidForensicReport
|
||||
|
||||
|
||||
class ElasticsearchError(Exception):
|
||||
@@ -93,17 +92,17 @@ class _AggregateReportDoc(Document):
|
||||
spf_results = Nested(_SPFResult)
|
||||
|
||||
def add_policy_override(self, type_: str, comment: str):
|
||||
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment))
|
||||
self.policy_overrides.append(_PolicyOverride(type=type_, comment=comment)) # pyright: ignore[reportCallIssue]
|
||||
|
||||
def add_dkim_result(self, domain: str, selector: str, result: _DKIMResult):
|
||||
self.dkim_results.append(
|
||||
_DKIMResult(domain=domain, selector=selector, result=result)
|
||||
)
|
||||
) # pyright: ignore[reportCallIssue]
|
||||
|
||||
def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
|
||||
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))
|
||||
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result)) # pyright: ignore[reportCallIssue]
|
||||
|
||||
def save(self, **kwargs):
|
||||
def save(self, **kwargs): # pyright: ignore[reportIncompatibleMethodOverride]
|
||||
self.passed_dmarc = False
|
||||
self.passed_dmarc = self.spf_aligned or self.dkim_aligned
|
||||
|
||||
@@ -137,25 +136,25 @@ class _ForensicSampleDoc(InnerDoc):
|
||||
attachments = Nested(_EmailAttachmentDoc)
|
||||
|
||||
def add_to(self, display_name: str, address: str):
|
||||
self.to.append(_EmailAddressDoc(display_name=display_name, address=address))
|
||||
self.to.append(_EmailAddressDoc(display_name=display_name, address=address)) # pyright: ignore[reportCallIssue]
|
||||
|
||||
def add_reply_to(self, display_name: str, address: str):
self.reply_to.append(
_EmailAddressDoc(display_name=display_name, address=address)
)
) # pyright: ignore[reportCallIssue]

def add_cc(self, display_name: str, address: str):
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address))
self.cc.append(_EmailAddressDoc(display_name=display_name, address=address)) # pyright: ignore[reportCallIssue]

def add_bcc(self, display_name: str, address: str):
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address))
self.bcc.append(_EmailAddressDoc(display_name=display_name, address=address)) # pyright: ignore[reportCallIssue]

def add_attachment(self, filename: str, content_type: str, sha256: str):
self.attachments.append(
_EmailAttachmentDoc(
filename=filename, content_type=content_type, sha256=sha256
)
)
) # pyright: ignore[reportCallIssue]


class _ForensicReportDoc(Document):
@@ -223,7 +222,7 @@ class _SMTPTLSPolicyDoc(InnerDoc):
additional_information=additional_information_uri,
failure_reason_code=failure_reason_code,
)
self.failure_details.append(_details)
self.failure_details.append(_details) # pyright: ignore[reportCallIssue]


class _SMTPTLSReportDoc(Document):
@@ -257,7 +256,7 @@ class _SMTPTLSReportDoc(Document):
policy_string=policy_string,
mx_host_patterns=mx_host_patterns,
failure_details=failure_details,
)
) # pyright: ignore[reportCallIssue]


class AlreadySaved(ValueError):
@@ -267,18 +266,18 @@ class AlreadySaved(ValueError):
def set_hosts(
hosts: Union[str, list[str]],
*,
use_ssl: Optional[bool] = False,
use_ssl: bool = False,
ssl_cert_path: Optional[str] = None,
username: Optional[str] = None,
password: Optional[str] = None,
api_key: Optional[str] = None,
timeout: Optional[float] = 60.0,
timeout: float = 60.0,
):
"""
Sets the Elasticsearch hosts to use

Args:
hosts (Union[str, list[str]]): A single hostname or URL, or list of hostnames or URLs
hosts (str | list[str]): A single hostname or URL, or list of hostnames or URLs
use_ssl (bool): Use an HTTPS connection to the server
ssl_cert_path (str): Path to the certificate chain
username (str): The username to use for authentication
@@ -368,7 +367,7 @@ def migrate_indexes(
}
Index(new_index_name).create()
Index(new_index_name).put_mapping(doc_type=doc, body=body)
reindex(connections.get_connection(), aggregate_index_name, new_index_name)
reindex(connections.get_connection(), aggregate_index_name, new_index_name) # pyright: ignore[reportArgumentType]
Index(aggregate_index_name).delete()

for forensic_index in forensic_indexes:
@@ -380,8 +379,8 @@ def save_aggregate_report_to_elasticsearch(
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: Optional[int] = 1,
number_of_replicas: Optional[int] = 0,
number_of_shards: int = 1,
number_of_replicas: int = 0,
):
"""
Saves a parsed DMARC aggregate report to Elasticsearch
@@ -411,11 +410,11 @@ def save_aggregate_report_to_elasticsearch(
else:
index_date = begin_date.strftime("%Y-%m-%d")

org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
domain_query = Q(dict(match_phrase={"published_policy.domain": domain}))
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
end_date_query = Q(dict(match=dict(date_end=end_date)))
org_name_query = Q(dict(match_phrase=dict(org_name=org_name))) # type: ignore
report_id_query = Q(dict(match_phrase=dict(report_id=report_id))) # pyright: ignore[reportArgumentType]
domain_query = Q(dict(match_phrase={"published_policy.domain": domain})) # pyright: ignore[reportArgumentType]
begin_date_query = Q(dict(match=dict(date_begin=begin_date))) # pyright: ignore[reportArgumentType]
end_date_query = Q(dict(match=dict(date_end=end_date))) # pyright: ignore[reportArgumentType]

if index_suffix is not None:
search_index = "dmarc_aggregate_{0}*".format(index_suffix)
@@ -427,13 +426,12 @@ def save_aggregate_report_to_elasticsearch(
query = org_name_query & report_id_query & domain_query
query = query & begin_date_query & end_date_query
search.query = query
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")

try:
existing = search.execute()
except Exception as error_:
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")

raise ElasticsearchError(
"Elasticsearch's search for existing report \
error: {}".format(error_.__str__())
@@ -529,7 +527,7 @@ def save_aggregate_report_to_elasticsearch(
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
)
create_indexes([index], index_settings)
agg_doc.meta.index = index
agg_doc.meta.index = index # pyright: ignore[reportOptionalMemberAccess, reportAttributeAccessIssue]

try:
agg_doc.save()
@@ -569,7 +567,7 @@ def save_forensic_report_to_elasticsearch(
sample_date = forensic_report["parsed_sample"]["date"]
sample_date = human_timestamp_to_datetime(sample_date)
original_headers = forensic_report["parsed_sample"]["headers"]
headers = dict()
headers: dict[str, Any] = {}
for original_header in original_headers:
headers[original_header.lower()] = original_headers[original_header]

@@ -583,7 +581,7 @@ def save_forensic_report_to_elasticsearch(
if index_prefix is not None:
search_index = "{0}{1}".format(index_prefix, search_index)
search = Search(index=search_index)
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds))) # pyright: ignore[reportArgumentType]

from_ = None
to_ = None
@@ -598,7 +596,7 @@ def save_forensic_report_to_elasticsearch(

from_ = dict()
from_["sample.headers.from"] = headers["from"]
from_query = Q(dict(match_phrase=from_))
from_query = Q(dict(match_phrase=from_)) # pyright: ignore[reportArgumentType]
q = q & from_query
if "to" in headers:
# We convert the TO header from a string list to a flat string.
@@ -610,12 +608,12 @@ def save_forensic_report_to_elasticsearch(

to_ = dict()
to_["sample.headers.to"] = headers["to"]
to_query = Q(dict(match_phrase=to_))
to_query = Q(dict(match_phrase=to_)) # pyright: ignore[reportArgumentType]
q = q & to_query
if "subject" in headers:
subject = headers["subject"]
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
q = q & Q(subject_query)
q = q & Q(subject_query) # pyright: ignore[reportArgumentType]

search.query = q
existing = search.execute()
@@ -693,7 +691,7 @@ def save_forensic_report_to_elasticsearch(
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
)
create_indexes([index], index_settings)
forensic_doc.meta.index = index
forensic_doc.meta.index = index # pyright: ignore[reportAttributeAccessIssue, reportOptionalMemberAccess]
try:
forensic_doc.save()
except Exception as e:
@@ -708,9 +706,9 @@ def save_smtp_tls_report_to_elasticsearch(
report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: Optional[int] = 1,
number_of_replicas: Optional[int] = 0,
monthly_indexes: bool = False,
number_of_shards: int = 1,
number_of_replicas: int = 0,
):
"""
Saves a parsed SMTP TLS report to Elasticsearch
@@ -740,10 +738,10 @@ def save_smtp_tls_report_to_elasticsearch(
report["begin_date"] = begin_date
report["end_date"] = end_date

org_name_query = Q(dict(match_phrase=dict(org_name=org_name)))
report_id_query = Q(dict(match_phrase=dict(report_id=report_id)))
begin_date_query = Q(dict(match=dict(date_begin=begin_date)))
end_date_query = Q(dict(match=dict(date_end=end_date)))
org_name_query = Q(dict(match_phrase=dict(org_name=org_name))) # pyright: ignore[reportArgumentType]
report_id_query = Q(dict(match_phrase=dict(report_id=report_id))) # pyright: ignore[reportArgumentType]
begin_date_query = Q(dict(match=dict(date_begin=begin_date))) # pyright: ignore[reportArgumentType]
end_date_query = Q(dict(match=dict(date_end=end_date))) # pyright: ignore[reportArgumentType]

if index_suffix is not None:
search_index = "smtp_tls_{0}*".format(index_suffix)
@@ -844,10 +842,10 @@ def save_smtp_tls_report_to_elasticsearch(
additional_information_uri=additional_information_uri,
failure_reason_code=failure_reason_code,
)
smtp_tls_doc.policies.append(policy_doc)
smtp_tls_doc.policies.append(policy_doc) # pyright: ignore[reportCallIssue]

create_indexes([index], index_settings)
smtp_tls_doc.meta.index = index
smtp_tls_doc.meta.index = index # pyright: ignore[reportOptionalMemberAccess, reportAttributeAccessIssue]

try:
smtp_tls_doc.save()

@@ -2,20 +2,18 @@

from __future__ import annotations

from typing import Any

import logging
import logging.handlers
import json
import threading
from typing import Any

from pygelf import GelfTcpHandler, GelfTlsHandler, GelfUdpHandler

from parsedmarc import (
parsed_aggregate_reports_to_csv_rows,
parsed_forensic_reports_to_csv_rows,
parsed_smtp_tls_reports_to_csv_rows,
)
from pygelf import GelfTcpHandler, GelfUdpHandler, GelfTlsHandler


log_context_data = threading.local()

@@ -52,9 +50,7 @@ class GelfClient(object):
)
self.logger.addHandler(self.handler)

def save_aggregate_report_to_gelf(
self, aggregate_reports: list[dict[str, Any]]
):
def save_aggregate_report_to_gelf(self, aggregate_reports: list[dict[str, Any]]):
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
for row in rows:
log_context_data.parsedmarc = row
@@ -62,14 +58,14 @@ class GelfClient(object):

log_context_data.parsedmarc = None

def save_forensic_report_to_gelf(
self, forensic_reports: list[dict[str, Any]]
):
def save_forensic_report_to_gelf(self, forensic_reports: list[dict[str, Any]]):
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
for row in rows:
self.logger.info(json.dumps(row))
log_context_data.parsedmarc = row
self.logger.info("parsedmarc forensic report")

def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: dict[str, Any]):
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
for row in rows:
self.logger.info(json.dumps(row))
log_context_data.parsedmarc = row
self.logger.info("parsedmarc smtptls report")

@@ -2,18 +2,16 @@

from __future__ import annotations

from typing import Any, Optional, Union
from ssl import SSLContext

import json
from ssl import create_default_context
from ssl import SSLContext, create_default_context
from typing import Any, Optional, Union

from kafka import KafkaProducer
from kafka.errors import NoBrokersAvailable, UnknownTopicOrPartitionError
from parsedmarc.utils import human_timestamp_to_datetime

from parsedmarc import __version__
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime


class KafkaError(RuntimeError):
@@ -48,7 +46,7 @@ class KafkaClient(object):
``$ConnectionString``, and the password is the
Azure Event Hub connection string.
"""
config = dict(
config: dict[str, Any] = dict(
value_serializer=lambda v: json.dumps(v).encode("utf-8"),
bootstrap_servers=kafka_hosts,
client_id="parsedmarc-{0}".format(__version__),

@@ -4,11 +4,12 @@ from __future__ import annotations

from typing import Any

from parsedmarc.log import logger
from azure.core.exceptions import HttpResponseError
from azure.identity import ClientSecretCredential
from azure.monitor.ingestion import LogsIngestionClient

from parsedmarc.log import logger


class LogAnalyticsException(Exception):
"""Raised when an Elasticsearch error occurs"""
@@ -132,7 +133,7 @@ class LogAnalyticsClient(object):

def publish_results(
self,
results: dict[str, dict[str, Any]],
results: dict[str, Any],
save_aggregate: bool,
save_forensic: bool,
save_smtp_tls: bool,

@@ -116,14 +116,14 @@ class GmailConnection(MailboxConnection):
else:
return [id for id in self._fetch_all_message_ids(reports_label_id)]

def fetch_message(self, message_id):
def fetch_message(self, message_id) -> str:
msg = (
self.service.users()
.messages()
.get(userId="me", id=message_id, format="raw")
.execute()
)
return urlsafe_b64decode(msg["raw"])
return urlsafe_b64decode(msg["raw"]).decode(errors="replace")

def delete_message(self, message_id: str):
self.service.users().messages().delete(userId="me", id=message_id)

@@ -6,7 +6,7 @@ from enum import Enum
from functools import lru_cache
from pathlib import Path
from time import sleep
from typing import List, Optional
from typing import Any, List, Optional, Union

from azure.identity import (
UsernamePasswordCredential,
@@ -28,7 +28,7 @@ class AuthMethod(Enum):


def _get_cache_args(token_path: Path, allow_unencrypted_storage):
cache_args = {
cache_args: dict[str, Any] = {
"cache_persistence_options": TokenCachePersistenceOptions(
name="parsedmarc", allow_unencrypted_storage=allow_unencrypted_storage
)
@@ -151,9 +151,9 @@ class MSGraphConnection(MailboxConnection):
else:
logger.warning(f"Unknown response {resp.status_code} {resp.json()}")

def fetch_messages(self, folder_name: str, **kwargs) -> List[str]:
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
"""Returns a list of message UIDs in the specified folder"""
folder_id = self._find_folder_id_from_folder_path(folder_name)
folder_id = self._find_folder_id_from_folder_path(reports_folder)
url = f"/users/{self.mailbox_name}/mailFolders/{folder_id}/messages"
since = kwargs.get("since")
if not since:
@@ -166,7 +166,7 @@ class MSGraphConnection(MailboxConnection):

def _get_all_messages(self, url, batch_size, since):
messages: list
params = {"$select": "id"}
params: dict[str, Union[str, int]] = {"$select": "id"}
if since:
params["$filter"] = f"receivedDateTime ge {since}"
if batch_size and batch_size > 0:

@@ -2,7 +2,7 @@

from __future__ import annotations

from typing import Optional
from typing import cast

from time import sleep

@@ -17,15 +17,14 @@ from parsedmarc.mail.mailbox_connection import MailboxConnection
class IMAPConnection(MailboxConnection):
def __init__(
self,
host: Optional[str] = None,
*,
user: Optional[str] = None,
password: Optional[str] = None,
port: Optional[str] = None,
ssl: Optional[bool] = True,
verify: Optional[bool] = True,
timeout: Optional[int] = 30,
max_retries: Optional[int] = 4,
host: str,
user: str,
password: str,
port: int = 993,
ssl: bool = True,
verify: bool = True,
timeout: int = 30,
max_retries: int = 4,
):
self._username = user
self._password = password
@@ -47,13 +46,13 @@ class IMAPConnection(MailboxConnection):
def fetch_messages(self, reports_folder: str, **kwargs):
self._client.select_folder(reports_folder)
since = kwargs.get("since")
if since:
return self._client.search(["SINCE", since])
if since is not None:
return self._client.search(f"SINCE {since}")
else:
return self._client.search()

def fetch_message(self, message_id: int):
return self._client.fetch_message(message_id, parse=False)
return cast(str, self._client.fetch_message(message_id, parse=False))

def delete_message(self, message_id: int):
self._client.delete_messages([message_id])

@@ -13,16 +13,16 @@ class MailboxConnection(ABC):
def create_folder(self, folder_name: str):
raise NotImplementedError

def fetch_messages(self, reports_folder: str, **kwargs) -> list[str]:
def fetch_messages(self, reports_folder: str, **kwargs):
raise NotImplementedError

def fetch_message(self, message_id) -> str:
raise NotImplementedError

def delete_message(self, message_id: str):
def delete_message(self, message_id):
raise NotImplementedError

def move_message(self, message_id: str, folder_name: str):
def move_message(self, message_id, folder_name: str):
raise NotImplementedError

def keepalive(self):

@@ -2,21 +2,20 @@

from __future__ import annotations

from typing import Optional

import mailbox
import os
from time import sleep
from typing import Dict

from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection
import mailbox
import os


class MaildirConnection(MailboxConnection):
def __init__(
self,
maildir_path: Optional[bool] = None,
maildir_create: Optional[bool] = False,
maildir_path: str,
maildir_create: bool = False,
):
self._maildir_path = maildir_path
self._maildir_create = maildir_create
@@ -33,27 +32,31 @@ class MaildirConnection(MailboxConnection):
)
raise Exception(ex)
self._client = mailbox.Maildir(maildir_path, create=maildir_create)
self._subfolder_client = {}
self._subfolder_client: Dict[str, mailbox.Maildir] = {}

def create_folder(self, folder_name: str):
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
self._client.add_folder(folder_name)

def fetch_messages(self, reports_folder: str, **kwargs):
return self._client.keys()

def fetch_message(self, message_id: str):
return self._client.get(message_id).as_string()
def fetch_message(self, message_id: str) -> str:
msg = self._client.get(message_id)
if msg is not None:
msg = msg.as_string()
if msg is not None:
return msg
return ""

def delete_message(self, message_id: str):
self._client.remove(message_id)

def move_message(self, message_id: str, folder_name: str):
message_data = self._client.get(message_id)
if folder_name not in self._subfolder_client.keys():
self._subfolder_client = mailbox.Maildir(
os.join(self.maildir_path, folder_name), create=self.maildir_create
)
if message_data is None:
return
if folder_name not in self._subfolder_client:
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
self._subfolder_client[folder_name].add(message_data)
self._client.remove(message_id)


@@ -2,29 +2,28 @@

from __future__ import annotations

from typing import Optional, Union, Any

from typing import Any, Optional, Union

from opensearchpy import (
Q,
connections,
Object,
Boolean,
Date,
Document,
Index,
Nested,
InnerDoc,
Integer,
Text,
Boolean,
Ip,
Date,
Nested,
Object,
Q,
Search,
Text,
connections,
)
from opensearchpy.helpers import reindex

from parsedmarc import InvalidForensicReport
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime
from parsedmarc import InvalidForensicReport


class OpenSearchError(Exception):
@@ -103,7 +102,7 @@ class _AggregateReportDoc(Document):
def add_spf_result(self, domain: str, scope: str, result: _SPFResult):
self.spf_results.append(_SPFResult(domain=domain, scope=scope, result=result))

def save(self, **kwargs):
def save(self, **kwargs): # pyright: ignore[reportIncompatibleMethodOverride]
self.passed_dmarc = False
self.passed_dmarc = self.spf_aligned or self.dkim_aligned

@@ -379,9 +378,9 @@ def save_aggregate_report_to_opensearch(
aggregate_report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: Optional[int] = 1,
number_of_replicas: Optional[int] = 0,
monthly_indexes: bool = False,
number_of_shards: int = 1,
number_of_replicas: int = 0,
):
"""
Saves a parsed DMARC aggregate report to OpenSearch
@@ -427,13 +426,12 @@ def save_aggregate_report_to_opensearch(
query = org_name_query & report_id_query & domain_query
query = query & begin_date_query & end_date_query
search.query = query
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")

try:
existing = search.execute()
except Exception as error_:
begin_date_human = begin_date.strftime("%Y-%m-%d %H:%M:%SZ")
end_date_human = end_date.strftime("%Y-%m-%d %H:%M:%SZ")

raise OpenSearchError(
"OpenSearch's search for existing report \
error: {}".format(error_.__str__())
@@ -541,7 +539,7 @@ def save_forensic_report_to_opensearch(
forensic_report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
monthly_indexes: bool = False,
number_of_shards: int = 1,
number_of_replicas: int = 0,
):
@@ -569,7 +567,7 @@ def save_forensic_report_to_opensearch(
sample_date = forensic_report["parsed_sample"]["date"]
sample_date = human_timestamp_to_datetime(sample_date)
original_headers = forensic_report["parsed_sample"]["headers"]
headers = dict()
headers: dict[str, Any] = {}
for original_header in original_headers:
headers[original_header.lower()] = original_headers[original_header]

@@ -708,9 +706,9 @@ def save_smtp_tls_report_to_opensearch(
report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
number_of_shards: Optional[int] = 1,
number_of_replicas: Optional[int] = 0,
monthly_indexes: bool = False,
number_of_shards: int = 1,
number_of_replicas: int = 0,
):
"""
Saves a parsed SMTP TLS report to OpenSearch

@@ -2,9 +2,9 @@

from __future__ import annotations

import json
from typing import Any

import json
import boto3

from parsedmarc.log import logger
@@ -51,7 +51,7 @@ class S3Client(object):
aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key,
)
self.bucket: Any = self.s3.Bucket(self.bucket_name)
self.bucket = self.s3.Bucket(self.bucket_name) # type: ignore

def save_aggregate_report_to_s3(self, report: dict[str, Any]):
self.save_report_to_s3(report, "aggregate")

@@ -2,15 +2,13 @@

from __future__ import annotations

from typing import Any, Union


from urllib.parse import urlparse
import socket
import json
import socket
from typing import Any, Union
from urllib.parse import urlparse

import urllib3
import requests
import urllib3

from parsedmarc.constants import USER_AGENT
from parsedmarc.log import logger

@@ -3,13 +3,13 @@

from __future__ import annotations

import json
import logging
import logging.handlers

from typing import Any


import json
import socket
import ssl
import time
from typing import Any, Optional

from parsedmarc import (
parsed_aggregate_reports_to_csv_rows,
@@ -21,37 +21,161 @@ from parsedmarc import (
class SyslogClient(object):
"""A client for Syslog"""

def __init__(self, server_name: str, server_port: int):
def __init__(
self,
server_name: str,
server_port: int,
protocol: str = "udp",
cafile_path: Optional[str] = None,
certfile_path: Optional[str] = None,
keyfile_path: Optional[str] = None,
timeout: float = 5.0,
retry_attempts: int = 3,
retry_delay: int = 5,
):
"""
Initializes the SyslogClient
Args:
server_name (str): The Syslog server
server_port (int): The Syslog UDP port
server_port (int): The Syslog port
protocol (str): The protocol to use: "udp", "tcp", or "tls" (Default: "udp")
cafile_path (str): Path to CA certificate file for TLS server verification (Optional)
certfile_path (str): Path to client certificate file for TLS authentication (Optional)
keyfile_path (str): Path to client private key file for TLS authentication (Optional)
timeout (float): Connection timeout in seconds for TCP/TLS (Default: 5.0)
retry_attempts (int): Number of retry attempts for failed connections (Default: 3)
retry_delay (int): Delay in seconds between retry attempts (Default: 5)
"""
self.server_name = server_name
self.server_port = server_port
self.protocol = protocol.lower()
self.timeout = timeout
self.retry_attempts = retry_attempts
self.retry_delay = retry_delay

self.logger = logging.getLogger("parsedmarc_syslog")
self.logger.setLevel(logging.INFO)
log_handler = logging.handlers.SysLogHandler(address=(server_name, server_port))

# Create the appropriate syslog handler based on protocol
log_handler = self._create_syslog_handler(
server_name,
server_port,
self.protocol,
cafile_path,
certfile_path,
keyfile_path,
timeout,
retry_attempts,
retry_delay,
)

self.logger.addHandler(log_handler)

def save_aggregate_report_to_syslog(
self, aggregate_reports: list[dict[str, Any]]
):
def _create_syslog_handler(
self,
server_name: str,
server_port: int,
protocol: str,
cafile_path: Optional[str],
certfile_path: Optional[str],
keyfile_path: Optional[str],
timeout: float,
retry_attempts: int,
retry_delay: int,
) -> logging.handlers.SysLogHandler:
"""
Creates a SysLogHandler with the specified protocol and TLS settings
"""
if protocol == "udp":
# UDP protocol (default, backward compatible)
return logging.handlers.SysLogHandler(
address=(server_name, server_port),
socktype=socket.SOCK_DGRAM,
)
elif protocol in ["tcp", "tls"]:
# TCP or TLS protocol with retry logic
for attempt in range(1, retry_attempts + 1):
try:
if protocol == "tcp":
# TCP without TLS
handler = logging.handlers.SysLogHandler(
address=(server_name, server_port),
socktype=socket.SOCK_STREAM,
)
# Set timeout on the socket
if hasattr(handler, "socket") and handler.socket:
handler.socket.settimeout(timeout)
return handler
else:
# TLS protocol
# Create SSL context with secure defaults
ssl_context = ssl.create_default_context()

# Explicitly set minimum TLS version to 1.2 for security
ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2

# Configure server certificate verification
if cafile_path:
ssl_context.load_verify_locations(cafile=cafile_path)

# Configure client certificate authentication
if certfile_path and keyfile_path:
ssl_context.load_cert_chain(
certfile=certfile_path,
keyfile=keyfile_path,
)
elif certfile_path or keyfile_path:
# Warn if only one of the two required parameters is provided
self.logger.warning(
"Both certfile_path and keyfile_path are required for "
"client certificate authentication. Client authentication "
"will not be used."
)

# Create TCP handler first
handler = logging.handlers.SysLogHandler(
address=(server_name, server_port),
socktype=socket.SOCK_STREAM,
)

# Wrap socket with TLS
if hasattr(handler, "socket") and handler.socket:
handler.socket = ssl_context.wrap_socket(
handler.socket,
server_hostname=server_name,
)
handler.socket.settimeout(timeout)

return handler

except Exception as e:
if attempt < retry_attempts:
self.logger.warning(
f"Syslog connection attempt {attempt}/{retry_attempts} failed: {e}. "
f"Retrying in {retry_delay} seconds..."
)
time.sleep(retry_delay)
else:
self.logger.error(
f"Syslog connection failed after {retry_attempts} attempts: {e}"
)
raise
else:
raise ValueError(
f"Invalid protocol '{protocol}'. Must be 'udp', 'tcp', or 'tls'."
)

def save_aggregate_report_to_syslog(self, aggregate_reports: list[dict[str, Any]]):
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
for row in rows:
self.logger.info(json.dumps(row))

def save_forensic_report_to_syslog(
self, forensic_reports: list[dict[str, Any]]
):
def save_forensic_report_to_syslog(self, forensic_reports: list[dict[str, Any]]):
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
for row in rows:
self.logger.info(json.dumps(row))

def save_smtp_tls_report_to_syslog(
self, smtp_tls_reports: list[dict[str, Any]]
):
def save_smtp_tls_report_to_syslog(self, smtp_tls_reports: list[dict[str, Any]]):
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
for row in rows:
self.logger.info(json.dumps(row))
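A short, self-contained usage sketch for the TLS-capable SyslogClient shown in the diff above. This is an illustration only: the host name, port, and CA path are placeholders, and the import path assumes the client lives in parsedmarc.syslog as in current releases.

from parsedmarc.syslog import SyslogClient

# Placeholder endpoint and CA bundle; substitute real values for your collector.
client = SyslogClient(
    server_name="syslog.example.com",
    server_port=6514,
    protocol="tls",
    cafile_path="/etc/ssl/certs/ca.pem",
    timeout=5.0,
    retry_attempts=3,
    retry_delay=5,
)

# Each parsed aggregate report row is emitted as one JSON syslog message.
client.save_aggregate_report_to_syslog([])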
220
parsedmarc/types.py
Normal file
@@ -0,0 +1,220 @@
from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional, TypedDict, Union

# NOTE: This module is intentionally Python 3.9 compatible.
# - No PEP 604 unions (A | B)
# - No typing.NotRequired / Required (3.11+) to avoid an extra dependency.
# For optional keys, use total=False TypedDicts.


ReportType = Literal["aggregate", "forensic", "smtp_tls"]


class AggregateReportMetadata(TypedDict):
org_name: str
org_email: str
org_extra_contact_info: Optional[str]
report_id: str
begin_date: str
end_date: str
timespan_requires_normalization: bool
original_timespan_seconds: int
errors: List[str]


class AggregatePolicyPublished(TypedDict):
domain: str
adkim: str
aspf: str
p: str
sp: str
pct: str
fo: str


class IPSourceInfo(TypedDict):
ip_address: str
country: Optional[str]
reverse_dns: Optional[str]
base_domain: Optional[str]
name: Optional[str]
type: Optional[str]


class AggregateAlignment(TypedDict):
spf: bool
dkim: bool
dmarc: bool


class AggregateIdentifiers(TypedDict):
header_from: str
envelope_from: Optional[str]
envelope_to: Optional[str]


class AggregatePolicyOverrideReason(TypedDict):
type: Optional[str]
comment: Optional[str]


class AggregateAuthResultDKIM(TypedDict):
domain: str
result: str
selector: str


class AggregateAuthResultSPF(TypedDict):
domain: str
result: str
scope: str


class AggregateAuthResults(TypedDict):
dkim: List[AggregateAuthResultDKIM]
spf: List[AggregateAuthResultSPF]


class AggregatePolicyEvaluated(TypedDict):
disposition: str
dkim: str
spf: str
policy_override_reasons: List[AggregatePolicyOverrideReason]


class AggregateRecord(TypedDict):
interval_begin: str
interval_end: str
source: IPSourceInfo
count: int
alignment: AggregateAlignment
policy_evaluated: AggregatePolicyEvaluated
disposition: str
identifiers: AggregateIdentifiers
auth_results: AggregateAuthResults


class AggregateReport(TypedDict):
xml_schema: str
report_metadata: AggregateReportMetadata
policy_published: AggregatePolicyPublished
records: List[AggregateRecord]


class EmailAddress(TypedDict):
display_name: Optional[str]
address: str
local: Optional[str]
domain: Optional[str]


class EmailAttachment(TypedDict, total=False):
filename: Optional[str]
mail_content_type: Optional[str]
sha256: Optional[str]


ParsedEmail = TypedDict(
"ParsedEmail",
{
# This is a lightly-specified version of mailsuite/mailparser JSON.
# It focuses on the fields parsedmarc uses in forensic handling.
"headers": Dict[str, Any],
"subject": Optional[str],
"filename_safe_subject": Optional[str],
"date": Optional[str],
"from": EmailAddress,
"to": List[EmailAddress],
"cc": List[EmailAddress],
"bcc": List[EmailAddress],
"attachments": List[EmailAttachment],
"body": Optional[str],
"has_defects": bool,
"defects": Any,
"defects_categories": Any,
},
total=False,
)


class ForensicReport(TypedDict):
feedback_type: Optional[str]
user_agent: Optional[str]
version: Optional[str]
original_envelope_id: Optional[str]
original_mail_from: Optional[str]
original_rcpt_to: Optional[str]
arrival_date: str
arrival_date_utc: str
authentication_results: Optional[str]
delivery_result: Optional[str]
auth_failure: List[str]
authentication_mechanisms: List[str]
dkim_domain: Optional[str]
reported_domain: str
sample_headers_only: bool
source: IPSourceInfo
sample: str
parsed_sample: ParsedEmail


class SMTPTLSFailureDetails(TypedDict):
result_type: str
failed_session_count: int


class SMTPTLSFailureDetailsOptional(SMTPTLSFailureDetails, total=False):
sending_mta_ip: str
receiving_ip: str
receiving_mx_hostname: str
receiving_mx_helo: str
additional_info_uri: str
failure_reason_code: str
ip_address: str


class SMTPTLSPolicySummary(TypedDict):
policy_domain: str
policy_type: str
successful_session_count: int
failed_session_count: int


class SMTPTLSPolicy(SMTPTLSPolicySummary, total=False):
policy_strings: List[str]
mx_host_patterns: List[str]
failure_details: List[SMTPTLSFailureDetailsOptional]


class SMTPTLSReport(TypedDict):
organization_name: str
begin_date: str
end_date: str
contact_info: Union[str, List[str]]
report_id: str
policies: List[SMTPTLSPolicy]


class AggregateParsedReport(TypedDict):
report_type: Literal["aggregate"]
report: AggregateReport


class ForensicParsedReport(TypedDict):
report_type: Literal["forensic"]
report: ForensicReport


class SMTPTLSParsedReport(TypedDict):
report_type: Literal["smtp_tls"]
report: SMTPTLSReport


ParsedReport = Union[AggregateParsedReport, ForensicParsedReport, SMTPTLSParsedReport]


class ParsingResults(TypedDict):
aggregate_reports: List[AggregateReport]
forensic_reports: List[ForensicReport]
smtp_tls_reports: List[SMTPTLSReport]

@@ -4,25 +4,23 @@

from __future__ import annotations

from typing import Optional, Union, TypedDict, Any

import logging
import os
from datetime import datetime
from datetime import timezone
from datetime import timedelta
from expiringdict import ExpiringDict
import tempfile
import subprocess
import shutil
import mailparser
import json
import hashlib
import base64
import mailbox
import re
import csv
import hashlib
import io
import json
import logging
import mailbox
import os
import re
import shutil
import subprocess
import tempfile
from datetime import datetime, timedelta, timezone
from typing import Optional, TypedDict, Union, cast

import mailparser
from expiringdict import ExpiringDict

try:
from importlib.resources import files
@@ -31,19 +29,19 @@ except ImportError:
from importlib.resources import files


from dateutil.parser import parse as parse_date
import dns.reversename
import dns.resolver
import dns.exception
import dns.resolver
import dns.reversename
import geoip2.database
import geoip2.errors
import publicsuffixlist
import requests
from dateutil.parser import parse as parse_date

from parsedmarc.log import logger
import parsedmarc.resources.dbip
import parsedmarc.resources.maps
from parsedmarc.constants import USER_AGENT
from parsedmarc.log import logger

parenthesis_regex = re.compile(r"\s*\(.*\)\s*")

@@ -66,12 +64,21 @@ class DownloadError(RuntimeError):
"""Raised when an error occurs when downloading a file"""


class EmailAddress(TypedDict):
"""Parsed email address information"""
display_name: Optional[str]
address: str
local: Optional[str]
domain: Optional[str]
class ReverseDNSService(TypedDict):
name: str
type: Optional[str]


ReverseDNSMap = dict[str, ReverseDNSService]


class IPAddressInfo(TypedDict):
ip_address: str
reverse_dns: Optional[str]
country: Optional[str]
base_domain: Optional[str]
name: Optional[str]
type: Optional[str]


def decode_base64(data: str) -> bytes:
@@ -85,14 +92,14 @@
bytes: The decoded bytes

"""
data = bytes(data, encoding="ascii")
missing_padding = len(data) % 4
data_bytes = bytes(data, encoding="ascii")
missing_padding = len(data_bytes) % 4
if missing_padding != 0:
data += b"=" * (4 - missing_padding)
return base64.b64decode(data)
data_bytes += b"=" * (4 - missing_padding)
return base64.b64decode(data_bytes)


def get_base_domain(domain: str) -> str:
def get_base_domain(domain: str) -> Optional[str]:
"""
Gets the base domain name for the given domain

@@ -121,8 +128,8 @@ def query_dns(
record_type: str,
*,
cache: Optional[ExpiringDict] = None,
nameservers: list[str] = None,
timeout: int = 2.0,
nameservers: Optional[list[str]] = None,
timeout: float = 2.0,
) -> list[str]:
"""
Queries DNS
@@ -142,9 +149,9 @@ def query_dns(
record_type = record_type.upper()
cache_key = "{0}_{1}".format(domain, record_type)
if cache:
records = cache.get(cache_key, None)
if records:
return records
cached_records = cache.get(cache_key, None)
if isinstance(cached_records, list):
return cast(list[str], cached_records)

resolver = dns.resolver.Resolver()
timeout = float(timeout)
@@ -158,26 +165,12 @@ def query_dns(
resolver.nameservers = nameservers
resolver.timeout = timeout
resolver.lifetime = timeout
if record_type == "TXT":
resource_records = list(
map(
lambda r: r.strings,
resolver.resolve(domain, record_type, lifetime=timeout),
)
)
_resource_record = [
resource_record[0][:0].join(resource_record)
for resource_record in resource_records
if resource_record
]
records = [r.decode() for r in _resource_record]
else:
records = list(
map(
lambda r: r.to_text().replace('"', "").rstrip("."),
resolver.resolve(domain, record_type, lifetime=timeout),
)
records = list(
map(
lambda r: r.to_text().replace('"', "").rstrip("."),
resolver.resolve(domain, record_type, lifetime=timeout),
)
)
if cache:
cache[cache_key] = records

@@ -188,9 +181,9 @@ def get_reverse_dns(
ip_address,
*,
cache: Optional[ExpiringDict] = None,
nameservers: list[str] = None,
timeout: int = 2.0,
) -> str:
nameservers: Optional[list[str]] = None,
timeout: float = 2.0,
) -> Optional[str]:
"""
Resolves an IP address to a hostname using a reverse DNS query

@@ -208,7 +201,7 @@ def get_reverse_dns(
try:
address = dns.reversename.from_address(ip_address)
hostname = query_dns(
address, "PTR", cache=cache, nameservers=nameservers, timeout=timeout
str(address), "PTR", cache=cache, nameservers=nameservers, timeout=timeout
)[0]

except dns.exception.DNSException as e:
@@ -245,7 +238,7 @@ def timestamp_to_human(timestamp: int) -> str:


def human_timestamp_to_datetime(
human_timestamp: str, *, to_utc: Optional[bool] = False
human_timestamp: str, *, to_utc: bool = False
) -> datetime:
"""
Converts a human-readable timestamp into a Python ``datetime`` object
@@ -276,10 +269,12 @@ def human_timestamp_to_unix_timestamp(human_timestamp: str) -> int:
float: The converted timestamp
"""
human_timestamp = human_timestamp.replace("T", " ")
return human_timestamp_to_datetime(human_timestamp).timestamp()
return int(human_timestamp_to_datetime(human_timestamp).timestamp())


def get_ip_address_country(ip_address: str, *, db_path: Optional[str] = None) -> str:
def get_ip_address_country(
ip_address: str, *, db_path: Optional[str] = None
) -> Optional[str]:
"""
Returns the ISO code for the country associated
with the given IPv4 or IPv6 address
@@ -342,14 +337,14 @@ def get_ip_address_country(ip_address: str, *, db_path: Optional[str] = None) ->


def get_service_from_reverse_dns_base_domain(
base_domain: str,
base_domain,
*,
always_use_local_file: Optional[bool] = False,
always_use_local_file: bool = False,
local_file_path: Optional[str] = None,
url: Optional[str] = None,
offline: Optional[bool] = False,
reverse_dns_map: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
offline: bool = False,
reverse_dns_map: Optional[ReverseDNSMap] = None,
) -> ReverseDNSService:
"""
Returns the service name of a given base domain name from reverse DNS.

@@ -366,12 +361,6 @@ def get_service_from_reverse_dns_base_domain(
the supplied reverse_dns_base_domain and the type will be None
"""

def load_csv(_csv_file):
reader = csv.DictReader(_csv_file)
for row in reader:
key = row["base_reverse_dns"].lower().strip()
reverse_dns_map[key] = dict(name=row["name"], type=row["type"])

base_domain = base_domain.lower().strip()
if url is None:
url = (
@@ -379,11 +368,24 @@ def get_service_from_reverse_dns_base_domain(
"/parsedmarc/master/parsedmarc/"
"resources/maps/base_reverse_dns_map.csv"
)
reverse_dns_map_value: ReverseDNSMap
if reverse_dns_map is None:
reverse_dns_map = dict()
reverse_dns_map_value = {}
else:
reverse_dns_map_value = reverse_dns_map

def load_csv(_csv_file):
reader = csv.DictReader(_csv_file)
for row in reader:
key = row["base_reverse_dns"].lower().strip()
reverse_dns_map_value[key] = {
"name": row["name"],
"type": row["type"],
}

csv_file = io.StringIO()

if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
if not (offline or always_use_local_file) and len(reverse_dns_map_value) == 0:
try:
logger.debug(f"Trying to fetch reverse DNS map from {url}...")
headers = {"User-Agent": USER_AGENT}
@@ -400,7 +402,7 @@ def get_service_from_reverse_dns_base_domain(
logging.debug("Response body:")
logger.debug(csv_file.read())

if len(reverse_dns_map) == 0:
if len(reverse_dns_map_value) == 0:
logger.info("Loading included reverse DNS map...")
path = str(
files(parsedmarc.resources.maps).joinpath("base_reverse_dns_map.csv")
@@ -409,27 +411,28 @@ def get_service_from_reverse_dns_base_domain(
path = local_file_path
with open(path) as csv_file:
load_csv(csv_file)
service: ReverseDNSService
try:
service = reverse_dns_map[base_domain]
service = reverse_dns_map_value[base_domain]
except KeyError:
service = dict(name=base_domain, type=None)
service = {"name": base_domain, "type": None}

return service


def get_ip_address_info(
ip_address: str,
ip_address,
*,
ip_db_path: Optional[str] = None,
reverse_dns_map_path: Optional[str] = None,
always_use_local_files: Optional[bool] = False,
always_use_local_files: bool = False,
reverse_dns_map_url: Optional[str] = None,
cache: Optional[ExpiringDict] = None,
reverse_dns_map: Optional[dict[str, Any]] = None,
offline: Optional[bool] = False,
reverse_dns_map: Optional[ReverseDNSMap] = None,
offline: bool = False,
nameservers: Optional[list[str]] = None,
timeout: Optional[float] = 2.0,
) -> dict[str, Any]:
timeout: float = 2.0,
) -> IPAddressInfo:
"""
Returns reverse DNS and country information for the given IP address

@@ -452,12 +455,22 @@ def get_ip_address_info(
"""
ip_address = ip_address.lower()
if cache is not None:
info = cache.get(ip_address, None)
if info:
cached_info = cache.get(ip_address, None)
if (
cached_info
and isinstance(cached_info, dict)
and "ip_address" in cached_info
):
logger.debug(f"IP address {ip_address} was found in cache")
return info
info = dict()
info["ip_address"] = ip_address
return cast(IPAddressInfo, cached_info)
info: IPAddressInfo = {
"ip_address": ip_address,
"reverse_dns": None,
"country": None,
"base_domain": None,
"name": None,
"type": None,
}
if offline:
reverse_dns = None
else:
@@ -467,9 +480,6 @@ def get_ip_address_info(
country = get_ip_address_country(ip_address, db_path=ip_db_path)
info["country"] = country
info["reverse_dns"] = reverse_dns
info["base_domain"] = None
info["name"] = None
info["type"] = None
if reverse_dns is not None:
base_domain = get_base_domain(reverse_dns)
if base_domain is not None:
@@ -494,7 +504,7 @@ def get_ip_address_info(
return info


def parse_email_address(original_address: str) -> EmailAddress:
def parse_email_address(original_address: str) -> dict[str, Optional[str]]:
if original_address[0] == "":
display_name = None
else:
@@ -558,7 +568,7 @@ def is_mbox(path: str) -> bool:
return _is_mbox


def is_outlook_msg(content: Union[bytes, Any]) -> bool:
def is_outlook_msg(content) -> bool:
"""
Checks if the given content is an Outlook msg OLE/MSG file

@@ -573,7 +583,7 @@ def is_outlook_msg(content: Union[bytes, Any]) -> bool:
)


def convert_outlook_msg(msg_bytes: bytes) -> str:
def convert_outlook_msg(msg_bytes: bytes) -> bytes:
"""
Uses the ``msgconvert`` Perl utility to convert an Outlook MS file to
standard RFC 822 format
@@ -582,7 +592,7 @@ def convert_outlook_msg(msg_bytes: bytes) -> str:
msg_bytes (bytes): the content of the .msg file

Returns:
A RFC 822 string
A RFC 822 bytes payload
"""
if not is_outlook_msg(msg_bytes):
raise ValueError("The supplied bytes are not an Outlook MSG file")
@@ -591,14 +601,13 @@ def convert_outlook_msg(msg_bytes: bytes) -> str:
os.chdir(tmp_dir)
with open("sample.msg", "wb") as msg_file:
msg_file.write(msg_bytes)
rfc822_bytes: bytes
try:
subprocess.check_call(
["msgconvert", "sample.msg"], stdout=null_file, stderr=null_file
)
eml_path = "sample.eml"
with open(eml_path, "rb") as eml_file:
rfc822_bytes = eml_file.read()
rfc822 = eml_file.read()
except FileNotFoundError:
raise EmailParserError(
"Failed to convert Outlook MSG: msgconvert utility not found"
@@ -607,12 +616,12 @@ def convert_outlook_msg(msg_bytes: bytes) -> str:
os.chdir(orig_dir)
shutil.rmtree(tmp_dir)

return rfc822_bytes.decode("utf-8", errors="replace")
return rfc822


def parse_email(
data: Union[bytes, str], *, strip_attachment_payloads: Optional[bool] = False
) -> dict[str, Any]:
data: Union[bytes, str], *, strip_attachment_payloads: bool = False
) -> dict:
"""
A simplified email parser

@@ -627,8 +636,7 @@ def parse_email(
if isinstance(data, bytes):
if is_outlook_msg(data):
data = convert_outlook_msg(data)
else:
data = data.decode("utf-8", errors="replace")
data = data.decode("utf-8", errors="replace")
parsed_email = mailparser.parse_from_string(data)
headers = json.loads(parsed_email.headers_json).copy()
parsed_email = json.loads(parsed_email.mail_json).copy()

@@ -29,7 +29,7 @@ classifiers = [
"Operating System :: OS Independent",
"Programming Language :: Python :: 3"
]
requires-python = ">=3.9, <3.14"
requires-python = ">=3.9"
dependencies = [
"azure-identity>=1.8.0",
"azure-monitor-ingestion>=1.0.0",
@@ -48,7 +48,7 @@ dependencies = [
"imapclient>=2.1.0",
"kafka-python-ng>=2.2.2",
"lxml>=4.4.0",
"mailsuite>=1.9.18",
"mailsuite>=1.11.2",
"msgraph-core==0.2.2",
"opensearch-py>=2.4.2,<=3.0.0",
"publicsuffixlist>=0.10.0",

19
tests.py
Normal file → Executable file
@@ -12,6 +12,9 @@ from lxml import etree
import parsedmarc
import parsedmarc.utils

# Detect if running in GitHub Actions to skip DNS lookups
OFFLINE_MODE = os.environ.get("GITHUB_ACTIONS", "false").lower() == "true"


def minify_xml(xml_string):
parser = etree.XMLParser(remove_blank_text=True)
@@ -121,7 +124,7 @@ class Test(unittest.TestCase):
continue
print("Testing {0}: ".format(sample_path), end="")
parsed_report = parsedmarc.parse_report_file(
sample_path, always_use_local_files=True
sample_path, always_use_local_files=True, offline=OFFLINE_MODE
)["report"]
parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
print("Passed!")
@@ -129,7 +132,7 @@ class Test(unittest.TestCase):
def testEmptySample(self):
"""Test empty/unparasable report"""
with self.assertRaises(parsedmarc.ParserError):
parsedmarc.parse_report_file("samples/empty.xml")
parsedmarc.parse_report_file("samples/empty.xml", offline=OFFLINE_MODE)

def testForensicSamples(self):
"""Test sample forensic/ruf/failure DMARC reports"""
@@ -139,8 +142,12 @@ class Test(unittest.TestCase):
print("Testing {0}: ".format(sample_path), end="")
with open(sample_path) as sample_file:
sample_content = sample_file.read()
parsed_report = parsedmarc.parse_report_email(sample_content)["report"]
parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
parsed_report = parsedmarc.parse_report_email(
sample_content, offline=OFFLINE_MODE
)["report"]
parsed_report = parsedmarc.parse_report_file(
sample_path, offline=OFFLINE_MODE
)["report"]
parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
print("Passed!")

@@ -152,7 +159,9 @@ class Test(unittest.TestCase):
if os.path.isdir(sample_path):
continue
print("Testing {0}: ".format(sample_path), end="")
parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
parsed_report = parsedmarc.parse_report_file(
sample_path, offline=OFFLINE_MODE
)["report"]
parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
print("Passed!")
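For reference, a minimal sketch of how the TypedDicts introduced in parsedmarc/types.py above can be used. The values are placeholders; TypedDicts are ordinary dicts at runtime, so the checking comes from a static type checker such as mypy or pyright rather than from parsedmarc itself.

from parsedmarc.types import AggregateAlignment, ParsingResults

alignment: AggregateAlignment = {"spf": True, "dkim": False, "dmarc": True}

results: ParsingResults = {
    "aggregate_reports": [],
    "forensic_reports": [],
    "smtp_tls_reports": [],
}

# A static checker flags unknown keys or wrong value types here;
# at runtime these remain plain dictionaries.
assert alignment["dmarc"] is True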