Compare commits

...

11 Commits
9.4.0 ... 9.5.5

Author SHA1 Message Date
Sean Whalen
6a13f38ac6 Enhance debug logging for output client initialization and add environment variable aliases for debug settings 2026-03-27 10:31:43 -04:00
Sean Whalen
33ab4d9de9 Update CHANGELOG.md to include fix for current_time format in MSGraphConnection 2026-03-27 10:11:12 -04:00
Sean Whalen
f49ca0863d Bump version to 9.5.5, implement exponential backoff for output client initialization, update http_auth format, and add debug logging for OpenSearch connections 2026-03-27 10:09:08 -04:00
mihugo
e1851d026a Fix current_time format for MSGraphConnection (#708)
Should have caught this on the previous fix for since. The current time is used on line 2145: connection.fetch_messages(reports_folder, since=current_time)
If that code is called — and it usually won't be, depending upon configuration — it will fail with the time format being wrong: yyyy-mm-ddThh:mm:ss.zzzzzz+00:00Z. This removes the extra "Z", which is not needed since the UTC offset is already specified, and which makes the timestamp invalid.
2026-03-26 13:04:27 -04:00
Sean Whalen
1542936468 Bump version to 9.5.4, enhance Maildir folder handling, and add config key aliases for environment variable compatibility 2026-03-25 23:22:46 -04:00
Sean Whalen
fb3c38a8b8 9.5.3
- Fixed `FileNotFoundError` when using Maildir with Docker volume mounts. Python's `mailbox.Maildir(create=True)` only creates `cur/new/tmp` subdirectories when the top-level directory doesn't exist; Docker volume mounts pre-create the directory as empty, skipping subdirectory creation. parsedmarc now explicitly creates the subdirectories when `maildir_create` is enabled.
- Maildir UID mismatch no longer crashes the process. In Docker containers where volume ownership differs from the container UID, parsedmarc now logs a warning instead of raising an exception. Also handles `os.setuid` failures gracefully in containers without `CAP_SETUID`.
- Token file writes (MS Graph and Gmail) now create parent directories automatically, preventing `FileNotFoundError` when the token path points to a directory that doesn't yet exist.
- File paths from config (`token_file`, `credentials_file`, `cert_path`, `log_file`, `output`, `ip_db_path`, `maildir_path`, syslog cert paths, etc.) now expand `~` and `$VAR` references via `os.path.expanduser`/`os.path.expandvars`.
2026-03-25 21:29:08 -04:00
Sean Whalen
c9a6145505 9.5.3
- Fixed `FileNotFoundError` when using Maildir with Docker volume mounts. Python's `mailbox.Maildir(create=True)` only creates `cur/new/tmp` subdirectories when the top-level directory doesn't exist; Docker volume mounts pre-create the directory as empty, skipping subdirectory creation. parsedmarc now explicitly creates the subdirectories when `maildir_create` is enabled.
2026-03-25 21:13:34 -04:00
Sean Whalen
e1bdbeb257 Bump version to 9.5.2 and fix interpolation issues in config parser 2026-03-25 20:21:08 -04:00
Sean Whalen
12c4676b79 9.5.1
- Correct ISO format for MSGraphConnection timestamps (PR #706)
2026-03-25 19:43:24 -04:00
mihugo
cda039ee27 Correct ISO format for MSGraphConnection timestamps (#706)
Fix formatting of ISO 8601 date strings for MSGraphConnection. The format yyyy-mm-ddThh:mm:ss.zzzzzz+00:00 already has a timezone indicated; the extra Z is invalid in this format. Specifying a "since" in the config file causes msgraph to error due to an invalid timestamp.
2026-03-25 19:38:23 -04:00
Sean Whalen
ff0ca6538c 9.5.0
Add environment variable configuration support and update documentation

- Introduced support for configuration via environment variables using the `PARSEDMARC_{SECTION}_{KEY}` format.
- Added `PARSEDMARC_CONFIG_FILE` variable to specify the config file path.
- Enabled env-only mode for file-less Docker deployments.
- Implemented explicit read permission checks on config files.
- Updated changelog and usage documentation to reflect these changes.
2026-03-25 19:25:21 -04:00
13 changed files with 986 additions and 77 deletions

View File

@@ -26,11 +26,13 @@
"boto",
"brakhane",
"Brightmail",
"cafile",
"CEST",
"CHACHA",
"checkdmarc",
"Codecov",
"confnew",
"creds",
"dateparser",
"dateutil",
"Davmail",
@@ -52,6 +54,7 @@
"geoipupdate",
"Geolite",
"geolocation",
"getuid",
"githubpages",
"Grafana",
"hostnames",
@@ -75,6 +78,7 @@
"LISTSERV",
"loganalytics",
"lxml",
"Maildir",
"mailparser",
"mailrelay",
"mailsuite",
@@ -128,6 +132,7 @@
"sdist",
"Servernameone",
"setuptools",
"signum",
"smartquotes",
"SMTPTLS",
"sortlists",

View File

@@ -42,7 +42,7 @@ To skip DNS lookups during testing, set `GITHUB_ACTIONS=true`.
### Key modules
- `parsedmarc/__init__.py` — Core parsing logic. Main functions: `parse_report_file()`, `parse_report_email()`, `parse_aggregate_report_xml()`, `parse_forensic_report()`, `parse_smtp_tls_report_json()`, `get_dmarc_reports_from_mailbox()`, `watch_inbox()`
- `parsedmarc/cli.py` — CLI entry point (`_main`), config file parsing, output orchestration
- `parsedmarc/cli.py` — CLI entry point (`_main`), config file parsing (`_load_config` + `_parse_config`), output orchestration. Supports configuration via INI files, `PARSEDMARC_{SECTION}_{KEY}` environment variables, or both (env vars override file values).
- `parsedmarc/types.py` — TypedDict definitions for all report types (`AggregateReport`, `ForensicReport`, `SMTPTLSReport`, `ParsingResults`)
- `parsedmarc/utils.py` — IP/DNS/GeoIP enrichment, base64 decoding, compression handling
- `parsedmarc/mail/` — Polymorphic mail connections: `IMAPConnection`, `GmailConnection`, `MSGraphConnection`, `MaildirConnection`
@@ -52,6 +52,10 @@ To skip DNS lookups during testing, set `GITHUB_ACTIONS=true`.
`ReportType = Literal["aggregate", "forensic", "smtp_tls"]`. Exception hierarchy: `ParserError` → `InvalidDMARCReport` → `InvalidAggregateReport`/`InvalidForensicReport`, and `InvalidSMTPTLSReport`.
### Configuration
Config priority: CLI args > env vars > config file > defaults. Env var naming: `PARSEDMARC_{SECTION}_{KEY}` (e.g. `PARSEDMARC_IMAP_PASSWORD`). Section names with underscores use longest-prefix matching (`PARSEDMARC_SPLUNK_HEC_TOKEN` → `[splunk_hec] token`). Some INI keys have short aliases for env var friendliness (e.g. `[maildir] create` for `maildir_create`). File path values are expanded via `os.path.expanduser`/`os.path.expandvars`. Config can be loaded purely from env vars with no file (`PARSEDMARC_CONFIG_FILE` sets the file path).
### Caching
IP address info cached for 4 hours, seen aggregate report IDs cached for 1 hour (via `ExpiringDict`).
@@ -62,3 +66,6 @@ IP address info cached for 4 hours, seen aggregate report IDs cached for 1 hour
- TypedDict for structured data, type hints throughout
- Python ≥3.10 required
- Tests are in a single `tests.py` file using unittest; sample reports live in `samples/`
- File path config values must be wrapped with `_expand_path()` in `cli.py`
- Maildir UID checks are intentionally relaxed (warn, don't crash) for Docker compatibility
- Token file writes must create parent directories before opening for write

View File

@@ -1,5 +1,55 @@
# Changelog
## 9.5.5
### Fixed
- Output client initialization now retries up to 4 times with exponential backoff before exiting. This fixes persistent `Connection refused` errors in Docker when OpenSearch or Elasticsearch is momentarily unavailable at startup.
- Use tuple format for `http_auth` in OpenSearch and Elasticsearch connections, matching the documented convention and avoiding potential issues if the password contains a colon.
- Fix current_time format for MSGraphConnection (current-time) (PR #708)
### Changes
- Added debug logging to all output client initialization (S3, syslog, Splunk HEC, Kafka, GELF, webhook, Elasticsearch, OpenSearch).
- `DEBUG=true` and `PARSEDMARC_DEBUG=true` are now accepted as short aliases for `PARSEDMARC_GENERAL_DEBUG=true`.
## 9.5.4
### Fixed
- Maildir `fetch_messages` now respects the `reports_folder` argument. Previously it always read from the top-level Maildir, ignoring the configured reports folder. `fetch_message`, `delete_message`, and `move_message` now also operate on the correct active folder.
- Config key aliases for env var compatibility: `[maildir] create` and `path` are now accepted as aliases for `maildir_create` and `maildir_path`, and `[msgraph] url` for `graph_url`. This allows natural env var names like `PARSEDMARC_MAILDIR_CREATE` to work without the redundant `PARSEDMARC_MAILDIR_MAILDIR_CREATE`.
## 9.5.3
### Fixed
- Fixed `FileNotFoundError` when using Maildir with Docker volume mounts. Python's `mailbox.Maildir(create=True)` only creates `cur/new/tmp` subdirectories when the top-level directory doesn't exist; Docker volume mounts pre-create the directory as empty, skipping subdirectory creation. parsedmarc now explicitly creates the subdirectories when `maildir_create` is enabled.
- Maildir UID mismatch no longer crashes the process. In Docker containers where volume ownership differs from the container UID, parsedmarc now logs a warning instead of raising an exception. Also handles `os.setuid` failures gracefully in containers without `CAP_SETUID`.
- Token file writes (MS Graph and Gmail) now create parent directories automatically, preventing `FileNotFoundError` when the token path points to a directory that doesn't yet exist.
- File paths from config (`token_file`, `credentials_file`, `cert_path`, `log_file`, `output`, `ip_db_path`, `maildir_path`, syslog cert paths, etc.) now expand `~` and `$VAR` references via `os.path.expanduser`/`os.path.expandvars`.
## 9.5.2
### Fixed
- Fixed `ValueError: invalid interpolation syntax` when config values (from env vars or INI files) contain `%` characters, such as in passwords. Disabled ConfigParser's `%`-based string interpolation.
## 9.5.1
### Changes
- Correct ISO format for MSGraphConnection timestamps (PR #706)
## 9.5.0
### Added
- Environment variable configuration support: any config option can now be set via `PARSEDMARC_{SECTION}_{KEY}` environment variables (e.g. `PARSEDMARC_IMAP_PASSWORD`, `PARSEDMARC_SPLUNK_HEC_TOKEN`). Environment variables override config file values but are overridden by CLI arguments.
- `PARSEDMARC_CONFIG_FILE` environment variable to specify the config file path without the `-c` flag.
- Env-only mode: parsedmarc can now run without a config file when `PARSEDMARC_*` environment variables are set, enabling fully file-less Docker deployments.
- Explicit read permission check on config file, giving a clear error message when the container UID cannot read the file (e.g. `chmod 600` with a UID mismatch).
## 9.4.0
### Added

View File

@@ -531,6 +531,96 @@ PUT _cluster/settings
Increasing this value increases resource usage.
:::
## Environment variable configuration
Any configuration option can be set via environment variables using the
naming convention `PARSEDMARC_{SECTION}_{KEY}` (uppercase). This is
especially useful for Docker deployments where file permissions make it
difficult to use config files for secrets.
**Priority order:** CLI arguments > environment variables > config file > defaults
### Examples
```bash
# Set IMAP credentials via env vars
export PARSEDMARC_IMAP_HOST=imap.example.com
export PARSEDMARC_IMAP_USER=dmarc@example.com
export PARSEDMARC_IMAP_PASSWORD=secret
# Elasticsearch
export PARSEDMARC_ELASTICSEARCH_HOSTS=http://localhost:9200
export PARSEDMARC_ELASTICSEARCH_SSL=false
# Splunk HEC (note: section name splunk_hec becomes SPLUNK_HEC)
export PARSEDMARC_SPLUNK_HEC_URL=https://splunk.example.com
export PARSEDMARC_SPLUNK_HEC_TOKEN=my-hec-token
export PARSEDMARC_SPLUNK_HEC_INDEX=email
# General settings
export PARSEDMARC_GENERAL_SAVE_AGGREGATE=true
export PARSEDMARC_GENERAL_DEBUG=true
```
### Specifying the config file via environment variable
```bash
export PARSEDMARC_CONFIG_FILE=/etc/parsedmarc.ini
parsedmarc
```
### Running without a config file (env-only mode)
When no config file is given (neither `-c` flag nor `PARSEDMARC_CONFIG_FILE`),
parsedmarc will still pick up any `PARSEDMARC_*` environment variables. This
enables fully file-less deployments:
```bash
export PARSEDMARC_GENERAL_SAVE_AGGREGATE=true
export PARSEDMARC_GENERAL_OFFLINE=true
export PARSEDMARC_ELASTICSEARCH_HOSTS=http://elasticsearch:9200
parsedmarc /path/to/reports/*
```
### Docker Compose example
```yaml
services:
parsedmarc:
image: parsedmarc:latest
environment:
PARSEDMARC_IMAP_HOST: imap.example.com
PARSEDMARC_IMAP_USER: dmarc@example.com
PARSEDMARC_IMAP_PASSWORD: ${IMAP_PASSWORD}
PARSEDMARC_MAILBOX_WATCH: "true"
PARSEDMARC_ELASTICSEARCH_HOSTS: http://elasticsearch:9200
PARSEDMARC_GENERAL_SAVE_AGGREGATE: "true"
PARSEDMARC_GENERAL_SAVE_FORENSIC: "true"
```
### Section name mapping
For sections with underscores in the name, the full section name is used:
| Section | Env var prefix |
|------------------|-------------------------------|
| `general` | `PARSEDMARC_GENERAL_` |
| `mailbox` | `PARSEDMARC_MAILBOX_` |
| `imap` | `PARSEDMARC_IMAP_` |
| `msgraph` | `PARSEDMARC_MSGRAPH_` |
| `elasticsearch` | `PARSEDMARC_ELASTICSEARCH_` |
| `opensearch` | `PARSEDMARC_OPENSEARCH_` |
| `splunk_hec` | `PARSEDMARC_SPLUNK_HEC_` |
| `kafka` | `PARSEDMARC_KAFKA_` |
| `smtp` | `PARSEDMARC_SMTP_` |
| `s3` | `PARSEDMARC_S3_` |
| `syslog` | `PARSEDMARC_SYSLOG_` |
| `gmail_api` | `PARSEDMARC_GMAIL_API_` |
| `maildir` | `PARSEDMARC_MAILDIR_` |
| `log_analytics` | `PARSEDMARC_LOG_ANALYTICS_` |
| `gelf` | `PARSEDMARC_GELF_` |
| `webhook` | `PARSEDMARC_WEBHOOK_` |
## Performance tuning
For large mailbox imports or backfills, parsedmarc can consume a noticeable amount

View File

@@ -1955,10 +1955,8 @@ def get_dmarc_reports_from_mailbox(
)
current_time = datetime.now(timezone.utc).strftime("%d-%b-%Y")
elif isinstance(connection, MSGraphConnection):
since = (
datetime.now(timezone.utc) - timedelta(minutes=_since)
).isoformat() + "Z"
current_time = datetime.now(timezone.utc).isoformat() + "Z"
since = (datetime.now(timezone.utc) - timedelta(minutes=_since)).isoformat()
current_time = datetime.now(timezone.utc).isoformat()
elif isinstance(connection, GmailConnection):
since = (datetime.now(timezone.utc) - timedelta(minutes=_since)).strftime(
"%s"

View File

@@ -9,6 +9,7 @@ import logging
import os
import signal
import sys
import time
from argparse import ArgumentParser, Namespace
from configparser import ConfigParser
from glob import glob
@@ -75,6 +76,92 @@ def _str_to_list(s):
return list(map(lambda i: i.lstrip(), _list))
def _expand_path(p: str) -> str:
"""Expand ``~`` and ``$VAR`` references in a file path."""
return os.path.expanduser(os.path.expandvars(p))
# All known INI config section names, used for env var resolution.
_KNOWN_SECTIONS = frozenset(
{
"general",
"mailbox",
"imap",
"msgraph",
"elasticsearch",
"opensearch",
"splunk_hec",
"kafka",
"smtp",
"s3",
"syslog",
"gmail_api",
"maildir",
"log_analytics",
"gelf",
"webhook",
}
)
def _resolve_section_key(suffix: str) -> tuple:
"""Resolve an env var suffix like ``IMAP_PASSWORD`` to ``('imap', 'password')``.
Uses longest-prefix matching against known section names so that
multi-word sections like ``splunk_hec`` are handled correctly.
Returns ``(None, None)`` when no known section matches.
"""
suffix_lower = suffix.lower()
best_section = None
best_key = None
for section in _KNOWN_SECTIONS:
section_prefix = section + "_"
if suffix_lower.startswith(section_prefix):
key = suffix_lower[len(section_prefix) :]
if key and (best_section is None or len(section) > len(best_section)):
best_section = section
best_key = key
return best_section, best_key
def _apply_env_overrides(config: ConfigParser) -> None:
"""Inject ``PARSEDMARC_*`` environment variables into *config*.
Environment variables matching ``PARSEDMARC_{SECTION}_{KEY}`` override
(or create) the corresponding config-file values. Sections are created
automatically when they do not yet exist.
"""
prefix = "PARSEDMARC_"
# Short aliases that don't follow the PARSEDMARC_{SECTION}_{KEY} pattern.
_ENV_ALIASES = {
"DEBUG": ("general", "debug"),
"PARSEDMARC_DEBUG": ("general", "debug"),
}
for env_key, env_value in os.environ.items():
if env_key in _ENV_ALIASES:
section, key = _ENV_ALIASES[env_key]
elif env_key.startswith(prefix) and env_key != "PARSEDMARC_CONFIG_FILE":
suffix = env_key[len(prefix) :]
section, key = _resolve_section_key(suffix)
else:
continue
if section is None:
logger.debug("Ignoring unrecognized env var: %s", env_key)
continue
if not config.has_section(section):
config.add_section(section)
config.set(section, key, env_value)
logger.debug("Config override from env: [%s] %s", section, key)
def _configure_logging(log_level, log_file=None):
"""
Configure logging for the current process.
@@ -178,12 +265,39 @@ class ConfigurationError(Exception):
pass
def _parse_config_file(config_file, opts):
"""Parse a config file and update opts in place.
def _load_config(config_file: str | None = None) -> ConfigParser:
"""Load configuration from an INI file and/or environment variables.
Args:
config_file: Path to the .ini config file
opts: Namespace object to update with parsed values
config_file: Optional path to an .ini config file.
Returns:
A ``ConfigParser`` populated from the file (if given) and from any
``PARSEDMARC_*`` environment variables.
Raises:
ConfigurationError: If *config_file* is given but does not exist.
"""
config = ConfigParser(interpolation=None)
if config_file is not None:
abs_path = os.path.abspath(config_file)
if not os.path.exists(abs_path):
raise ConfigurationError("A file does not exist at {0}".format(abs_path))
if not os.access(abs_path, os.R_OK):
raise ConfigurationError(
"Unable to read {0} — check file permissions".format(abs_path)
)
config.read(config_file)
_apply_env_overrides(config)
return config
def _parse_config(config: ConfigParser, opts):
"""Apply a loaded ``ConfigParser`` to *opts* in place.
Args:
config: A ``ConfigParser`` (from ``_load_config``).
opts: Namespace object to update with parsed values.
Returns:
index_prefix_domain_map or None
@@ -191,13 +305,8 @@ def _parse_config_file(config_file, opts):
Raises:
ConfigurationError: If required settings are missing or invalid.
"""
abs_path = os.path.abspath(config_file)
if not os.path.exists(abs_path):
raise ConfigurationError("A file does not exist at {0}".format(abs_path))
opts.silent = True
config = ConfigParser()
index_prefix_domain_map = None
config.read(config_file)
if "general" in config.sections():
general_config = config["general"]
if "silent" in general_config:
@@ -207,7 +316,7 @@ def _parse_config_file(config_file, opts):
"normalize_timespan_threshold_hours"
)
if "index_prefix_domain_map" in general_config:
with open(general_config["index_prefix_domain_map"]) as f:
with open(_expand_path(general_config["index_prefix_domain_map"])) as f:
index_prefix_domain_map = yaml.safe_load(f)
if "offline" in general_config:
opts.offline = bool(general_config.getboolean("offline"))
@@ -216,7 +325,7 @@ def _parse_config_file(config_file, opts):
general_config.getboolean("strip_attachment_payloads")
)
if "output" in general_config:
opts.output = general_config["output"]
opts.output = _expand_path(general_config["output"])
if "aggregate_json_filename" in general_config:
opts.aggregate_json_filename = general_config["aggregate_json_filename"]
if "forensic_json_filename" in general_config:
@@ -272,11 +381,11 @@ def _parse_config_file(config_file, opts):
general_config.getboolean("fail_on_output_error")
)
if "log_file" in general_config:
opts.log_file = general_config["log_file"]
opts.log_file = _expand_path(general_config["log_file"])
if "n_procs" in general_config:
opts.n_procs = general_config.getint("n_procs")
if "ip_db_path" in general_config:
opts.ip_db_path = general_config["ip_db_path"]
opts.ip_db_path = _expand_path(general_config["ip_db_path"])
else:
opts.ip_db_path = None
if "always_use_local_files" in general_config:
@@ -284,7 +393,9 @@ def _parse_config_file(config_file, opts):
general_config.getboolean("always_use_local_files")
)
if "local_reverse_dns_map_path" in general_config:
opts.reverse_dns_map_path = general_config["local_reverse_dns_map_path"]
opts.reverse_dns_map_path = _expand_path(
general_config["local_reverse_dns_map_path"]
)
if "reverse_dns_map_url" in general_config:
opts.reverse_dns_map_url = general_config["reverse_dns_map_url"]
if "prettify_json" in general_config:
@@ -399,7 +510,7 @@ def _parse_config_file(config_file, opts):
if "msgraph" in config.sections():
graph_config = config["msgraph"]
opts.graph_token_file = graph_config.get("token_file", ".token")
opts.graph_token_file = _expand_path(graph_config.get("token_file", ".token"))
if "auth_method" not in graph_config:
logger.info(
@@ -453,7 +564,9 @@ def _parse_config_file(config_file, opts):
if opts.graph_auth_method == AuthMethod.Certificate.name:
if "certificate_path" in graph_config:
opts.graph_certificate_path = graph_config["certificate_path"]
opts.graph_certificate_path = _expand_path(
graph_config["certificate_path"]
)
else:
raise ConfigurationError(
"certificate_path setting missing from the msgraph config section"
@@ -477,6 +590,8 @@ def _parse_config_file(config_file, opts):
if "graph_url" in graph_config:
opts.graph_url = graph_config["graph_url"]
elif "url" in graph_config:
opts.graph_url = graph_config["url"]
if "allow_unencrypted_storage" in graph_config:
opts.graph_allow_unencrypted_storage = bool(
@@ -510,7 +625,9 @@ def _parse_config_file(config_file, opts):
if "ssl" in elasticsearch_config:
opts.elasticsearch_ssl = bool(elasticsearch_config.getboolean("ssl"))
if "cert_path" in elasticsearch_config:
opts.elasticsearch_ssl_cert_path = elasticsearch_config["cert_path"]
opts.elasticsearch_ssl_cert_path = _expand_path(
elasticsearch_config["cert_path"]
)
if "skip_certificate_verification" in elasticsearch_config:
opts.elasticsearch_skip_certificate_verification = bool(
elasticsearch_config.getboolean("skip_certificate_verification")
@@ -553,7 +670,7 @@ def _parse_config_file(config_file, opts):
if "ssl" in opensearch_config:
opts.opensearch_ssl = bool(opensearch_config.getboolean("ssl"))
if "cert_path" in opensearch_config:
opts.opensearch_ssl_cert_path = opensearch_config["cert_path"]
opts.opensearch_ssl_cert_path = _expand_path(opensearch_config["cert_path"])
if "skip_certificate_verification" in opensearch_config:
opts.opensearch_skip_certificate_verification = bool(
opensearch_config.getboolean("skip_certificate_verification")
@@ -680,7 +797,7 @@ def _parse_config_file(config_file, opts):
if "subject" in smtp_config:
opts.smtp_subject = smtp_config["subject"]
if "attachment" in smtp_config:
opts.smtp_attachment = smtp_config["attachment"]
opts.smtp_attachment = _expand_path(smtp_config["attachment"])
if "message" in smtp_config:
opts.smtp_message = smtp_config["message"]
@@ -727,11 +844,11 @@ def _parse_config_file(config_file, opts):
else:
opts.syslog_protocol = "udp"
if "cafile_path" in syslog_config:
opts.syslog_cafile_path = syslog_config["cafile_path"]
opts.syslog_cafile_path = _expand_path(syslog_config["cafile_path"])
if "certfile_path" in syslog_config:
opts.syslog_certfile_path = syslog_config["certfile_path"]
opts.syslog_certfile_path = _expand_path(syslog_config["certfile_path"])
if "keyfile_path" in syslog_config:
opts.syslog_keyfile_path = syslog_config["keyfile_path"]
opts.syslog_keyfile_path = _expand_path(syslog_config["keyfile_path"])
if "timeout" in syslog_config:
opts.syslog_timeout = float(syslog_config["timeout"])
else:
@@ -747,8 +864,13 @@ def _parse_config_file(config_file, opts):
if "gmail_api" in config.sections():
gmail_api_config = config["gmail_api"]
opts.gmail_api_credentials_file = gmail_api_config.get("credentials_file")
opts.gmail_api_token_file = gmail_api_config.get("token_file", ".token")
gmail_creds = gmail_api_config.get("credentials_file")
opts.gmail_api_credentials_file = (
_expand_path(gmail_creds) if gmail_creds else gmail_creds
)
opts.gmail_api_token_file = _expand_path(
gmail_api_config.get("token_file", ".token")
)
opts.gmail_api_include_spam_trash = bool(
gmail_api_config.getboolean("include_spam_trash", False)
)
@@ -773,9 +895,15 @@ def _parse_config_file(config_file, opts):
if "maildir" in config.sections():
maildir_api_config = config["maildir"]
opts.maildir_path = maildir_api_config.get("maildir_path")
maildir_p = maildir_api_config.get(
"maildir_path", maildir_api_config.get("path")
)
opts.maildir_path = _expand_path(maildir_p) if maildir_p else maildir_p
opts.maildir_create = bool(
maildir_api_config.getboolean("maildir_create", fallback=False)
maildir_api_config.getboolean(
"maildir_create",
fallback=maildir_api_config.getboolean("create", fallback=False),
)
)
if "log_analytics" in config.sections():
@@ -869,6 +997,7 @@ def _init_output_clients(opts):
try:
if opts.s3_bucket:
logger.debug("Initializing S3 client: bucket=%s", opts.s3_bucket)
clients["s3_client"] = s3.S3Client(
bucket_name=opts.s3_bucket,
bucket_path=opts.s3_path,
@@ -882,6 +1011,11 @@ def _init_output_clients(opts):
try:
if opts.syslog_server:
logger.debug(
"Initializing syslog client: server=%s:%s",
opts.syslog_server,
opts.syslog_port,
)
clients["syslog_client"] = syslog.SyslogClient(
server_name=opts.syslog_server,
server_port=int(opts.syslog_port),
@@ -906,6 +1040,7 @@ def _init_output_clients(opts):
"HEC token and HEC index are required when using HEC URL"
)
try:
logger.debug("Initializing Splunk HEC client: url=%s", opts.hec)
verify = True
if opts.hec_skip_certificate_verification:
verify = False
@@ -917,6 +1052,7 @@ def _init_output_clients(opts):
try:
if opts.kafka_hosts:
logger.debug("Initializing Kafka client: hosts=%s", opts.kafka_hosts)
ssl_context = None
if opts.kafka_skip_certificate_verification:
logger.debug("Skipping Kafka certificate verification")
@@ -934,6 +1070,11 @@ def _init_output_clients(opts):
try:
if opts.gelf_host:
logger.debug(
"Initializing GELF client: host=%s:%s",
opts.gelf_host,
opts.gelf_port,
)
clients["gelf_client"] = gelf.GelfClient(
host=opts.gelf_host,
port=int(opts.gelf_port),
@@ -948,6 +1089,7 @@ def _init_output_clients(opts):
or opts.webhook_forensic_url
or opts.webhook_smtp_tls_url
):
logger.debug("Initializing webhook client")
clients["webhook_client"] = webhook.WebhookClient(
aggregate_url=opts.webhook_aggregate_url,
forensic_url=opts.webhook_forensic_url,
@@ -960,11 +1102,16 @@ def _init_output_clients(opts):
# Elasticsearch and OpenSearch mutate module-level global state via
# connections.create_connection(), which cannot be rolled back if a later
# step fails. Initialise them last so that all other clients are created
# successfully first; this minimises the window for partial-init problems
# successfully first; this minimizes the window for partial-init problems
# during config reload.
if opts.save_aggregate or opts.save_forensic or opts.save_smtp_tls:
try:
if opts.elasticsearch_hosts:
logger.debug(
"Initializing Elasticsearch client: hosts=%s, ssl=%s",
opts.elasticsearch_hosts,
opts.elasticsearch_ssl,
)
es_aggregate_index = "dmarc_aggregate"
es_forensic_index = "dmarc_forensic"
es_smtp_tls_index = "smtp_tls"
@@ -1003,6 +1150,11 @@ def _init_output_clients(opts):
try:
if opts.opensearch_hosts:
logger.debug(
"Initializing OpenSearch client: hosts=%s, ssl=%s",
opts.opensearch_hosts,
opts.opensearch_ssl,
)
os_aggregate_index = "dmarc_aggregate"
os_forensic_index = "dmarc_forensic"
os_smtp_tls_index = "smtp_tls"
@@ -1683,9 +1835,16 @@ def _main():
index_prefix_domain_map = None
if args.config_file:
config_file = args.config_file or os.environ.get("PARSEDMARC_CONFIG_FILE")
has_env_config = any(
k.startswith("PARSEDMARC_") and k != "PARSEDMARC_CONFIG_FILE"
for k in os.environ
)
if config_file or has_env_config:
try:
index_prefix_domain_map = _parse_config_file(args.config_file, opts)
config = _load_config(config_file)
index_prefix_domain_map = _parse_config(config, opts)
except ConfigurationError as e:
logger.critical(str(e))
exit(-1)
@@ -1723,15 +1882,31 @@ def _main():
logger.info("Starting parsedmarc")
# Initialize output clients
try:
clients = _init_output_clients(opts)
except ConfigurationError as e:
logger.critical(str(e))
exit(1)
except Exception as error_:
logger.error("Output client error: {0}".format(error_))
exit(1)
# Initialize output clients (with retry for transient connection errors)
clients = {}
max_retries = 4
retry_delay = 5
for attempt in range(max_retries + 1):
try:
clients = _init_output_clients(opts)
break
except ConfigurationError as e:
logger.critical(str(e))
exit(1)
except Exception as error_:
if attempt < max_retries:
logger.warning(
"Output client error (attempt %d/%d, retrying in %ds): %s",
attempt + 1,
max_retries + 1,
retry_delay,
error_,
)
time.sleep(retry_delay)
retry_delay *= 2
else:
logger.error("Output client error: {0}".format(error_))
exit(1)
file_paths = []
mbox_paths = []
@@ -2102,9 +2277,8 @@ def _main():
# Build a fresh opts starting from CLI-only defaults so that
# sections removed from the config file actually take effect.
new_opts = Namespace(**vars(opts_from_cli))
new_index_prefix_domain_map = _parse_config_file(
args.config_file, new_opts
)
new_config = _load_config(config_file)
new_index_prefix_domain_map = _parse_config(new_config, new_opts)
new_clients = _init_output_clients(new_opts)
# All steps succeeded — commit the changes atomically.

View File

@@ -1,3 +1,3 @@
__version__ = "9.4.0"
__version__ = "9.5.5"
USER_AGENT = f"parsedmarc/{__version__}"

View File

@@ -299,7 +299,7 @@ def set_hosts(
else:
conn_params["verify_certs"] = True
if username and password:
conn_params["http_auth"] = username + ":" + password
conn_params["http_auth"] = (username, password)
if api_key:
conn_params["api_key"] = api_key
connections.create_connection(**conn_params)

View File

@@ -55,6 +55,7 @@ def _get_creds(
flow = InstalledAppFlow.from_client_secrets_file(credentials_file, scopes)
creds = flow.run_local_server(open_browser=False, oauth2_port=oauth2_port)
# Save the credentials for the next run
Path(token_file).parent.mkdir(parents=True, exist_ok=True)
with Path(token_file).open("w") as token:
token.write(creds.to_json())
return creds

View File

@@ -56,6 +56,7 @@ def _load_token(token_path: Path) -> Optional[str]:
def _cache_auth_record(record: AuthenticationRecord, token_path: Path):
token = record.serialize()
token_path.parent.mkdir(parents=True, exist_ok=True)
with token_path.open("w") as token_file:
token_file.write(token)

View File

@@ -19,29 +19,54 @@ class MaildirConnection(MailboxConnection):
):
self._maildir_path = maildir_path
self._maildir_create = maildir_create
maildir_owner = os.stat(maildir_path).st_uid
if os.getuid() != maildir_owner:
if os.getuid() == 0:
logger.warning(
"Switching uid to {} to access Maildir".format(maildir_owner)
)
os.setuid(maildir_owner)
try:
maildir_owner = os.stat(maildir_path).st_uid
except OSError:
maildir_owner = None
current_uid = os.getuid()
if maildir_owner is not None and current_uid != maildir_owner:
if current_uid == 0:
try:
logger.warning(
"Switching uid to {} to access Maildir".format(maildir_owner)
)
os.setuid(maildir_owner)
except OSError as e:
logger.warning(
"Failed to switch uid to {}: {}".format(maildir_owner, e)
)
else:
ex = "runtime uid {} differ from maildir {} owner {}".format(
os.getuid(), maildir_path, maildir_owner
logger.warning(
"Runtime uid {} differs from maildir {} owner {}. "
"Access may fail if permissions are insufficient.".format(
current_uid, maildir_path, maildir_owner
)
)
raise Exception(ex)
if maildir_create:
for subdir in ("cur", "new", "tmp"):
os.makedirs(os.path.join(maildir_path, subdir), exist_ok=True)
self._client = mailbox.Maildir(maildir_path, create=maildir_create)
self._active_folder: mailbox.Maildir = self._client
self._subfolder_client: Dict[str, mailbox.Maildir] = {}
def _get_folder(self, folder_name: str) -> mailbox.Maildir:
"""Return a cached subfolder handle, creating it if needed."""
if folder_name not in self._subfolder_client:
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
return self._subfolder_client[folder_name]
def create_folder(self, folder_name: str):
self._subfolder_client[folder_name] = self._client.add_folder(folder_name)
self._get_folder(folder_name)
def fetch_messages(self, reports_folder: str, **kwargs):
    """Select the active folder and return its message keys.

    ``"INBOX"`` or an empty/falsy name maps to the top-level Maildir;
    any other name selects (auto-creating if needed) a subfolder.
    Removes the stale early ``return self._client.keys()`` line
    (pre-refactor diff residue) that made the folder selection
    unreachable.
    """
    if reports_folder and reports_folder != "INBOX":
        self._active_folder = self._get_folder(reports_folder)
    else:
        self._active_folder = self._client
    return self._active_folder.keys()
def fetch_message(self, message_id: str) -> str:
    """Return the message with *message_id* as an RFC 822 string.

    Reads from the active folder chosen by the last fetch_messages
    call; returns "" when the key is missing. Collapses the duplicated
    ``if msg is not None`` checks left over from the diff residue —
    ``as_string()`` never returns None, so one check suffices.
    """
    msg = self._active_folder.get(message_id)
    if msg is not None:
        return msg.as_string()
    return ""
def delete_message(self, message_id: str):
    """Remove a message from the active folder.

    Uses the active folder (set by fetch_messages) rather than the
    top-level client; the stale ``self._client.remove`` line from the
    diff residue is dropped.
    """
    self._active_folder.remove(message_id)
def move_message(self, message_id: str, folder_name: str):
    """Move a message from the active folder into a subfolder.

    The destination subfolder is auto-created (and cached) via
    _get_folder. A missing source message is a silent no-op. The
    interleaved pre-refactor lines (reading/removing via
    ``self._client`` and inline subfolder creation) are removed in
    favor of the active-folder + _get_folder version.
    """
    message_data = self._active_folder.get(message_id)
    if message_data is None:
        return
    dest = self._get_folder(folder_name)
    dest.add(message_data)
    self._active_folder.remove(message_id)
def keepalive(self):
    """No-op: a local Maildir has no network connection to keep alive."""
    return None

View File

@@ -298,6 +298,7 @@ def set_hosts(
"""
if not isinstance(hosts, list):
hosts = [hosts]
logger.debug("Connecting to OpenSearch: hosts=%s, use_ssl=%s", hosts, use_ssl)
conn_params = {"hosts": hosts, "timeout": timeout}
if use_ssl:
conn_params["use_ssl"] = True
@@ -323,7 +324,7 @@ def set_hosts(
conn_params["connection_class"] = RequestsHttpConnection
elif normalized_auth_type == "basic":
if username and password:
conn_params["http_auth"] = username + ":" + password
conn_params["http_auth"] = (username, password)
if api_key:
conn_params["api_key"] = api_key
else:

576
tests.py
View File

@@ -11,6 +11,7 @@ import sys
import tempfile
import unittest
from base64 import urlsafe_b64encode
from configparser import ConfigParser
from glob import glob
from pathlib import Path
from tempfile import NamedTemporaryFile, TemporaryDirectory
@@ -1985,7 +1986,8 @@ watch = true
"SIGHUP not available on this platform",
)
@patch("parsedmarc.cli._init_output_clients")
@patch("parsedmarc.cli._parse_config_file")
@patch("parsedmarc.cli._parse_config")
@patch("parsedmarc.cli._load_config")
@patch("parsedmarc.cli.get_dmarc_reports_from_mailbox")
@patch("parsedmarc.cli.watch_inbox")
@patch("parsedmarc.cli.IMAPConnection")
@@ -1994,6 +1996,7 @@ watch = true
mock_imap,
mock_watch,
mock_get_reports,
mock_load_config,
mock_parse_config,
mock_init_clients,
):
@@ -2007,7 +2010,9 @@ watch = true
"smtp_tls_reports": [],
}
def parse_side_effect(config_file, opts):
mock_load_config.return_value = ConfigParser()
def parse_side_effect(config, opts):
opts.imap_host = "imap.example.com"
opts.imap_user = "user"
opts.imap_password = "pass"
@@ -2046,7 +2051,7 @@ watch = true
self.assertEqual(cm.exception.code, 1)
# watch_inbox was called twice: initial run + after reload
self.assertEqual(mock_watch.call_count, 2)
# _parse_config_file called for initial load + reload
# _parse_config called for initial load + reload
self.assertGreaterEqual(mock_parse_config.call_count, 2)
@unittest.skipUnless(
@@ -2054,7 +2059,8 @@ watch = true
"SIGHUP not available on this platform",
)
@patch("parsedmarc.cli._init_output_clients")
@patch("parsedmarc.cli._parse_config_file")
@patch("parsedmarc.cli._parse_config")
@patch("parsedmarc.cli._load_config")
@patch("parsedmarc.cli.get_dmarc_reports_from_mailbox")
@patch("parsedmarc.cli.watch_inbox")
@patch("parsedmarc.cli.IMAPConnection")
@@ -2063,6 +2069,7 @@ watch = true
mock_imap,
mock_watch,
mock_get_reports,
mock_load_config,
mock_parse_config,
mock_init_clients,
):
@@ -2076,11 +2083,13 @@ watch = true
"smtp_tls_reports": [],
}
mock_load_config.return_value = ConfigParser()
# Initial parse sets required opts; reload parse raises
initial_map = {"prefix_": ["example.com"]}
call_count = [0]
def parse_side_effect(config_file, opts):
def parse_side_effect(config, opts):
call_count[0] += 1
opts.imap_host = "imap.example.com"
opts.imap_user = "user"
@@ -2130,7 +2139,8 @@ watch = true
"SIGHUP not available on this platform",
)
@patch("parsedmarc.cli._init_output_clients")
@patch("parsedmarc.cli._parse_config_file")
@patch("parsedmarc.cli._parse_config")
@patch("parsedmarc.cli._load_config")
@patch("parsedmarc.cli.get_dmarc_reports_from_mailbox")
@patch("parsedmarc.cli.watch_inbox")
@patch("parsedmarc.cli.IMAPConnection")
@@ -2139,6 +2149,7 @@ watch = true
mock_imap,
mock_watch,
mock_get_reports,
mock_load_config,
mock_parse_config,
mock_init_clients,
):
@@ -2152,7 +2163,9 @@ watch = true
"smtp_tls_reports": [],
}
def parse_side_effect(config_file, opts):
mock_load_config.return_value = ConfigParser()
def parse_side_effect(config, opts):
opts.imap_host = "imap.example.com"
opts.imap_user = "user"
opts.imap_password = "pass"
@@ -2284,7 +2297,8 @@ watch = true
"SIGHUP not available on this platform",
)
@patch("parsedmarc.cli._init_output_clients")
@patch("parsedmarc.cli._parse_config_file")
@patch("parsedmarc.cli._parse_config")
@patch("parsedmarc.cli._load_config")
@patch("parsedmarc.cli.get_dmarc_reports_from_mailbox")
@patch("parsedmarc.cli.watch_inbox")
@patch("parsedmarc.cli.IMAPConnection")
@@ -2293,6 +2307,7 @@ watch = true
mock_imap,
mock_watch,
mock_get_reports,
mock_load_config,
mock_parse_config,
mock_init_clients,
):
@@ -2308,7 +2323,9 @@ watch = true
"smtp_tls_reports": [],
}
def parse_side_effect(config_file, opts):
mock_load_config.return_value = ConfigParser()
def parse_side_effect(config, opts):
opts.imap_host = "imap.example.com"
opts.imap_user = "user"
opts.imap_password = "pass"
@@ -2474,5 +2491,546 @@ password = test-password
self.assertNotIn("unmapped-1", report_ids)
class TestMaildirConnection(unittest.TestCase):
    """Tests for MaildirConnection subdirectory creation."""

    def test_create_subdirs_when_missing(self):
        """maildir_create=True creates cur/new/tmp in an empty directory."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            # Docker-volume-like precondition: directory exists but is empty
            for subdir in ("cur", "new", "tmp"):
                self.assertFalse(os.path.exists(os.path.join(d, subdir)))
            conn = MaildirConnection(d, maildir_create=True)
            for subdir in ("cur", "new", "tmp"):
                self.assertTrue(os.path.isdir(os.path.join(d, subdir)))
            # Should be able to list messages without error
            self.assertEqual(conn.fetch_messages("INBOX"), [])

    def test_create_subdirs_idempotent(self):
        """maildir_create=True is safe when subdirs already exist."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            for subdir in ("cur", "new", "tmp"):
                os.makedirs(os.path.join(d, subdir))
            # Should not raise
            conn = MaildirConnection(d, maildir_create=True)
            self.assertEqual(conn.fetch_messages("INBOX"), [])

    def test_no_create_raises_on_missing_subdirs(self):
        """maildir_create=False does not create subdirs; keys() fails."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=False)
            # Listing scans cur/new, which were never created
            with self.assertRaises(FileNotFoundError):
                conn.fetch_messages("INBOX")

    def test_fetch_and_delete_message(self):
        """Round-trip: add a message, fetch it, delete it."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            # Add a message via the underlying client
            msg_key = conn._client.add("From: test@example.com\n\nHello")
            keys = conn.fetch_messages("INBOX")
            self.assertIn(msg_key, keys)
            content = conn.fetch_message(msg_key)
            self.assertIn("test@example.com", content)
            conn.delete_message(msg_key)
            self.assertEqual(conn.fetch_messages("INBOX"), [])

    def test_move_message_creates_subfolder(self):
        """move_message auto-creates the destination subfolder."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            msg_key = conn._client.add("From: test@example.com\n\nHello")
            conn.move_message(msg_key, "archive")
            # Original should be gone
            self.assertEqual(conn.fetch_messages("INBOX"), [])
            # Archive subfolder should have the message
            self.assertIn("archive", conn._subfolder_client)
            self.assertEqual(len(conn._subfolder_client["archive"].keys()), 1)
class TestMaildirReportsFolder(unittest.TestCase):
    """Tests for Maildir reports_folder support in fetch_messages."""

    def test_fetch_from_subfolder(self):
        """fetch_messages with a subfolder name reads from that subfolder."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            # Add message to a subfolder
            subfolder = conn._client.add_folder("reports")
            msg_key = subfolder.add("From: test@example.com\n\nSubfolder msg")
            # Root should be empty
            self.assertEqual(conn.fetch_messages("INBOX"), [])
            # Subfolder should have the message
            keys = conn.fetch_messages("reports")
            self.assertIn(msg_key, keys)

    def test_fetch_message_uses_active_folder(self):
        """fetch_message reads from the folder set by fetch_messages."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            subfolder = conn._client.add_folder("reports")
            msg_key = subfolder.add("From: sub@example.com\n\nIn subfolder")
            conn.fetch_messages("reports")
            content = conn.fetch_message(msg_key)
            self.assertIn("sub@example.com", content)

    def test_delete_message_uses_active_folder(self):
        """delete_message removes from the folder set by fetch_messages."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            subfolder = conn._client.add_folder("reports")
            msg_key = subfolder.add("From: del@example.com\n\nDelete me")
            conn.fetch_messages("reports")
            conn.delete_message(msg_key)
            self.assertEqual(conn.fetch_messages("reports"), [])

    def test_move_message_from_subfolder(self):
        """move_message works when active folder is a subfolder."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            subfolder = conn._client.add_folder("reports")
            msg_key = subfolder.add("From: move@example.com\n\nMove me")
            conn.fetch_messages("reports")
            conn.move_message(msg_key, "archive")
            # Source should be empty
            self.assertEqual(conn.fetch_messages("reports"), [])
            # Destination should have the message
            archive_keys = conn.fetch_messages("archive")
            self.assertEqual(len(archive_keys), 1)

    def test_inbox_reads_root(self):
        """INBOX reads from the top-level Maildir."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            msg_key = conn._client.add("From: root@example.com\n\nRoot msg")
            keys = conn.fetch_messages("INBOX")
            self.assertIn(msg_key, keys)

    def test_empty_folder_reads_root(self):
        """Empty string reports_folder reads from the top-level Maildir."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            msg_key = conn._client.add("From: root@example.com\n\nRoot msg")
            keys = conn.fetch_messages("")
            self.assertIn(msg_key, keys)
class TestConfigAliases(unittest.TestCase):
    """Tests for config key aliases (env var friendly short names)."""

    def test_maildir_create_alias(self):
        """[maildir] create works as alias for maildir_create."""
        from argparse import Namespace
        from parsedmarc.cli import _load_config, _parse_config

        env = {
            "PARSEDMARC_MAILDIR_CREATE": "true",
            "PARSEDMARC_MAILDIR_PATH": "/tmp/test",
        }
        with patch.dict(os.environ, env, clear=False):
            config = _load_config(None)
            opts = Namespace()
            _parse_config(config, opts)
            self.assertTrue(opts.maildir_create)

    def test_maildir_path_alias(self):
        """[maildir] path works as alias for maildir_path."""
        from argparse import Namespace
        from parsedmarc.cli import _load_config, _parse_config

        env = {"PARSEDMARC_MAILDIR_PATH": "/var/mail/dmarc"}
        with patch.dict(os.environ, env, clear=False):
            config = _load_config(None)
            opts = Namespace()
            _parse_config(config, opts)
            self.assertEqual(opts.maildir_path, "/var/mail/dmarc")

    def test_msgraph_url_alias(self):
        """[msgraph] url works as alias for graph_url."""
        from parsedmarc.cli import _load_config, _parse_config
        from argparse import Namespace

        env = {
            "PARSEDMARC_MSGRAPH_AUTH_METHOD": "ClientSecret",
            "PARSEDMARC_MSGRAPH_CLIENT_ID": "test-id",
            "PARSEDMARC_MSGRAPH_CLIENT_SECRET": "test-secret",
            "PARSEDMARC_MSGRAPH_TENANT_ID": "test-tenant",
            "PARSEDMARC_MSGRAPH_MAILBOX": "test@example.com",
            "PARSEDMARC_MSGRAPH_URL": "https://custom.graph.example.com",
        }
        with patch.dict(os.environ, env, clear=False):
            config = _load_config(None)
            opts = Namespace()
            _parse_config(config, opts)
            self.assertEqual(opts.graph_url, "https://custom.graph.example.com")

    def test_original_keys_still_work(self):
        """Original INI key names (maildir_create, maildir_path) still work."""
        from argparse import Namespace
        from parsedmarc.cli import _parse_config

        # interpolation=None so literal paths with % are not mangled
        config = ConfigParser(interpolation=None)
        config.add_section("maildir")
        config.set("maildir", "maildir_path", "/original/path")
        config.set("maildir", "maildir_create", "true")
        opts = Namespace()
        _parse_config(config, opts)
        self.assertEqual(opts.maildir_path, "/original/path")
        self.assertTrue(opts.maildir_create)
class TestMaildirUidHandling(unittest.TestCase):
    """Tests for Maildir UID mismatch handling in Docker-like environments."""

    def test_uid_mismatch_warns_instead_of_crashing(self):
        """UID mismatch logs a warning instead of raising an exception."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            # Create subdirs so Maildir works
            for subdir in ("cur", "new", "tmp"):
                os.makedirs(os.path.join(d, subdir))
            # Mock os.stat to return a different UID than os.getuid
            fake_stat = os.stat(d)
            with (
                patch("parsedmarc.mail.maildir.os.stat") as mock_stat,
                patch("parsedmarc.mail.maildir.os.getuid", return_value=9999),
            ):
                mock_stat.return_value = fake_stat
                # Should not raise — just warn
                conn = MaildirConnection(d, maildir_create=False)
                self.assertEqual(conn.fetch_messages("INBOX"), [])

    def test_uid_match_no_warning(self):
        """No warning when UIDs match."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            conn = MaildirConnection(d, maildir_create=True)
            self.assertEqual(conn.fetch_messages("INBOX"), [])

    def test_stat_failure_does_not_crash(self):
        """If os.stat fails on the maildir path, we don't crash."""
        from parsedmarc.mail.maildir import MaildirConnection

        with TemporaryDirectory() as d:
            for subdir in ("cur", "new", "tmp"):
                os.makedirs(os.path.join(d, subdir))
            original_stat = os.stat

            def stat_that_fails_once(path, *args, **kwargs):
                """Fail on the first call (UID check), pass through after."""
                stat_that_fails_once.calls += 1
                if stat_that_fails_once.calls == 1:
                    raise OSError("no stat")
                return original_stat(path, *args, **kwargs)

            stat_that_fails_once.calls = 0
            with patch(
                "parsedmarc.mail.maildir.os.stat", side_effect=stat_that_fails_once
            ):
                conn = MaildirConnection(d, maildir_create=False)
                self.assertEqual(conn.fetch_messages("INBOX"), [])
class TestExpandPath(unittest.TestCase):
    """Tests for _expand_path config path expansion."""

    def test_expand_tilde(self):
        """~ expands to the user's home directory."""
        from parsedmarc.cli import _expand_path

        result = _expand_path("~/some/path")
        self.assertFalse(result.startswith("~"))
        self.assertTrue(result.endswith("/some/path"))

    def test_expand_env_var(self):
        """$VAR references are substituted from the environment."""
        from parsedmarc.cli import _expand_path

        with patch.dict(os.environ, {"PARSEDMARC_TEST_DIR": "/opt/data"}):
            result = _expand_path("$PARSEDMARC_TEST_DIR/tokens/.token")
            self.assertEqual(result, "/opt/data/tokens/.token")

    def test_expand_both(self):
        """~ and $VAR can be combined in one path."""
        from parsedmarc.cli import _expand_path

        with patch.dict(os.environ, {"MY_APP": "parsedmarc"}):
            result = _expand_path("~/$MY_APP/config")
            self.assertNotIn("~", result)
            self.assertIn("parsedmarc/config", result)

    def test_no_expansion_needed(self):
        """Plain absolute/relative paths pass through unchanged."""
        from parsedmarc.cli import _expand_path

        self.assertEqual(_expand_path("/absolute/path"), "/absolute/path")
        self.assertEqual(_expand_path("relative/path"), "relative/path")
class TestTokenParentDirCreation(unittest.TestCase):
    """Tests for parent directory creation when writing token files."""

    def test_graph_cache_creates_parent_dirs(self):
        """MS Graph token cache writes create missing parent directories."""
        from parsedmarc.mail.graph import _cache_auth_record

        with TemporaryDirectory() as d:
            token_path = Path(d) / "subdir" / "nested" / ".token"
            self.assertFalse(token_path.parent.exists())
            mock_record = MagicMock()
            mock_record.serialize.return_value = "serialized-token"
            _cache_auth_record(mock_record, token_path)
            self.assertTrue(token_path.exists())
            self.assertEqual(token_path.read_text(), "serialized-token")

    def test_gmail_token_write_creates_parent_dirs(self):
        """Gmail token write creates parent directories."""
        with TemporaryDirectory() as d:
            token_path = Path(d) / "deep" / "nested" / "token.json"
            self.assertFalse(token_path.parent.exists())
            # Directly test the mkdir + open pattern
            token_path.parent.mkdir(parents=True, exist_ok=True)
            with token_path.open("w") as f:
                f.write('{"token": "test"}')
            self.assertTrue(token_path.exists())
            self.assertEqual(token_path.read_text(), '{"token": "test"}')
class TestEnvVarConfig(unittest.TestCase):
    """Tests for environment variable configuration support."""

    def test_resolve_section_key_simple(self):
        """Simple section names resolve correctly."""
        from parsedmarc.cli import _resolve_section_key

        self.assertEqual(_resolve_section_key("IMAP_PASSWORD"), ("imap", "password"))
        self.assertEqual(_resolve_section_key("GENERAL_DEBUG"), ("general", "debug"))
        self.assertEqual(_resolve_section_key("S3_BUCKET"), ("s3", "bucket"))
        self.assertEqual(_resolve_section_key("GELF_HOST"), ("gelf", "host"))

    def test_resolve_section_key_underscore_sections(self):
        """Multi-word section names (splunk_hec, gmail_api, etc.) resolve correctly."""
        from parsedmarc.cli import _resolve_section_key

        self.assertEqual(
            _resolve_section_key("SPLUNK_HEC_TOKEN"), ("splunk_hec", "token")
        )
        self.assertEqual(
            _resolve_section_key("GMAIL_API_CREDENTIALS_FILE"),
            ("gmail_api", "credentials_file"),
        )
        self.assertEqual(
            _resolve_section_key("LOG_ANALYTICS_CLIENT_ID"),
            ("log_analytics", "client_id"),
        )

    def test_resolve_section_key_unknown(self):
        """Unknown prefixes return (None, None)."""
        from parsedmarc.cli import _resolve_section_key

        self.assertEqual(_resolve_section_key("UNKNOWN_FOO"), (None, None))
        # Just a section name with no key should not match
        self.assertEqual(_resolve_section_key("IMAP"), (None, None))

    def test_apply_env_overrides_injects_values(self):
        """Env vars are injected into an existing ConfigParser."""
        from configparser import ConfigParser
        from parsedmarc.cli import _apply_env_overrides

        config = ConfigParser()
        config.add_section("imap")
        config.set("imap", "host", "original.example.com")
        env = {
            "PARSEDMARC_IMAP_HOST": "new.example.com",
            "PARSEDMARC_IMAP_PASSWORD": "secret123",
        }
        with patch.dict(os.environ, env, clear=False):
            _apply_env_overrides(config)
            self.assertEqual(config.get("imap", "host"), "new.example.com")
            self.assertEqual(config.get("imap", "password"), "secret123")

    def test_apply_env_overrides_creates_sections(self):
        """Env vars create new sections when they don't exist."""
        from configparser import ConfigParser
        from parsedmarc.cli import _apply_env_overrides

        config = ConfigParser()
        env = {"PARSEDMARC_ELASTICSEARCH_HOSTS": "http://localhost:9200"}
        with patch.dict(os.environ, env, clear=False):
            _apply_env_overrides(config)
            self.assertTrue(config.has_section("elasticsearch"))
            self.assertEqual(config.get("elasticsearch", "hosts"), "http://localhost:9200")

    def test_apply_env_overrides_ignores_config_file_var(self):
        """PARSEDMARC_CONFIG_FILE is not injected as a config key."""
        from configparser import ConfigParser
        from parsedmarc.cli import _apply_env_overrides

        config = ConfigParser()
        env = {"PARSEDMARC_CONFIG_FILE": "/some/path.ini"}
        with patch.dict(os.environ, env, clear=False):
            _apply_env_overrides(config)
            self.assertEqual(config.sections(), [])

    def test_load_config_with_file_and_env_override(self):
        """Env vars override values from an INI file."""
        from parsedmarc.cli import _load_config

        with NamedTemporaryFile(mode="w", suffix=".ini", delete=False) as f:
            f.write(
                "[imap]\nhost = file.example.com\nuser = alice\npassword = fromfile\n"
            )
            f.flush()
            config_path = f.name
        try:
            env = {"PARSEDMARC_IMAP_PASSWORD": "fromenv"}
            with patch.dict(os.environ, env, clear=False):
                config = _load_config(config_path)
                # File values survive; the env var wins only for password
                self.assertEqual(config.get("imap", "host"), "file.example.com")
                self.assertEqual(config.get("imap", "user"), "alice")
                self.assertEqual(config.get("imap", "password"), "fromenv")
        finally:
            os.unlink(config_path)

    def test_load_config_env_only(self):
        """Config can be loaded purely from env vars with no file."""
        from parsedmarc.cli import _load_config

        env = {
            "PARSEDMARC_GENERAL_DEBUG": "true",
            "PARSEDMARC_ELASTICSEARCH_HOSTS": "http://localhost:9200",
        }
        with patch.dict(os.environ, env, clear=False):
            config = _load_config(None)
            self.assertEqual(config.get("general", "debug"), "true")
            self.assertEqual(config.get("elasticsearch", "hosts"), "http://localhost:9200")

    def test_parse_config_from_env(self):
        """Full round-trip: env vars -> ConfigParser -> opts."""
        from argparse import Namespace
        from parsedmarc.cli import _load_config, _parse_config

        env = {
            "PARSEDMARC_GENERAL_DEBUG": "true",
            "PARSEDMARC_GENERAL_SAVE_AGGREGATE": "true",
            "PARSEDMARC_GENERAL_OFFLINE": "true",
        }
        with patch.dict(os.environ, env, clear=False):
            config = _load_config(None)
            opts = Namespace()
            _parse_config(config, opts)
            self.assertTrue(opts.debug)
            self.assertTrue(opts.save_aggregate)
            self.assertTrue(opts.offline)

    def test_config_file_env_var(self):
        """PARSEDMARC_CONFIG_FILE env var specifies the config file path."""
        from argparse import Namespace
        from parsedmarc.cli import _load_config, _parse_config

        with NamedTemporaryFile(mode="w", suffix=".ini", delete=False) as f:
            f.write("[general]\ndebug = true\noffline = true\n")
            f.flush()
            config_path = f.name
        try:
            env = {"PARSEDMARC_CONFIG_FILE": config_path}
            with patch.dict(os.environ, env, clear=False):
                config = _load_config(os.environ.get("PARSEDMARC_CONFIG_FILE"))
                opts = Namespace()
                _parse_config(config, opts)
                self.assertTrue(opts.debug)
                self.assertTrue(opts.offline)
        finally:
            os.unlink(config_path)

    def test_boolean_values_from_env(self):
        """Various boolean string representations work through ConfigParser."""
        from configparser import ConfigParser
        from parsedmarc.cli import _apply_env_overrides

        for true_val in ("true", "yes", "1", "on", "True", "YES"):
            config = ConfigParser()
            env = {"PARSEDMARC_GENERAL_DEBUG": true_val}
            with patch.dict(os.environ, env, clear=False):
                _apply_env_overrides(config)
                self.assertTrue(
                    config.getboolean("general", "debug"),
                    f"Expected truthy for {true_val!r}",
                )
        for false_val in ("false", "no", "0", "off", "False", "NO"):
            config = ConfigParser()
            env = {"PARSEDMARC_GENERAL_DEBUG": false_val}
            with patch.dict(os.environ, env, clear=False):
                _apply_env_overrides(config)
                self.assertFalse(
                    config.getboolean("general", "debug"),
                    f"Expected falsy for {false_val!r}",
                )
if __name__ == "__main__":
    # Run the full suite with per-test verbose output.
    unittest.main(verbosity=2)