diff --git a/CHANGELOG.md b/CHANGELOG.md index c2044f9..eae978d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## 9.7.1 + +### Changes + +- Ported DNS lookup reliability improvements from checkdmarc 5.15.x: + - Per-query UDP timeout is now capped at `min(1.0, timeout)` in `query_dns()`, so a single dropped UDP datagram no longer consumes the entire lifetime budget — dnspython retries UDP within the lifetime window (mirroring `dig`'s default `+tries=3`). With multiple nameservers configured, the same cap also makes a slow or broken nameserver fall through to the next quickly. + - With multiple nameservers configured, the resolver lifetime is now `timeout × len(nameservers)` so each nameserver gets its own timeout budget for failover rather than sharing one overall deadline. + - New `retries` kwarg on `query_dns()`, `get_reverse_dns()`, and `get_ip_address_info()` retries the whole query on transient errors (`LifetimeTimeout`, `NoNameservers`/SERVFAIL, and `OSError` during TCP fallback). `NXDOMAIN` and `NoAnswer` remain non-retryable. Default is 0 (no behavior change for existing callers). + - Threaded `dns_retries` through the parser API (`parse_report_file`, `parse_aggregate_report_xml`, `parse_forensic_report`, `parse_report_email`, `get_dmarc_reports_from_mbox`, `get_dmarc_reports_from_mailbox`, `watch_inbox`). +- Added `--dns-retries N` CLI flag and `dns_retries` INI option (`[general]` section, also surfaced via `PARSEDMARC_GENERAL_DNS_RETRIES` env var). +- Centralized DNS defaults in `parsedmarc.constants`: `DEFAULT_DNS_TIMEOUT`, `DEFAULT_DNS_MAX_RETRIES`, and `RECOMMENDED_DNS_NAMESERVERS` (a cross-provider mix — `("1.1.1.1", "8.8.8.8")` — for callers that want public-resolver failover). The existing default nameservers (all-Cloudflare) are preserved for backward compatibility; callers opt in by passing `nameservers=RECOMMENDED_DNS_NAMESERVERS`. + ## 9.7.0 ### Changes diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index b184f75..12b6b70 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -38,7 +38,11 @@ import xmltodict from expiringdict import ExpiringDict from mailsuite.smtp import send_email -from parsedmarc.constants import __version__ +from parsedmarc.constants import ( + DEFAULT_DNS_MAX_RETRIES, + DEFAULT_DNS_TIMEOUT, + __version__, +) from parsedmarc.log import logger from parsedmarc.mail import ( GmailConnection, @@ -301,7 +305,8 @@ def _parse_report_record( reverse_dns_map_url: Optional[str] = None, offline: bool = False, nameservers: Optional[list[str]] = None, - dns_timeout: float = 2.0, + dns_timeout: float = DEFAULT_DNS_TIMEOUT, + dns_retries: int = DEFAULT_DNS_MAX_RETRIES, ) -> dict[str, Any]: """ Converts a record from a DMARC aggregate report into a more consistent @@ -317,6 +322,8 @@ def _parse_report_record( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Sets the DNS timeout in seconds + dns_retries (int): Number of times to retry DNS queries on timeout + or other transient errors Returns: dict: The converted record @@ -336,6 +343,7 @@ def _parse_report_record( offline=offline, nameservers=nameservers, timeout=dns_timeout, + retries=dns_retries, ) new_record["source"] = new_record_source new_record["count"] = int(record["row"]["count"]) @@ -668,7 +676,8 @@ def parse_aggregate_report_xml( reverse_dns_map_url: Optional[str] = None, offline: bool = False, nameservers: Optional[list[str]] = None, - timeout: float = 2.0, + timeout: float = DEFAULT_DNS_TIMEOUT, + retries: int = DEFAULT_DNS_MAX_RETRIES, keep_alive: Optional[Callable] = None, normalize_timespan_threshold_hours: float = 24.0, ) -> AggregateReport: @@ -684,6 +693,8 @@ def parse_aggregate_report_xml( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) timeout (float): Sets the DNS timeout in seconds + retries (int): Number of times to retry DNS queries on timeout or + other transient errors keep_alive (callable): Keep alive function normalize_timespan_threshold_hours (float): Normalize timespans beyond this @@ -828,6 +839,7 @@ def parse_aggregate_report_xml( reverse_dns_map_url=reverse_dns_map_url, nameservers=nameservers, dns_timeout=timeout, + dns_retries=retries, ) _append_parsed_record( parsed_record=report_record, @@ -849,6 +861,7 @@ def parse_aggregate_report_xml( offline=offline, nameservers=nameservers, dns_timeout=timeout, + dns_retries=retries, ) _append_parsed_record( parsed_record=report_record, @@ -982,7 +995,8 @@ def parse_aggregate_report_file( reverse_dns_map_url: Optional[str] = None, ip_db_path: Optional[str] = None, nameservers: Optional[list[str]] = None, - dns_timeout: float = 2.0, + dns_timeout: float = DEFAULT_DNS_TIMEOUT, + dns_retries: int = DEFAULT_DNS_MAX_RETRIES, keep_alive: Optional[Callable] = None, normalize_timespan_threshold_hours: float = 24.0, ) -> AggregateReport: @@ -999,6 +1013,8 @@ def parse_aggregate_report_file( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Sets the DNS timeout in seconds + dns_retries (int): Number of times to retry DNS queries on timeout + or other transient errors keep_alive (callable): Keep alive function normalize_timespan_threshold_hours (float): Normalize timespans beyond this @@ -1020,6 +1036,7 @@ def parse_aggregate_report_file( offline=offline, nameservers=nameservers, timeout=dns_timeout, + retries=dns_retries, keep_alive=keep_alive, normalize_timespan_threshold_hours=normalize_timespan_threshold_hours, ) @@ -1230,7 +1247,8 @@ def parse_forensic_report( offline: bool = False, ip_db_path: Optional[str] = None, nameservers: Optional[list[str]] = None, - dns_timeout: float = 2.0, + dns_timeout: float = DEFAULT_DNS_TIMEOUT, + dns_retries: int = DEFAULT_DNS_MAX_RETRIES, strip_attachment_payloads: bool = False, ) -> ForensicReport: """ @@ -1248,6 +1266,8 @@ def parse_forensic_report( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Sets the DNS timeout in seconds + dns_retries (int): Number of times to retry DNS queries on timeout + or other transient errors strip_attachment_payloads (bool): Remove attachment payloads from forensic report results @@ -1302,6 +1322,7 @@ def parse_forensic_report( offline=offline, nameservers=nameservers, timeout=dns_timeout, + retries=dns_retries, ) parsed_report["source"] = parsed_report_source del parsed_report["source_ip"] @@ -1461,7 +1482,8 @@ def parse_report_email( reverse_dns_map_path: Optional[str] = None, reverse_dns_map_url: Optional[str] = None, nameservers: Optional[list[str]] = None, - dns_timeout: float = 2.0, + dns_timeout: float = DEFAULT_DNS_TIMEOUT, + dns_retries: int = DEFAULT_DNS_MAX_RETRIES, strip_attachment_payloads: bool = False, keep_alive: Optional[Callable] = None, normalize_timespan_threshold_hours: float = 24.0, @@ -1478,6 +1500,8 @@ def parse_report_email( offline (bool): Do not query online for geolocation on DNS nameservers (list): A list of one or more nameservers to use dns_timeout (float): Sets the DNS timeout in seconds + dns_retries (int): Number of times to retry DNS queries on timeout + or other transient errors strip_attachment_payloads (bool): Remove attachment payloads from forensic report results keep_alive (callable): keep alive function @@ -1604,6 +1628,7 @@ def parse_report_email( offline=offline, nameservers=nameservers, timeout=dns_timeout, + retries=dns_retries, keep_alive=keep_alive, normalize_timespan_threshold_hours=normalize_timespan_threshold_hours, ) @@ -1639,6 +1664,7 @@ def parse_report_email( reverse_dns_map_url=reverse_dns_map_url, nameservers=nameservers, dns_timeout=dns_timeout, + dns_retries=dns_retries, strip_attachment_payloads=strip_attachment_payloads, ) except InvalidForensicReport as e: @@ -1665,7 +1691,8 @@ def parse_report_file( input_: Union[bytes, str, os.PathLike[str], os.PathLike[bytes], BinaryIO], *, nameservers: Optional[list[str]] = None, - dns_timeout: float = 2.0, + dns_timeout: float = DEFAULT_DNS_TIMEOUT, + dns_retries: int = DEFAULT_DNS_MAX_RETRIES, strip_attachment_payloads: bool = False, ip_db_path: Optional[str] = None, always_use_local_files: bool = False, @@ -1684,6 +1711,8 @@ def parse_report_file( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Sets the DNS timeout in seconds + dns_retries (int): Number of times to retry DNS queries on timeout + or other transient errors strip_attachment_payloads (bool): Remove attachment payloads from forensic report results ip_db_path (str): Path to a MMDB file from MaxMind or DBIP @@ -1723,6 +1752,7 @@ def parse_report_file( offline=offline, nameservers=nameservers, dns_timeout=dns_timeout, + dns_retries=dns_retries, keep_alive=keep_alive, normalize_timespan_threshold_hours=normalize_timespan_threshold_hours, ) @@ -1742,6 +1772,7 @@ def parse_report_file( offline=offline, nameservers=nameservers, dns_timeout=dns_timeout, + dns_retries=dns_retries, strip_attachment_payloads=strip_attachment_payloads, keep_alive=keep_alive, normalize_timespan_threshold_hours=normalize_timespan_threshold_hours, @@ -1758,7 +1789,8 @@ def get_dmarc_reports_from_mbox( input_: str, *, nameservers: Optional[list[str]] = None, - dns_timeout: float = 2.0, + dns_timeout: float = DEFAULT_DNS_TIMEOUT, + dns_retries: int = DEFAULT_DNS_MAX_RETRIES, strip_attachment_payloads: bool = False, ip_db_path: Optional[str] = None, always_use_local_files: bool = False, @@ -1775,6 +1807,8 @@ def get_dmarc_reports_from_mbox( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Sets the DNS timeout in seconds + dns_retries (int): Number of times to retry DNS queries on timeout + or other transient errors strip_attachment_payloads (bool): Remove attachment payloads from forensic report results always_use_local_files (bool): Do not download files @@ -1811,6 +1845,7 @@ def get_dmarc_reports_from_mbox( offline=offline, nameservers=nameservers, dns_timeout=dns_timeout, + dns_retries=dns_retries, strip_attachment_payloads=sa, normalize_timespan_threshold_hours=normalize_timespan_threshold_hours, ) @@ -1855,6 +1890,7 @@ def get_dmarc_reports_from_mailbox( offline: bool = False, nameservers: Optional[list[str]] = None, dns_timeout: float = 6.0, + dns_retries: int = DEFAULT_DNS_MAX_RETRIES, strip_attachment_payloads: bool = False, results: Optional[ParsingResults] = None, batch_size: int = 10, @@ -1878,6 +1914,8 @@ def get_dmarc_reports_from_mailbox( offline (bool): Do not query online for geolocation or DNS nameservers (list): A list of DNS nameservers to query dns_timeout (float): Set the DNS query timeout + dns_retries (int): Number of times to retry DNS queries on timeout + or other transient errors strip_attachment_payloads (bool): Remove attachment payloads from forensic report results results (dict): Results from the previous run @@ -2001,6 +2039,7 @@ def get_dmarc_reports_from_mailbox( msg_content, nameservers=nameservers, dns_timeout=dns_timeout, + dns_retries=dns_retries, ip_db_path=ip_db_path, always_use_local_files=always_use_local_files, reverse_dns_map_path=reverse_dns_map_path, @@ -2159,6 +2198,7 @@ def get_dmarc_reports_from_mailbox( test=test, nameservers=nameservers, dns_timeout=dns_timeout, + dns_retries=dns_retries, strip_attachment_payloads=strip_attachment_payloads, results=results, ip_db_path=ip_db_path, @@ -2189,6 +2229,7 @@ def watch_inbox( offline: bool = False, nameservers: Optional[list[str]] = None, dns_timeout: float = 6.0, + dns_retries: int = DEFAULT_DNS_MAX_RETRIES, strip_attachment_payloads: bool = False, batch_size: int = 10, since: Optional[Union[datetime, date, str]] = None, @@ -2216,6 +2257,8 @@ def watch_inbox( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Set the DNS query timeout + dns_retries (int): Number of times to retry DNS queries on timeout + or other transient errors strip_attachment_payloads (bool): Replace attachment payloads in forensic report samples with None batch_size (int): Number of messages to read and process before saving @@ -2239,6 +2282,7 @@ def watch_inbox( offline=offline, nameservers=nameservers, dns_timeout=dns_timeout, + dns_retries=dns_retries, strip_attachment_payloads=strip_attachment_payloads, batch_size=batch_size, since=since, diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index eadfebc..bef7534 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -211,6 +211,7 @@ def cli_parse( sa, nameservers, dns_timeout, + dns_retries, ip_db_path, offline, always_use_local_files, @@ -228,6 +229,7 @@ def cli_parse( sa: Strip attachment payloads flag nameservers: List of nameservers dns_timeout: DNS timeout + dns_retries: Number of DNS retries on transient errors ip_db_path: Path to IP database offline: Offline mode flag always_use_local_files: Always use local files flag @@ -251,6 +253,7 @@ def cli_parse( reverse_dns_map_url=reverse_dns_map_url, nameservers=nameservers, dns_timeout=dns_timeout, + dns_retries=dns_retries, strip_attachment_payloads=sa, normalize_timespan_threshold_hours=normalize_timespan_threshold_hours, ) @@ -344,6 +347,10 @@ def _parse_config(config: ConfigParser, opts): opts.dns_timeout = general_config.getfloat("dns_timeout") if opts.dns_timeout is None: opts.dns_timeout = 2 + if "dns_retries" in general_config: + opts.dns_retries = general_config.getint("dns_retries") + if opts.dns_retries is None: + opts.dns_retries = 0 if "dns_test_address" in general_config: opts.dns_test_address = general_config["dns_test_address"] if "nameservers" in general_config: @@ -1652,6 +1659,14 @@ def _main(): type=float, default=2.0, ) + arg_parser.add_argument( + "--dns-retries", + dest="dns_retries", + help="number of times to retry DNS queries on timeout or other " + "transient errors (default: 0)", + type=int, + default=0, + ) arg_parser.add_argument( "--offline", action="store_true", @@ -1704,6 +1719,7 @@ def _main(): silent=args.silent, warnings=args.warnings, dns_timeout=args.dns_timeout, + dns_retries=args.dns_retries, debug=args.debug, verbose=args.verbose, prettify_json=args.prettify_json, @@ -1984,6 +2000,7 @@ def _main(): opts.strip_attachment_payloads, opts.nameservers, opts.dns_timeout, + opts.dns_retries, opts.ip_db_path, opts.offline, opts.always_use_local_files, @@ -2044,6 +2061,7 @@ def _main(): mbox_path, nameservers=opts.nameservers, dns_timeout=opts.dns_timeout, + dns_retries=opts.dns_retries, strip_attachment_payloads=strip, ip_db_path=opts.ip_db_path, always_use_local_files=opts.always_use_local_files, @@ -2189,6 +2207,7 @@ def _main(): test=opts.mailbox_test, strip_attachment_payloads=opts.strip_attachment_payloads, since=opts.mailbox_since, + dns_retries=opts.dns_retries, normalize_timespan_threshold_hours=normalize_timespan_threshold_hours_value, ) @@ -2272,6 +2291,7 @@ def _main(): check_timeout=mailbox_check_timeout_value, nameservers=opts.nameservers, dns_timeout=opts.dns_timeout, + dns_retries=opts.dns_retries, strip_attachment_payloads=opts.strip_attachment_payloads, batch_size=mailbox_batch_size_value, since=opts.mailbox_since, diff --git a/parsedmarc/constants.py b/parsedmarc/constants.py index a9100cd..2058710 100644 --- a/parsedmarc/constants.py +++ b/parsedmarc/constants.py @@ -1,3 +1,12 @@ -__version__ = "9.7.0" +__version__ = "9.7.1" USER_AGENT = f"parsedmarc/{__version__}" + +DEFAULT_DNS_TIMEOUT = 2.0 +DEFAULT_DNS_MAX_RETRIES = 0 +# Recommended mix of public resolvers for cross-provider DNS failover. Not +# applied automatically — callers opt in by passing +# ``nameservers=RECOMMENDED_DNS_NAMESERVERS``. Mixing providers means a single +# operator's anycast outage or authoritative-server incompatibility falls +# through to a different provider within one resolve() call. +RECOMMENDED_DNS_NAMESERVERS = ("1.1.1.1", "8.8.8.8") diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py index da2285a..1df6ea2 100644 --- a/parsedmarc/utils.py +++ b/parsedmarc/utils.py @@ -40,9 +40,22 @@ from dateutil.parser import parse as parse_date import parsedmarc.resources.dbip import parsedmarc.resources.maps -from parsedmarc.constants import USER_AGENT +from parsedmarc.constants import ( + DEFAULT_DNS_MAX_RETRIES, + DEFAULT_DNS_TIMEOUT, + USER_AGENT, +) from parsedmarc.log import logger +# Errors considered transient and retryable by query_dns. LifetimeTimeout is +# dnspython's deadline expiry; NoNameservers typically wraps a SERVFAIL from +# upstream; OSError covers socket-level failures during TCP fallback. +_RETRYABLE_DNS_ERRORS = ( + dns.resolver.LifetimeTimeout, + dns.resolver.NoNameservers, + OSError, +) + parenthesis_regex = re.compile(r"\s*\(.*\)\s*") null_file = open(os.devnull, "w") @@ -189,7 +202,9 @@ def query_dns( *, cache: Optional[ExpiringDict] = None, nameservers: Optional[list[str]] = None, - timeout: float = 2.0, + timeout: float = DEFAULT_DNS_TIMEOUT, + retries: int = DEFAULT_DNS_MAX_RETRIES, + _attempt: int = 0, ) -> list[str]: """ Queries DNS @@ -199,8 +214,21 @@ def query_dns( record_type (str): The record type to query for cache (ExpiringDict): Cache storage nameservers (list): A list of one or more nameservers to use - (Cloudflare's public DNS resolvers by default) - timeout (float): Sets the DNS timeout in seconds + (Cloudflare's public DNS resolvers by default). Pass + ``parsedmarc.constants.RECOMMENDED_DNS_NAMESERVERS`` for a + cross-provider mix that fails over when one provider's path is + slow or broken. + timeout (float): Overall DNS lifetime budget in seconds per + configured nameserver. Per-query UDP attempts are capped at + ``min(1.0, timeout)`` so dnspython retries within the lifetime on + transient UDP packet loss (mirroring ``dig``'s default + ``+tries=3`` behavior); with multiple nameservers configured this + same cap also makes a slow or broken nameserver fall through to + the next quickly. + retries (int): Number of times to retry the whole query after a + timeout or other transient error (``LifetimeTimeout``, + ``NoNameservers``, ``OSError``). Failover between configured + nameservers happens within each attempt. Returns: list: A list of answers @@ -223,12 +251,36 @@ def query_dns( "2606:4700:4700::1001", ] resolver.nameservers = nameservers - resolver.timeout = timeout - resolver.lifetime = timeout + # Cap per-query UDP timeout at 1s so dnspython retries within the + # lifetime window on transient packet loss — otherwise with a single + # nameserver and timeout == lifetime, one dropped UDP datagram consumes + # the whole budget and raises LifetimeTimeout without a retry (dig's + # default +tries=3 masks this case). With multiple nameservers the same + # cap lets a slow/broken one fall through. + resolver.timeout = min(1.0, timeout) + if len(resolver.nameservers) > 1: + resolver.lifetime = timeout * len(resolver.nameservers) + else: + resolver.lifetime = timeout + try: + answers = resolver.resolve(domain, record_type, lifetime=resolver.lifetime) + except _RETRYABLE_DNS_ERRORS as e: + _attempt += 1 + if _attempt > retries: + raise e + return query_dns( + domain, + record_type, + cache=cache, + nameservers=nameservers, + timeout=timeout, + retries=retries, + _attempt=_attempt, + ) records = list( map( lambda r: r.to_text().replace('"', "").rstrip("."), - resolver.resolve(domain, record_type, lifetime=timeout), + answers, ) ) if cache: @@ -242,7 +294,8 @@ def get_reverse_dns( *, cache: Optional[ExpiringDict] = None, nameservers: Optional[list[str]] = None, - timeout: float = 2.0, + timeout: float = DEFAULT_DNS_TIMEOUT, + retries: int = DEFAULT_DNS_MAX_RETRIES, ) -> Optional[str]: """ Resolves an IP address to a hostname using a reverse DNS query @@ -253,6 +306,8 @@ def get_reverse_dns( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) timeout (float): Sets the DNS query timeout in seconds + retries (int): Number of times to retry on timeout or other transient + errors Returns: str: The reverse DNS hostname (if any) @@ -261,7 +316,12 @@ def get_reverse_dns( try: address = dns.reversename.from_address(ip_address) hostname = query_dns( - str(address), "PTR", cache=cache, nameservers=nameservers, timeout=timeout + str(address), + "PTR", + cache=cache, + nameservers=nameservers, + timeout=timeout, + retries=retries, )[0] except dns.exception.DNSException as e: @@ -616,7 +676,8 @@ def get_ip_address_info( reverse_dns_map: Optional[ReverseDNSMap] = None, offline: bool = False, nameservers: Optional[list[str]] = None, - timeout: float = 2.0, + timeout: float = DEFAULT_DNS_TIMEOUT, + retries: int = DEFAULT_DNS_MAX_RETRIES, ) -> IPAddressInfo: """ Returns reverse DNS and country information for the given IP address @@ -633,6 +694,8 @@ def get_ip_address_info( nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) timeout (float): Sets the DNS timeout in seconds + retries (int): Number of times to retry on timeout or other transient + errors Returns: dict: ``ip_address``, ``reverse_dns``, ``country`` @@ -660,7 +723,10 @@ def get_ip_address_info( reverse_dns = None else: reverse_dns = get_reverse_dns( - ip_address, nameservers=nameservers, timeout=timeout + ip_address, + nameservers=nameservers, + timeout=timeout, + retries=retries, ) country = get_ip_address_country(ip_address, db_path=ip_db_path) info["country"] = country