Add TypedDict definitions to replace dict[str, Any] for better typing

- Added comprehensive TypedDict definitions for all major data structures
- IPAddressInfo, EmailAddress, ReverseDNSService in utils.py
- AggregateReport, ForensicReport, SMTPTLSReport and related types in __init__.py
- Updated function signatures throughout codebase to use TypedDict types
- Reduced dict[str, Any] usage from 50+ to 11 (remaining are legitimate generic cases)
- All tests pass successfully

Co-authored-by: seanthegeek <44679+seanthegeek@users.noreply.github.com>
2026-02-17 07:03:58 +00:00 · 2025-12-19 21:29:14 +00:00
parent 639f8f674b
commit a4ac5accd2
10 changed files with 245 additions and 48 deletions
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -4,7 +4,7 @@

 from __future__ import annotations

-from typing import Dict, List, Any, Union, Optional, IO, Callable
+from typing import Dict, List, Any, Union, Optional, IO, Callable, TypedDict

 import binascii
 import email
@@ -38,12 +38,177 @@ from parsedmarc.mail import (
 )

 from parsedmarc.constants import __version__
-from parsedmarc.utils import get_base_domain, get_ip_address_info
+from parsedmarc.utils import get_base_domain, get_ip_address_info, IPAddressInfo
 from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
 from parsedmarc.utils import parse_email
 from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime


+# TypedDict definitions for DMARC report structures
+class PolicyOverrideReason(TypedDict, total=False):
+    """One DMARC policy override reason; every key is optional (total=False)"""
+    type: str
+    comment: Optional[str]
+
+
+class PolicyEvaluated(TypedDict):
+    """DMARC policy evaluation result, as stored in DMARCRecord.policy_evaluated"""
+    disposition: str
+    dkim: str
+    spf: str
+    policy_override_reasons: list[PolicyOverrideReason]
+
+
+class Alignment(TypedDict):
+    """Boolean SPF, DKIM and DMARC alignment flags, stored in DMARCRecord.alignment"""
+    spf: bool
+    dkim: bool
+    dmarc: bool
+
+
+class DKIMResult(TypedDict, total=False):
+    """One DKIM authentication result (AuthResults.dkim item); all keys optional"""
+    domain: str
+    selector: str
+    result: str
+
+
+class SPFResult(TypedDict, total=False):
+    """One SPF authentication result (AuthResults.spf item); all keys optional"""
+    domain: str
+    scope: str
+    result: str
+
+
+class AuthResults(TypedDict):
+    """DKIM and SPF authentication result lists, stored in DMARCRecord.auth_results"""
+    dkim: list[DKIMResult]
+    spf: list[SPFResult]
+
+
+class DMARCIdentifiers(TypedDict):
+    """Message identifiers of a record; only envelope_to is annotated as nullable"""
+    header_from: str
+    envelope_from: str
+    envelope_to: Optional[str]
+
+
+class DMARCRecord(TypedDict):
+    """One parsed aggregate report record, as returned by _parse_report_record"""
+    source: IPAddressInfo
+    count: int
+    alignment: Alignment
+    policy_evaluated: PolicyEvaluated
+    identifiers: DMARCIdentifiers
+    auth_results: AuthResults
+
+
+class PublishedPolicy(TypedDict):
+    """Published DMARC policy of a report; every tag value, pct included, is a str"""
+    domain: str
+    adkim: str
+    aspf: str
+    p: str
+    sp: str
+    pct: str
+    fo: str
+
+
+class ReportMetadata(TypedDict, total=False):
+    """Aggregate report metadata (AggregateReport.report_metadata); keys optional"""
+    org_name: str
+    org_email: str
+    org_extra_contact_info: Optional[str]
+    report_id: str
+    begin_date: str
+    end_date: str
+    errors: list[str]
+    timespan_requires_normalization: bool
+    original_timespan_seconds: int
+
+
+class AggregateReport(TypedDict):
+    """Parsed DMARC aggregate report, as returned by parse_aggregate_report_xml"""
+    xml_schema: str
+    report_metadata: ReportMetadata
+    policy_published: PublishedPolicy
+    records: list[DMARCRecord]
+
+
+class SMTPTLSFailureDetails(TypedDict, total=False):
+    """One SMTPTLSPolicy.failure_details entry; every key is optional (total=False)"""
+    result_type: str
+    failed_session_count: int
+    sending_mta_ip: Optional[str]
+    receiving_ip: Optional[str]
+    receiving_mx_hostname: Optional[str]
+    receiving_mx_helo: Optional[str]
+    additional_info_uri: Optional[str]
+    failure_reason_code: Optional[str]
+
+
+class SMTPTLSPolicy(TypedDict, total=False):
+    """One SMTPTLSReport.policies entry; every key is optional (total=False)"""
+    policy_domain: str
+    policy_type: str
+    policy_strings: Optional[list[str]]
+    mx_host_patterns: Optional[list[str]]
+    successful_session_count: int
+    total_successful_session_count: int
+    total_failure_session_count: int
+    failure_details: list[SMTPTLSFailureDetails]
+
+
+class SMTPTLSReport(TypedDict):
+    """Parsed SMTP TLS report, as returned by parse_smtp_tls_report_json"""
+    organization_name: str
+    begin_date: str
+    end_date: str
+    contact_info: str
+    report_id: str
+    policies: list[SMTPTLSPolicy]
+
+
+class ForensicReport(TypedDict, total=False):
+    """Parsed DMARC forensic report, as returned by parse_forensic_report
+
+    Every key is optional (total=False); each key is declared exactly once.
+    """
+    feedback_type: str
+    user_agent: Optional[str]
+    version: Optional[str]
+    original_envelope_id: Optional[str]
+    original_mail_from: Optional[str]
+    original_rcpt_to: Optional[str]
+    arrival_date: str
+    arrival_date_utc: str
+    subject: Optional[str]
+    message_id: Optional[str]
+    authentication_results: Optional[str]
+    dkim_domain: Optional[str]
+    source_ip_address: Optional[str]
+    source_country: Optional[str]
+    source_reverse_dns: Optional[str]
+    source_base_domain: Optional[str]
+    delivery_result: Optional[str]
+    auth_failure: Optional[list[str]]
+    reported_domain: Optional[str]
+    sample: Optional[str]
+    parsed_sample: Optional[dict]
+    sample_headers_only: bool
+
+
+class ParsedReport(TypedDict):
+    """One parsed report plus its report_type, as returned by parse_report_file"""
+    report_type: str
+    report: Union[AggregateReport, ForensicReport, SMTPTLSReport]
+
+
+class ParseResults(TypedDict):
+    """Parsed reports grouped by kind, as returned by get_dmarc_reports_from_mbox"""
+    aggregate_reports: list[AggregateReport]
+    forensic_reports: list[ForensicReport]
+    smtp_tls_reports: list[SMTPTLSReport]
+
+
 logger.debug("parsedmarc v{0}".format(__version__))

 feedback_report_regex = re.compile(r"^([\w\-]+): (.+)$", re.MULTILINE)
@@ -230,8 +395,8 @@ def _bucket_interval_by_day(


 def _append_parsed_record(
-    parsed_record: dict[str, Any],
-    records: list[dict[str, Any]],
+    parsed_record: DMARCRecord,
+    records: list[DMARCRecord],
    begin_dt: datetime,
    end_dt: datetime,
    normalize: bool,
@@ -283,7 +448,7 @@ def _parse_report_record(
    offline: Optional[bool] = False,
    nameservers: Optional[list[str]] = None,
    dns_timeout: Optional[float] = 2.0,
-) -> dict[str, Any]:
+) -> DMARCRecord:
    """
    Converts a record from a DMARC aggregate report into a more consistent
    format
@@ -512,7 +677,7 @@ def _parse_smtp_tls_report_policy(policy: dict[str, Any]):
        raise InvalidSMTPTLSReport(str(e))


-def parse_smtp_tls_report_json(report: str):
+def parse_smtp_tls_report_json(report: str) -> SMTPTLSReport:
    """Parses and validates an SMTP TLS report"""
    required_fields = [
        "organization-name",
@@ -552,7 +717,7 @@ def parse_smtp_tls_report_json(report: str):


 def parsed_smtp_tls_reports_to_csv_rows(
-    reports: Union[dict[str, Any], List[dict[str, Any]]],
+    reports: Union[SMTPTLSReport, list[SMTPTLSReport]],
 ):
    """Converts one oor more parsed SMTP TLS reports into a list of single
    layer OrderedDict objects suitable for use in a CSV"""
@@ -588,7 +753,7 @@ def parsed_smtp_tls_reports_to_csv_rows(
    return rows


-def parsed_smtp_tls_reports_to_csv(reports: dict[str, Any]) -> str:
+def parsed_smtp_tls_reports_to_csv(
+    reports: Union[SMTPTLSReport, list[SMTPTLSReport]],
+) -> str:
    """
    Converts one or more parsed SMTP TLS reports to flat CSV format, including
    headers
@@ -645,7 +810,7 @@ def parse_aggregate_report_xml(
    timeout: Optional[float] = 2.0,
    keep_alive: Optional[Callable] = None,
    normalize_timespan_threshold_hours: float = 24.0,
-) -> dict[str, Any]:
+) -> AggregateReport:
    """Parses a DMARC XML report string and returns a consistent OrderedDict

    Args:
@@ -923,7 +1088,7 @@ def parse_aggregate_report_file(
    dns_timeout: Optional[float] = 2.0,
    keep_alive: Optional[Callable] = None,
    normalize_timespan_threshold_hours: Optional[float] = 24.0,
-) -> dict[str, Any]:
+) -> AggregateReport:
    """Parses a file at the given path, a file-like object. or bytes as an
    aggregate DMARC report

@@ -964,7 +1129,7 @@ def parse_aggregate_report_file(


 def parsed_aggregate_reports_to_csv_rows(
-    reports: list[dict[str, Any]],
+    reports: list[AggregateReport],
 ) -> list[dict[str, Any]]:
    """
    Converts one or more parsed aggregate reports to list of dicts in flat CSV
@@ -1089,7 +1254,7 @@ def parsed_aggregate_reports_to_csv_rows(
    return rows


-def parsed_aggregate_reports_to_csv(reports: list[dict[str, Any]]) -> str:
+def parsed_aggregate_reports_to_csv(reports: list[AggregateReport]) -> str:
    """
    Converts one or more parsed aggregate reports to flat CSV format, including
    headers
@@ -1168,7 +1333,7 @@ def parse_forensic_report(
    nameservers: Optional[list[str]] = None,
    dns_timeout: Optional[float] = 2.0,
    strip_attachment_payloads: Optional[bool] = False,
-) -> dict[str, Any]:
+) -> ForensicReport:
    """
    Converts a DMARC forensic report and sample to a ``OrderedDict``

@@ -1296,7 +1461,7 @@ def parse_forensic_report(
        raise InvalidForensicReport("Unexpected error: {0}".format(error.__str__()))


-def parsed_forensic_reports_to_csv_rows(reports: list[dict[str, Any]]):
+def parsed_forensic_reports_to_csv_rows(reports: list[ForensicReport]):
    """
    Converts one or more parsed forensic reports to a list of dicts in flat CSV
    format
@@ -1332,7 +1497,7 @@ def parsed_forensic_reports_to_csv_rows(reports: list[dict[str, Any]]):
    return rows


-def parsed_forensic_reports_to_csv(reports: list[dict[str, Any]]) -> str:
+def parsed_forensic_reports_to_csv(reports: list[ForensicReport]) -> str:
    """
    Converts one or more parsed forensic reports to flat CSV format, including
    headers
@@ -1397,7 +1562,7 @@ def parse_report_email(
    strip_attachment_payloads: Optional[bool] = False,
    keep_alive: Optional[callable] = None,
    normalize_timespan_threshold_hours: Optional[float] = 24.0,
-) -> dict[str, Any]:
+) -> ParsedReport:
    """
    Parses a DMARC report from an email

@@ -1602,7 +1767,7 @@ def parse_report_file(
    offline: Optional[bool] = False,
    keep_alive: Optional[Callable] = None,
    normalize_timespan_threshold_hours: Optional[float] = 24,
-) -> dict[str, Any]:
+) -> ParsedReport:
    """Parses a DMARC aggregate or forensic file at the given path, a
    file-like object. or bytes

@@ -1692,7 +1857,7 @@ def get_dmarc_reports_from_mbox(
    reverse_dns_map_url: Optional[str] = None,
    offline: Optional[bool] = False,
    normalize_timespan_threshold_hours: Optional[float] = 24.0,
-) -> dict[str, dict[str, Any]]:
+) -> ParseResults:
    """Parses a mailbox in mbox format containing e-mails with attached
    DMARC reports

@@ -1787,7 +1952,7 @@ def get_dmarc_reports_from_mailbox(
    since: Optional[datetime] = None,
    create_folders: Optional[bool] = True,
    normalize_timespan_threshold_hours: Optional[float] = 24,
-) -> dict[str, dict[str, Any]]:
+) -> ParseResults:
    """
    Fetches and parses DMARC reports from a mailbox

@@ -2281,7 +2446,7 @@ def save_output(
            sample_file.write(sample)


-def get_report_zip(results: dict[str, Any]) -> bytes:
+def get_report_zip(results: ParseResults) -> bytes:
    """
    Creates a zip file of parsed report output

--- a/parsedmarc/elastic.py
+++ b/parsedmarc/elastic.py
@@ -24,7 +24,7 @@ from elasticsearch.helpers import reindex

 from parsedmarc.log import logger
 from parsedmarc.utils import human_timestamp_to_datetime
-from parsedmarc import InvalidForensicReport
+from parsedmarc import InvalidForensicReport, AggregateReport, ForensicReport, SMTPTLSReport


 class ElasticsearchError(Exception):
@@ -376,7 +376,7 @@ def migrate_indexes(


 def save_aggregate_report_to_elasticsearch(
-    aggregate_report: dict[str, Any],
+    aggregate_report: AggregateReport,
    index_suffix: Optional[str] = None,
    index_prefix: Optional[str] = None,
    monthly_indexes: Optional[bool] = False,
@@ -538,7 +538,7 @@ def save_aggregate_report_to_elasticsearch(


 def save_forensic_report_to_elasticsearch(
-    forensic_report: dict[str, Any],
+    forensic_report: ForensicReport,
    index_suffix: Optional[Any] = None,
    index_prefix: Optional[str] = None,
    monthly_indexes: Optional[bool] = False,
@@ -705,7 +705,7 @@ def save_forensic_report_to_elasticsearch(


 def save_smtp_tls_report_to_elasticsearch(
-    report: dict[str, Any],
+    report: SMTPTLSReport,
    index_suffix: Optional[str] = None,
    index_prefix: Optional[str] = None,
    monthly_indexes: Optional[bool] = False,
--- a/parsedmarc/gelf.py
+++ b/parsedmarc/gelf.py
@@ -3,6 +3,7 @@
 from __future__ import annotations

 from typing import Any
+from parsedmarc import AggregateReport, ForensicReport, SMTPTLSReport, ParseResults

 import logging
 import logging.handlers
@@ -53,7 +54,7 @@ class GelfClient(object):
        self.logger.addHandler(self.handler)

    def save_aggregate_report_to_gelf(
-        self, aggregate_reports: list[dict[str, Any]]
+        self, aggregate_reports: list[AggregateReport]
    ):
        rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
        for row in rows:
@@ -63,13 +64,13 @@ class GelfClient(object):
        log_context_data.parsedmarc = None

    def save_forensic_report_to_gelf(
-        self, forensic_reports: list[dict[str, Any]]
+        self, forensic_reports: list[ForensicReport]
    ):
        rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
        for row in rows:
            self.logger.info(json.dumps(row))

-    def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: dict[str, Any]):
+    def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: SMTPTLSReport):
        rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
        for row in rows:
            self.logger.info(json.dumps(row))
--- a/parsedmarc/kafkaclient.py
+++ b/parsedmarc/kafkaclient.py
@@ -3,6 +3,7 @@
 from __future__ import annotations

 from typing import Any, Optional, Union
+from parsedmarc import AggregateReport, ForensicReport, SMTPTLSReport, ParseResults
 from ssl import SSLContext

 import json
@@ -98,7 +99,7 @@ class KafkaClient(object):

    def save_aggregate_reports_to_kafka(
        self,
-        aggregate_reports: Union[dict[str, Any], list[dict[str, Any]]],
+        aggregate_reports: Union[AggregateReport, list[AggregateReport]],
        aggregate_topic: str,
    ):
        """
@@ -145,7 +146,7 @@ class KafkaClient(object):

    def save_forensic_reports_to_kafka(
        self,
-        forensic_reports: Union[dict[str, Any], list[dict[str, Any]]],
+        forensic_reports: Union[ForensicReport, list[ForensicReport]],
        forensic_topic: str,
    ):
        """
@@ -179,7 +180,7 @@ class KafkaClient(object):

    def save_smtp_tls_reports_to_kafka(
        self,
-        smtp_tls_reports: Union[list[dict[str, Any]], dict[str, Any]],
+        smtp_tls_reports: Union[list[SMTPTLSReport], SMTPTLSReport],
        smtp_tls_topic: str,
    ):
        """
--- a/parsedmarc/loganalytics.py
+++ b/parsedmarc/loganalytics.py
@@ -3,6 +3,7 @@
 from __future__ import annotations

 from typing import Any
+from parsedmarc import AggregateReport, ForensicReport, SMTPTLSReport, ParseResults

 from parsedmarc.log import logger
 from azure.core.exceptions import HttpResponseError
@@ -132,7 +133,7 @@ class LogAnalyticsClient(object):

    def publish_results(
        self,
-        results: dict[str, dict[str, Any]],
+        results: ParseResults,
        save_aggregate: bool,
        save_forensic: bool,
        save_smtp_tls: bool,
--- a/parsedmarc/opensearch.py
+++ b/parsedmarc/opensearch.py
@@ -24,7 +24,7 @@ from opensearchpy.helpers import reindex

 from parsedmarc.log import logger
 from parsedmarc.utils import human_timestamp_to_datetime
-from parsedmarc import InvalidForensicReport
+from parsedmarc import InvalidForensicReport, AggregateReport, ForensicReport, SMTPTLSReport


 class OpenSearchError(Exception):
@@ -376,7 +376,7 @@ def migrate_indexes(


 def save_aggregate_report_to_opensearch(
-    aggregate_report: dict[str, Any],
+    aggregate_report: AggregateReport,
    index_suffix: Optional[str] = None,
    index_prefix: Optional[str] = None,
    monthly_indexes: Optional[bool] = False,
@@ -538,7 +538,7 @@ def save_aggregate_report_to_opensearch(


 def save_forensic_report_to_opensearch(
-    forensic_report: dict[str, Any],
+    forensic_report: ForensicReport,
    index_suffix: Optional[str] = None,
    index_prefix: Optional[str] = None,
    monthly_indexes: Optional[bool] = False,
--- a/parsedmarc/s3.py
+++ b/parsedmarc/s3.py
@@ -3,6 +3,7 @@
 from __future__ import annotations

-from typing import Any
+from typing import Any, Union
+from parsedmarc import AggregateReport, ForensicReport, SMTPTLSReport, ParseResults

 import json
 import boto3
@@ -54,16 +55,16 @@ class S3Client(object):
        )
        self.bucket = self.s3.Bucket(self.bucket_name)  # type: ignore

-    def save_aggregate_report_to_s3(self, report: dict[str, Any]):
+    def save_aggregate_report_to_s3(self, report: AggregateReport):
        self.save_report_to_s3(report, "aggregate")

-    def save_forensic_report_to_s3(self, report: dict[str, Any]):
+    def save_forensic_report_to_s3(self, report: ForensicReport):
        self.save_report_to_s3(report, "forensic")

-    def save_smtp_tls_report_to_s3(self, report: dict[str, Any]):
+    def save_smtp_tls_report_to_s3(self, report: SMTPTLSReport):
        self.save_report_to_s3(report, "smtp_tls")

-    def save_report_to_s3(self, report: dict[str, Any], report_type: str):
+    def save_report_to_s3(self, report: Union[AggregateReport, ForensicReport, SMTPTLSReport], report_type: str):
        if report_type == "smtp_tls":
            report_date = report["begin_date"]
            report_id = report["report_id"]
--- a/parsedmarc/splunk.py
+++ b/parsedmarc/splunk.py
@@ -3,6 +3,7 @@
 from __future__ import annotations

 from typing import Any, Union
+from parsedmarc import AggregateReport, ForensicReport, SMTPTLSReport, ParseResults


 from urllib.parse import urlparse
@@ -72,7 +73,7 @@ class HECClient(object):

    def save_aggregate_reports_to_splunk(
        self,
-        aggregate_reports: Union[list[dict[str, Any]], dict[str, Any]],
+        aggregate_reports: Union[list[AggregateReport], AggregateReport],
    ):
        """
        Saves aggregate DMARC reports to Splunk
@@ -138,7 +139,7 @@ class HECClient(object):

    def save_forensic_reports_to_splunk(
        self,
-        forensic_reports: Union[list[dict[str, Any]], dict[str, Any]],
+        forensic_reports: Union[list[ForensicReport], ForensicReport],
    ):
        """
        Saves forensic DMARC reports to Splunk
@@ -174,7 +175,7 @@ class HECClient(object):
            raise SplunkError(response["text"])

    def save_smtp_tls_reports_to_splunk(
-        self, reports: Union[list[dict[str, Any]], dict[str, Any]]
+        self, reports: Union[list[SMTPTLSReport], SMTPTLSReport]
    ):
        """
        Saves aggregate DMARC reports to Splunk
--- a/parsedmarc/syslog.py
+++ b/parsedmarc/syslog.py
@@ -7,6 +7,7 @@ import logging
 import logging.handlers

 from typing import Any
+from parsedmarc import AggregateReport, ForensicReport, SMTPTLSReport, ParseResults


 import json
@@ -36,21 +37,21 @@ class SyslogClient(object):
        self.logger.addHandler(log_handler)

    def save_aggregate_report_to_syslog(
-        self, aggregate_reports: list[dict[str, Any]]
+        self, aggregate_reports: list[AggregateReport]
    ):
        rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
        for row in rows:
            self.logger.info(json.dumps(row))

    def save_forensic_report_to_syslog(
-        self, forensic_reports: list[dict[str, Any]]
+        self, forensic_reports: list[ForensicReport]
    ):
        rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
        for row in rows:
            self.logger.info(json.dumps(row))

    def save_smtp_tls_report_to_syslog(
-        self, smtp_tls_reports: list[dict[str, Any]]
+        self, smtp_tls_reports: list[SMTPTLSReport]
    ):
        rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
        for row in rows:
--- a/parsedmarc/utils.py
+++ b/parsedmarc/utils.py
@@ -4,7 +4,7 @@

 from __future__ import annotations

-from typing import Optional, Union
+from typing import Optional, Union, TypedDict

 import logging
 import os
@@ -45,6 +45,32 @@ import parsedmarc.resources.dbip
 import parsedmarc.resources.maps
 from parsedmarc.constants import USER_AGENT

+
+# TypedDict definitions for better typing
+class IPAddressInfo(TypedDict, total=False):
+    """IP address details returned by get_ip_address_info; all keys optional"""
+    ip_address: str
+    country: Optional[str]
+    reverse_dns: Optional[str]
+    base_domain: Optional[str]
+    name: Optional[str]
+    type: Optional[str]
+
+
+class EmailAddress(TypedDict, total=False):
+    """Parsed e-mail address returned by parse_email_address; all keys optional"""
+    display_name: Optional[str]
+    address: str
+    local: Optional[str]
+    domain: Optional[str]
+
+
+class ReverseDNSService(TypedDict):
+    """Service name and type returned by get_service_from_reverse_dns_base_domain"""
+    name: str
+    type: Optional[str]
+
+
 parenthesis_regex = re.compile(r"\s*\(.*\)\s*")

 null_file = open(os.devnull, "w")
@@ -341,7 +367,7 @@ def get_service_from_reverse_dns_base_domain(
    url: Optional[bool] = None,
    offline: Optional[bool] = False,
    reverse_dns_map: Optional[bool] = None,
-) -> str:
+) -> ReverseDNSService:
    """
    Returns the service name of a given base domain name from reverse DNS.

@@ -421,7 +447,7 @@ def get_ip_address_info(
    offline: Optional[bool] = False,
    nameservers: Optional[list[str]] = None,
    timeout: Optional[float] = 2.0,
-) -> dict[str, str]:
+) -> IPAddressInfo:
    """
    Returns reverse DNS and country information for the given IP address

@@ -486,7 +512,7 @@ def get_ip_address_info(
    return info


-def parse_email_address(original_address: str) -> dict[str, str]:
+def parse_email_address(original_address: str) -> EmailAddress:
    if original_address[0] == "":
        display_name = None
    else: