Mirror of https://github.com/domainaware/parsedmarc.git
Synced 2026-02-18 07:26:25 +00:00

Compare commits: 9.0.10...copilot/re (6 commits)

Commits:
- 2174f23eb5
- febbb107c4
- 9a64b494e7
- e93209c766
- d1c22466be
- 3d1b2522d3
@@ -4,7 +4,7 @@
from __future__ import annotations
-from typing import Dict, List, Any, Union, Optional, IO, Callable
+from typing import Any, Union, Optional, IO, Callable, TypedDict, NotRequired
import binascii
import email
@@ -19,7 +19,6 @@ import xml.parsers.expat as expat
import zipfile
import zlib
from base64 import b64decode
-from collections import OrderedDict
from csv import DictWriter
from datetime import datetime, timedelta, timezone, tzinfo
from io import BytesIO, StringIO
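The `OrderedDict` import is dropped here because the rest of the change set replaces `OrderedDict` with plain `dict`. Since Python 3.7 the built-in dict preserves insertion order, so key order in serialized output does not change; a quick standard-library-only check (not part of the commit):

```python
import json
from collections import OrderedDict

ordered = OrderedDict([("xml_schema", "draft"), ("report_metadata", {}), ("records", [])])
plain = {"xml_schema": "draft", "report_metadata": {}, "records": []}

# Plain dicts preserve insertion order on Python 3.7+, so the serialized
# output is identical to the OrderedDict version.
assert list(ordered) == list(plain)
assert json.dumps(ordered) == json.dumps(plain)
```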
@@ -93,11 +92,188 @@ class InvalidForensicReport(InvalidDMARCReport):
"""Raised when an invalid DMARC forensic report is encountered"""
+# TypedDict definitions for structured data
+class DateIntervalBucket(TypedDict):
+    """Represents a time bucket for interval normalization"""
+    begin: datetime
+    end: datetime
+    count: int
+class IPAddressInfo(TypedDict, total=False):
+    """Information about an IP address"""
+    ip_address: str
+    country: NotRequired[Optional[str]]
+    reverse_dns: NotRequired[Optional[str]]
+    base_domain: NotRequired[Optional[str]]
+    type: NotRequired[Optional[str]]
+    name: NotRequired[Optional[str]]
+class AlignmentInfo(TypedDict):
+    """DMARC alignment information"""
+    spf: bool
+    dkim: bool
+    dmarc: bool
+class PolicyOverrideReason(TypedDict, total=False):
+    """DMARC policy override reason"""
+    type: NotRequired[str]
+    comment: NotRequired[Optional[str]]
+class PolicyEvaluated(TypedDict):
+    """DMARC policy evaluation result"""
+    disposition: str
+    dkim: str
+    spf: str
+    policy_override_reasons: list[PolicyOverrideReason]
+class DKIMAuthResult(TypedDict):
+    """DKIM authentication result"""
+    domain: str
+    selector: str
+    result: str
+class SPFAuthResult(TypedDict):
+    """SPF authentication result"""
+    domain: str
+    scope: str
+    result: str
+class AuthResults(TypedDict):
+    """Authentication results for DKIM and SPF"""
+    dkim: list[DKIMAuthResult]
+    spf: list[SPFAuthResult]
+class Identifiers(TypedDict):
+    """Message identifiers"""
+    header_from: str
+    envelope_from: Optional[str]
+    envelope_to: Optional[str]
+class ParsedReportRecord(TypedDict):
+    """A parsed DMARC aggregate report record"""
+    source: IPAddressInfo
+    count: int
+    alignment: AlignmentInfo
+    policy_evaluated: PolicyEvaluated
+    identifiers: Identifiers
+    auth_results: AuthResults
+class ParsedReportRecordWithMetadata(ParsedReportRecord, total=False):
+    """A parsed DMARC report record with normalization metadata"""
+    normalized_timespan: bool
+    interval_begin: NotRequired[str]
+    interval_end: NotRequired[str]
+class ReportMetadata(TypedDict, total=False):
+    """DMARC report metadata"""
+    org_name: str
+    org_email: NotRequired[Optional[str]]
+    org_extra_contact_info: NotRequired[Optional[str]]
+    report_id: str
+    begin_date: str
+    end_date: str
+    errors: NotRequired[list[str]]
+class PolicyPublished(TypedDict, total=False):
+    """DMARC policy as published in DNS"""
+    domain: str
+    adkim: NotRequired[str]
+    aspf: NotRequired[str]
+    p: str
+    sp: NotRequired[str]
+    pct: NotRequired[str]
+    fo: NotRequired[str]
+class ParsedAggregateReport(TypedDict):
+    """A complete parsed DMARC aggregate report"""
+    xml_schema: str
+    report_metadata: ReportMetadata
+    policy_published: PolicyPublished
+    records: list[ParsedReportRecord]
+class SMTPTLSFailureDetails(TypedDict):
+    """SMTP TLS failure details"""
+    result_type: str
+    sending_mta_ip: NotRequired[Optional[str]]
+    receiving_mx_hostname: NotRequired[Optional[str]]
+    receiving_mx_helo: NotRequired[Optional[str]]
+    receiving_ip: NotRequired[Optional[str]]
+    failed_session_count: int
+    additional_information: NotRequired[Optional[str]]
+    failure_reason_code: NotRequired[Optional[str]]
+class SMTPTLSPolicy(TypedDict, total=False):
+    """SMTP TLS policy information"""
+    policy_type: str
+    policy_domain: str
+    policy_strings: NotRequired[list[str]]
+    mx_host_patterns: NotRequired[list[str]]
+    successful_session_count: int
+    failed_session_count: int
+    failure_details: NotRequired[list[SMTPTLSFailureDetails]]
+class ParsedSMTPTLSReport(TypedDict):
+    """A complete parsed SMTP TLS report"""
+    organization_name: str
+    begin_date: str
+    end_date: str
+    contact_info: str
+    report_id: str
+    policies: list[SMTPTLSPolicy]
+class ParsedForensicReport(TypedDict, total=False):
+    """A parsed DMARC forensic report"""
+    feedback_type: str
+    user_agent: NotRequired[Optional[str]]
+    version: NotRequired[Optional[str]]
+    original_envelope_id: NotRequired[Optional[str]]
+    original_mail_from: NotRequired[Optional[str]]
+    original_rcpt_to: NotRequired[Optional[str]]
+    arrival_date: str
+    arrival_date_utc: str
+    subject: NotRequired[str]
+    message_id: str
+    authentication_results: str
+    delivery_result: str
+    auth_failure: list[str]
+    reported_domain: str
+    arrival_date_utc: str
+    source: IPAddressInfo
+    authentication_mechanisms: list[str]
+    dkim_domain: NotRequired[Optional[str]]
+    sample_headers_only: bool
+    sample: NotRequired[str]
+    parsed_sample: NotRequired[dict[str, Any]]
+class ReportTypeWrapper(TypedDict):
+    """Wrapper for report type identification"""
+    report_type: str
+    report: Union[ParsedAggregateReport, ParsedForensicReport, ParsedSMTPTLSReport]
def _bucket_interval_by_day(
begin: datetime,
end: datetime,
total_count: int,
-) -> List[Dict[str, Any]]:
+) -> list[DateIntervalBucket]:
"""
Split the interval [begin, end) into daily buckets and distribute
`total_count` proportionally across those buckets.
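The new TypedDict classes change nothing at runtime; parsed reports remain plain dicts. They exist so a static checker can validate key names and value types. A minimal consumer sketch, assuming mypy or pyright and Python 3.11+ (where `NotRequired` lives in `typing`; older interpreters would need `typing_extensions`):

```python
def count_messages(report: ParsedAggregateReport) -> int:
    # "records" and "count" are checked against the TypedDict definitions
    # above; at runtime these are ordinary dict lookups.
    return sum(record["count"] for record in report["records"])
```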
@@ -159,7 +335,7 @@ def _bucket_interval_by_day(
if day_cursor > begin:
day_cursor -= timedelta(days=1)
-day_buckets: List[Dict[str, Any]] = []
+day_buckets: list[dict[str, Any]] = []
while day_cursor < end:
day_start = day_cursor
@@ -191,12 +367,12 @@ def _bucket_interval_by_day(
# Then apply a "largest remainder" rounding strategy to ensure the sum
# equals exactly total_count.
-exact_values: List[float] = [
+exact_values: list[float] = [
(b["seconds"] / interval_seconds) * total_count for b in day_buckets
]
-floor_values: List[int] = [int(x) for x in exact_values]
-fractional_parts: List[float] = [x - int(x) for x in exact_values]
+floor_values: list[int] = [int(x) for x in exact_values]
+fractional_parts: list[float] = [x - int(x) for x in exact_values]
# How many counts do we still need to distribute after flooring?
remainder = total_count - sum(floor_values)
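The comments above describe largest-remainder rounding: floor every proportional share, then hand the leftover units to the buckets with the largest fractional parts so the rounded counts still sum to the original total. A standalone sketch of that idea (illustrative numbers, independent of the function in the diff):

```python
def largest_remainder(exact_values: list[float], total: int) -> list[int]:
    # Floor each share, then distribute what is left to the largest remainders.
    floors = [int(v) for v in exact_values]
    remainder = total - sum(floors)
    by_fraction = sorted(range(len(exact_values)),
                         key=lambda i: exact_values[i] - floors[i], reverse=True)
    for i in by_fraction[:remainder]:
        floors[i] += 1
    return floors

# 10 messages split across shares of 4.5, 3.25 and 2.25 -> [5, 3, 2]
assert largest_remainder([4.5, 3.25, 2.25], 10) == [5, 3, 2]
```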
@@ -216,7 +392,7 @@ def _bucket_interval_by_day(
final_counts[idx] += 1
# --- Step 3: Build the final per-day result list -------------------------
-results: List[Dict[str, Any]] = []
+results: list[DateIntervalBucket] = []
for bucket, count in zip(day_buckets, final_counts):
if count > 0:
results.append(
@@ -231,8 +407,8 @@ def _bucket_interval_by_day(
def _append_parsed_record(
-parsed_record: OrderedDict[str, Any],
-records: List[OrderedDict[str, Any]],
+parsed_record: dict[str, Any],
+records: list[dict[str, Any]],
begin_dt: datetime,
end_dt: datetime,
normalize: bool,
@@ -275,7 +451,7 @@ def _append_parsed_record(
def _parse_report_record(
-record: OrderedDict,
+record: dict[str, Any],
*,
ip_db_path: Optional[str] = None,
always_use_local_files: Optional[bool] = False,
@@ -284,13 +460,13 @@ def _parse_report_record(
offline: Optional[bool] = False,
nameservers: Optional[list[str]] = None,
dns_timeout: Optional[float] = 2.0,
-) -> OrderedDict[str, Any]:
+) -> dict[str, Any]:
"""
Converts a record from a DMARC aggregate report into a more consistent
format
Args:
-record (OrderedDict): The record to convert
+record (dict): The record to convert
always_use_local_files (bool): Do not download files
reverse_dns_map_path (str): Path to a reverse DNS map file
reverse_dns_map_url (str): URL to a reverse DNS map file
@@ -301,10 +477,10 @@ def _parse_report_record(
dns_timeout (float): Sets the DNS timeout in seconds
Returns:
-OrderedDict: The converted record
+dict: The converted record
"""
record = record.copy()
-new_record = OrderedDict()
+new_record: dict[str, Any] = {}
if record["row"]["source_ip"] is None:
raise ValueError("Source IP address is empty")
new_record_source = get_ip_address_info(
@@ -322,14 +498,12 @@ def _parse_report_record(
new_record["source"] = new_record_source
new_record["count"] = int(record["row"]["count"])
policy_evaluated = record["row"]["policy_evaluated"].copy()
-new_policy_evaluated = OrderedDict(
-[
-("disposition", "none"),
-("dkim", "fail"),
-("spf", "fail"),
-("policy_override_reasons", []),
-]
-)
+new_policy_evaluated: dict[str, Any] = {
+"disposition": "none",
+"dkim": "fail",
+"spf": "fail",
+"policy_override_reasons": [],
+}
if "disposition" in policy_evaluated:
new_policy_evaluated["disposition"] = policy_evaluated["disposition"]
if new_policy_evaluated["disposition"].strip().lower() == "pass":
@@ -366,7 +540,7 @@ def _parse_report_record(
new_record["identifiers"] = record["identities"].copy()
else:
new_record["identifiers"] = record["identifiers"].copy()
-new_record["auth_results"] = OrderedDict([("dkim", []), ("spf", [])])
+new_record["auth_results"] = {"dkim": [], "spf": []}
if type(new_record["identifiers"]["header_from"]) is str:
lowered_from = new_record["identifiers"]["header_from"].lower()
else:
@@ -385,7 +559,7 @@ def _parse_report_record(
auth_results["dkim"] = [auth_results["dkim"]]
for result in auth_results["dkim"]:
if "domain" in result and result["domain"] is not None:
-new_result = OrderedDict([("domain", result["domain"])])
+new_result: dict[str, str] = {"domain": result["domain"]}
if "selector" in result and result["selector"] is not None:
new_result["selector"] = result["selector"]
else:
@@ -400,16 +574,16 @@ def _parse_report_record(
auth_results["spf"] = [auth_results["spf"]]
for result in auth_results["spf"]:
if "domain" in result and result["domain"] is not None:
-new_result = OrderedDict([("domain", result["domain"])])
+new_spf_result: dict[str, str] = {"domain": result["domain"]}
if "scope" in result and result["scope"] is not None:
-new_result["scope"] = result["scope"]
+new_spf_result["scope"] = result["scope"]
else:
-new_result["scope"] = "mfrom"
+new_spf_result["scope"] = "mfrom"
if "result" in result and result["result"] is not None:
-new_result["result"] = result["result"]
+new_spf_result["result"] = result["result"]
else:
-new_result["result"] = "none"
-new_record["auth_results"]["spf"].append(new_result)
+new_spf_result["result"] = "none"
+new_record["auth_results"]["spf"].append(new_spf_result)
if "envelope_from" not in new_record["identifiers"]:
envelope_from = None
@@ -438,12 +612,12 @@ def _parse_report_record(
return new_record
-def _parse_smtp_tls_failure_details(failure_details: dict[str, Any]):
+def _parse_smtp_tls_failure_details(failure_details: dict[str, Any]) -> SMTPTLSFailureDetails:
try:
-new_failure_details = OrderedDict(
-result_type=failure_details["result-type"],
-failed_session_count=failure_details["failed-session-count"],
-)
+new_failure_details: dict[str, Any] = {
+"result_type": failure_details["result-type"],
+"failed_session_count": failure_details["failed-session-count"],
+}
if "sending-mta-ip" in failure_details:
new_failure_details["sending_mta_ip"] = failure_details["sending-mta-ip"]
@@ -474,7 +648,7 @@ def _parse_smtp_tls_failure_details(failure_details: dict[str, Any]):
raise InvalidSMTPTLSReport(str(e))
-def _parse_smtp_tls_report_policy(policy: OrderedDict[str, Any]):
+def _parse_smtp_tls_report_policy(policy: dict[str, Any]) -> SMTPTLSPolicy:
policy_types = ["tlsa", "sts", "no-policy-found"]
try:
policy_domain = policy["policy"]["policy-domain"]
@@ -482,7 +656,10 @@ def _parse_smtp_tls_report_policy(policy: OrderedDict[str, Any]):
failure_details = []
if policy_type not in policy_types:
raise InvalidSMTPTLSReport(f"Invalid policy type {policy_type}")
-new_policy = OrderedDict(policy_domain=policy_domain, policy_type=policy_type)
+new_policy: dict[str, Any] = {
+"policy_domain": policy_domain,
+"policy_type": policy_type,
+}
if "policy-string" in policy["policy"]:
if isinstance(policy["policy"]["policy-string"], list):
if len(policy["policy"]["policy-string"]) > 0:
@@ -511,7 +688,7 @@ def _parse_smtp_tls_report_policy(policy: OrderedDict[str, Any]):
raise InvalidSMTPTLSReport(str(e))
-def parse_smtp_tls_report_json(report: str):
+def parse_smtp_tls_report_json(report: str) -> ParsedSMTPTLSReport:
"""Parses and validates an SMTP TLS report"""
required_fields = [
"organization-name",
@@ -533,39 +710,39 @@ def parse_smtp_tls_report_json(report: str):
for policy in report_dict["policies"]:
policies.append(_parse_smtp_tls_report_policy(policy))
-new_report = OrderedDict(
-organization_name=report_dict["organization-name"],
-begin_date=report_dict["date-range"]["start-datetime"],
-end_date=report_dict["date-range"]["end-datetime"],
-contact_info=report_dict["contact-info"],
-report_id=report_dict["report-id"],
-policies=policies,
-)
+new_report: dict[str, Any] = {
+"organization_name": report_dict["organization-name"],
+"begin_date": report_dict["date-range"]["start-datetime"],
+"end_date": report_dict["date-range"]["end-datetime"],
+"contact_info": report_dict["contact-info"],
+"report_id": report_dict["report-id"],
+"policies": policies,
+}
return new_report
except KeyError as e:
-InvalidSMTPTLSReport(f"Missing required field: {e}")
+raise InvalidSMTPTLSReport(f"Missing required field: {e}")
except Exception as e:
raise InvalidSMTPTLSReport(str(e))
def parsed_smtp_tls_reports_to_csv_rows(
-reports: Union[OrderedDict[str, Any], List[OrderedDict[str, Any]]],
-):
-"""Converts one oor more parsed SMTP TLS reports into a list of single
-layer OrderedDict objects suitable for use in a CSV"""
-if type(reports) is OrderedDict:
+reports: Union[ParsedSMTPTLSReport, list[ParsedSMTPTLSReport]],
+) -> list[dict[str, Any]]:
+"""Converts one or more parsed SMTP TLS reports into a list of single
+layer dict objects suitable for use in a CSV"""
+if type(reports) is dict:
reports = [reports]
rows = []
for report in reports:
-common_fields = OrderedDict(
-organization_name=report["organization_name"],
-begin_date=report["begin_date"],
-end_date=report["end_date"],
-report_id=report["report_id"],
-)
+common_fields: dict[str, Any] = {
+"organization_name": report["organization_name"],
+"begin_date": report["begin_date"],
+"end_date": report["end_date"],
+"report_id": report["report_id"],
+}
record = common_fields.copy()
for policy in report["policies"]:
if "policy_strings" in policy:
@@ -587,7 +764,7 @@ def parsed_smtp_tls_reports_to_csv_rows(
return rows
-def parsed_smtp_tls_reports_to_csv(reports: OrderedDict[str, Any]) -> str:
+def parsed_smtp_tls_reports_to_csv(reports: ParsedSMTPTLSReport) -> str:
"""
Converts one or more parsed SMTP TLS reports to flat CSV format, including
headers
@@ -644,8 +821,8 @@ def parse_aggregate_report_xml(
timeout: Optional[float] = 2.0,
keep_alive: Optional[Callable] = None,
normalize_timespan_threshold_hours: float = 24.0,
-) -> OrderedDict[str, Any]:
-"""Parses a DMARC XML report string and returns a consistent OrderedDict
+) -> ParsedAggregateReport:
+"""Parses a DMARC XML report string and returns a consistent dict
Args:
xml (str): A string of DMARC aggregate report XML
@@ -661,7 +838,7 @@ def parse_aggregate_report_xml(
normalize_timespan_threshold_hours (float): Normalize timespans beyond this
Returns:
-OrderedDict: The parsed aggregate DMARC report
+dict: The parsed aggregate DMARC report
"""
errors = []
# Parse XML and recover from errors
@@ -693,8 +870,8 @@ def parse_aggregate_report_xml(
schema = "draft"
if "version" in report:
schema = report["version"]
-new_report = OrderedDict([("xml_schema", schema)])
-new_report_metadata = OrderedDict()
+new_report: dict[str, Any] = {"xml_schema": schema}
+new_report_metadata: dict[str, Any] = {}
if report_metadata["org_name"] is None:
if report_metadata["email"] is not None:
report_metadata["org_name"] = report_metadata["email"].split("@")[-1]
@@ -755,7 +932,7 @@ def parse_aggregate_report_xml(
policy_published = report["policy_published"]
if type(policy_published) is list:
policy_published = policy_published[0]
-new_policy_published = OrderedDict()
+new_policy_published: dict[str, Any] = {}
new_policy_published["domain"] = policy_published["domain"]
adkim = "r"
if "adkim" in policy_published:
@@ -860,14 +1037,14 @@ def extract_report(content: Union[bytes, str, IO[Any]]) -> str:
str: The extracted text
"""
-file_object = None
+file_object: Union[BytesIO, IO[Any]]
try:
if isinstance(content, str):
try:
file_object = BytesIO(b64decode(content))
except binascii.Error:
return content
-elif type(content) is bytes:
+elif isinstance(content, bytes):
file_object = BytesIO(content)
else:
file_object = content
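In `extract_report`, the exact-type test `type(content) is bytes` becomes `isinstance(content, bytes)`, which also accepts `bytes` subclasses and matches the `isinstance` check already used for `str` above. A two-line demonstration:

```python
class RawPayload(bytes):
    """Stand-in for a bytes subclass a mail or HTTP library might return."""

payload = RawPayload(b"report data")
print(type(payload) is bytes)      # False: the exact-type check rejects the subclass
print(isinstance(payload, bytes))  # True: the subclass is still handled as bytes
```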
@@ -922,12 +1099,12 @@ def parse_aggregate_report_file(
dns_timeout: Optional[float] = 2.0,
keep_alive: Optional[Callable] = None,
normalize_timespan_threshold_hours: Optional[float] = 24.0,
-) -> OrderedDict[str, any]:
+) -> ParsedAggregateReport:
"""Parses a file at the given path, a file-like object. or bytes as an
aggregate DMARC report
Args:
-_input (str | bytes | IO): A path to a file, a file like object, or bytes
+_input (Union[str, bytes, IO]): A path to a file, a file like object, or bytes
offline (bool): Do not query online for geolocation or DNS
always_use_local_files (bool): Do not download files
reverse_dns_map_path (str): Path to a reverse DNS map file
@@ -940,7 +1117,7 @@ def parse_aggregate_report_file(
normalize_timespan_threshold_hours (float): Normalize timespans beyond this
Returns:
-OrderedDict: The parsed DMARC aggregate report
+dict: The parsed DMARC aggregate report
"""
try:
@@ -963,7 +1140,7 @@ def parse_aggregate_report_file(
def parsed_aggregate_reports_to_csv_rows(
-reports: list[OrderedDict[str, Any]],
+reports: list[ParsedAggregateReport],
) -> list[dict[str, Any]]:
"""
Converts one or more parsed aggregate reports to list of dicts in flat CSV
@@ -980,7 +1157,7 @@ def parsed_aggregate_reports_to_csv_rows(
def to_str(obj):
return str(obj).lower()
-if type(reports) is OrderedDict:
+if type(reports) is dict:
reports = [reports]
rows = []
@@ -1088,7 +1265,7 @@ def parsed_aggregate_reports_to_csv_rows(
return rows
-def parsed_aggregate_reports_to_csv(reports: list[OrderedDict[str, Any]]) -> str:
+def parsed_aggregate_reports_to_csv(reports: list[ParsedAggregateReport]) -> str:
"""
Converts one or more parsed aggregate reports to flat CSV format, including
headers
@@ -1167,9 +1344,9 @@ def parse_forensic_report(
nameservers: Optional[list[str]] = None,
dns_timeout: Optional[float] = 2.0,
strip_attachment_payloads: Optional[bool] = False,
-) -> OrderedDict[str, Any]:
+) -> ParsedForensicReport:
"""
-Converts a DMARC forensic report and sample to a ``OrderedDict``
+Converts a DMARC forensic report and sample to a ``dict``
Args:
feedback_report (str): A message's feedback report as a string
@@ -1187,12 +1364,12 @@ def parse_forensic_report(
forensic report results
Returns:
-OrderedDict: A parsed report and sample
+dict: A parsed report and sample
"""
delivery_results = ["delivered", "spam", "policy", "reject", "other"]
try:
-parsed_report = OrderedDict()
+parsed_report: dict[str, Any] = {}
report_values = feedback_report_regex.findall(feedback_report)
for report_value in report_values:
key = report_value[0].lower().replace("-", "_")
@@ -1295,7 +1472,7 @@ def parse_forensic_report(
raise InvalidForensicReport("Unexpected error: {0}".format(error.__str__()))
-def parsed_forensic_reports_to_csv_rows(reports: list[OrderedDict[str, Any]]):
+def parsed_forensic_reports_to_csv_rows(reports: list[ParsedForensicReport]):
"""
Converts one or more parsed forensic reports to a list of dicts in flat CSV
format
@@ -1306,7 +1483,7 @@ def parsed_forensic_reports_to_csv_rows(reports: list[OrderedDict[str, Any]]):
Returns:
list: Parsed forensic report data as a list of dicts in flat CSV format
"""
-if type(reports) is OrderedDict:
+if type(reports) is dict:
reports = [reports]
rows = []
@@ -1331,7 +1508,7 @@ def parsed_forensic_reports_to_csv_rows(reports: list[OrderedDict[str, Any]]):
return rows
-def parsed_forensic_reports_to_csv(reports: list[dict[str, Any]]) -> str:
+def parsed_forensic_reports_to_csv(reports: list[ParsedForensicReport]) -> str:
"""
Converts one or more parsed forensic reports to flat CSV format, including
headers
@@ -1396,7 +1573,7 @@ def parse_report_email(
strip_attachment_payloads: Optional[bool] = False,
keep_alive: Optional[callable] = None,
normalize_timespan_threshold_hours: Optional[float] = 24.0,
-) -> OrderedDict[str, Any]:
+) -> dict[str, Any]:
"""
Parses a DMARC report from an email
@@ -1415,7 +1592,7 @@ def parse_report_email(
normalize_timespan_threshold_hours (float): Normalize timespans beyond this
Returns:
-OrderedDict:
+dict:
* ``report_type``: ``aggregate`` or ``forensic``
* ``report``: The parsed report
"""
@@ -1472,15 +1649,17 @@ def parse_report_email(
if not payload.strip().startswith("{"):
payload = str(b64decode(payload))
smtp_tls_report = parse_smtp_tls_report_json(payload)
-return OrderedDict(
-[("report_type", "smtp_tls"), ("report", smtp_tls_report)]
-)
+return {
+"report_type": "smtp_tls",
+"report": smtp_tls_report,
+}
elif content_type == "application/tlsrpt+gzip":
payload = extract_report(payload)
smtp_tls_report = parse_smtp_tls_report_json(payload)
-return OrderedDict(
-[("report_type", "smtp_tls"), ("report", smtp_tls_report)]
-)
+return {
+"report_type": "smtp_tls",
+"report": smtp_tls_report,
+}
elif content_type == "text/plain":
if "A message claiming to be from you has failed" in payload:
try:
@@ -1510,10 +1689,11 @@ def parse_report_email(
if isinstance(payload, bytes):
payload = payload.decode("utf-8", errors="replace")
if payload.strip().startswith("{"):
-result = parse_smtp_tls_report_json(payload)
-result = OrderedDict(
-[("report_type", "smtp_tls"), ("report", smtp_tls_report)]
-)
+smtp_tls_report = parse_smtp_tls_report_json(payload)
+result = {
+"report_type": "smtp_tls",
+"report": smtp_tls_report,
+}
elif payload.strip().startswith("<"):
aggregate_report = parse_aggregate_report_xml(
payload,
@@ -1527,9 +1707,10 @@ def parse_report_email(
keep_alive=keep_alive,
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
)
-result = OrderedDict(
-[("report_type", "aggregate"), ("report", aggregate_report)]
-)
+result = {
+"report_type": "aggregate",
+"report": aggregate_report,
+}
return result
@@ -1573,7 +1754,7 @@ def parse_report_email(
except Exception as e:
raise InvalidForensicReport(e.__str__())
-result = OrderedDict([("report_type", "forensic"), ("report", forensic_report)])
+result = {"report_type": "forensic", "report": forensic_report}
return result
if result is None:
@@ -1594,12 +1775,12 @@ def parse_report_file(
offline: Optional[bool] = False,
keep_alive: Optional[Callable] = None,
normalize_timespan_threshold_hours: Optional[float] = 24,
-) -> OrderedDict[str, Any]:
+) -> ReportTypeWrapper:
"""Parses a DMARC aggregate or forensic file at the given path, a
file-like object. or bytes
Args:
-input_ (str | bytes | IO): A path to a file, a file like object, or bytes
+input_ (Union[str, bytes, IO]): A path to a file, a file like object, or bytes
nameservers (list): A list of one or more nameservers to use
(Cloudflare's public DNS resolvers by default)
dns_timeout (float): Sets the DNS timeout in seconds
@@ -1613,7 +1794,7 @@ def parse_report_file(
keep_alive (callable): Keep alive function
Returns:
-OrderedDict: The parsed DMARC report
+dict: The parsed DMARC report
"""
if type(input_) is str:
logger.debug("Parsing {0}".format(input_))
@@ -1640,11 +1821,11 @@ def parse_report_file(
keep_alive=keep_alive,
normalize_timespan_threshold_hours=normalize_timespan_threshold_hours,
)
-results = OrderedDict([("report_type", "aggregate"), ("report", report)])
+results = {"report_type": "aggregate", "report": report}
except InvalidAggregateReport:
try:
report = parse_smtp_tls_report_json(content)
-results = OrderedDict([("report_type", "smtp_tls"), ("report", report)])
+results = {"report_type": "smtp_tls", "report": report}
except InvalidSMTPTLSReport:
try:
sa = strip_attachment_payloads
@@ -1678,7 +1859,7 @@ def get_dmarc_reports_from_mbox(
reverse_dns_map_url: Optional[str] = None,
offline: Optional[bool] = False,
normalize_timespan_threshold_hours: Optional[float] = 24.0,
-) -> OrderedDict[str, OrderedDict[str, Any]]:
+) -> dict[str, dict[str, Any]]:
"""Parses a mailbox in mbox format containing e-mails with attached
DMARC reports
@@ -1697,7 +1878,7 @@ def get_dmarc_reports_from_mbox(
normalize_timespan_threshold_hours (float): Normalize timespans beyond this
Returns:
-OrderedDict: Lists of ``aggregate_reports``, ``forensic_reports``, and ``smtp_tls_reports``
+dict: Lists of ``aggregate_reports``, ``forensic_reports``, and ``smtp_tls_reports``
"""
aggregate_reports = []
@@ -1746,13 +1927,11 @@ def get_dmarc_reports_from_mbox(
logger.warning(error.__str__())
except mailbox.NoSuchMailboxError:
raise InvalidDMARCReport("Mailbox {0} does not exist".format(input_))
-return OrderedDict(
-[
-("aggregate_reports", aggregate_reports),
-("forensic_reports", forensic_reports),
-("smtp_tls_reports", smtp_tls_reports),
-]
-)
+return {
+"aggregate_reports": aggregate_reports,
+"forensic_reports": forensic_reports,
+"smtp_tls_reports": smtp_tls_reports,
+}
def get_dmarc_reports_from_mailbox(
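`get_dmarc_reports_from_mbox` now returns a plain dict keyed by report category. A hedged usage sketch (the mbox path is hypothetical and the remaining keyword arguments are assumed to keep their defaults):

```python
results = get_dmarc_reports_from_mbox("reports.mbox")  # hypothetical input file

print(len(results["aggregate_reports"]), "aggregate reports")
print(len(results["forensic_reports"]), "forensic reports")
print(len(results["smtp_tls_reports"]), "SMTP TLS reports")
```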
@@ -1770,12 +1949,12 @@ def get_dmarc_reports_from_mailbox(
nameservers: Optional[list[str]] = None,
dns_timeout: Optional[float] = 6.0,
strip_attachment_payloads: Optional[bool] = False,
-results: Optional[OrderedDict[str, Any]] = None,
+results: Optional[dict[str, Any]] = None,
batch_size: Optional[int] = 10,
since: Optional[datetime] = None,
create_folders: Optional[bool] = True,
normalize_timespan_threshold_hours: Optional[float] = 24,
-) -> OrderedDict[str, OrderedDict[str, Any]]:
+) -> dict[str, dict[str, Any]]:
"""
Fetches and parses DMARC reports from a mailbox
@@ -1804,7 +1983,7 @@ def get_dmarc_reports_from_mailbox(
normalize_timespan_threshold_hours (float): Normalize timespans beyond this
Returns:
-OrderedDict: Lists of ``aggregate_reports``, ``forensic_reports``, and ``smtp_tls_reports``
+dict: Lists of ``aggregate_reports``, ``forensic_reports``, and ``smtp_tls_reports``
"""
if delete and test:
raise ValueError("delete and test options are mutually exclusive")
@@ -2038,13 +2217,11 @@ def get_dmarc_reports_from_mailbox(
except Exception as e:
e = "Error moving message UID {0}: {1}".format(msg_uid, e)
logger.error("Mailbox error: {0}".format(e))
-results = OrderedDict(
-[
-("aggregate_reports", aggregate_reports),
-("forensic_reports", forensic_reports),
-("smtp_tls_reports", smtp_tls_reports),
-]
-)
+results: dict[str, Any] = {
+"aggregate_reports": aggregate_reports,
+"forensic_reports": forensic_reports,
+"smtp_tls_reports": smtp_tls_reports,
+}
if current_time:
total_messages = len(
@@ -2185,7 +2362,7 @@ def append_csv(filename, csv):
def save_output(
-results: OrderedDict[str, Any],
+results: dict[str, Any],
*,
output_directory: Optional[str] = "output",
aggregate_json_filename: Optional[str] = "aggregate.json",
@@ -2199,7 +2376,7 @@ def save_output(
Save report data in the given directory
Args:
-results (OrderedDict): Parsing results
+results (dict): Parsing results
output_directory (str): The path to the directory to save in
aggregate_json_filename (str): Filename for the aggregate JSON file
forensic_json_filename (str): Filename for the forensic JSON file
@@ -2271,12 +2448,12 @@ def save_output(
sample_file.write(sample)
-def get_report_zip(results: OrderedDict[str, Any]) -> bytes:
+def get_report_zip(results: dict[str, Any]) -> bytes:
"""
Creates a zip file of parsed report output
Args:
-results (OrderedDict): The parsed results
+results (dict): The parsed results
Returns:
bytes: zip file bytes
@@ -2317,7 +2494,7 @@ def get_report_zip(results: OrderedDict[str, Any]) -> bytes:
def email_results(
-results: OrderedDict,
+results: dict[str, Any],
*,
host: str,
mail_from: str,
@@ -2337,7 +2514,7 @@ def email_results(
Emails parsing results as a zip file
Args:
-results (OrderedDict): Parsing results
+results (dict): Parsing results
host (str): Mail server hostname or IP address
mail_from: The value of the message from header
mail_to (list): A list of addresses to mail to
@@ -10,7 +10,6 @@ from glob import glob
import logging
import math
import yaml
-from collections import OrderedDict
import json
from ssl import CERT_NONE, create_default_context
from multiprocessing import Pipe, Process
@@ -1634,7 +1633,7 @@ def _main():
logger.exception("Mailbox Error")
exit(1)
-results = OrderedDict(
+results = dict(
[
("aggregate_reports", aggregate_reports),
("forensic_reports", forensic_reports),
@@ -4,7 +4,6 @@ from __future__ import annotations
from typing import Optional, Union, Any
-from collections import OrderedDict
from elasticsearch_dsl.search import Q
from elasticsearch_dsl import (
@@ -279,7 +278,7 @@ def set_hosts(
Sets the Elasticsearch hosts to use
Args:
-hosts (str | list[str]): A single hostname or URL, or list of hostnames or URLs
+hosts (Union[str, list[str]]): A single hostname or URL, or list of hostnames or URLs
use_ssl (bool): Use an HTTPS connection to the server
ssl_cert_path (str): Path to the certificate chain
username (str): The username to use for authentication
@@ -377,7 +376,7 @@ def migrate_indexes(
def save_aggregate_report_to_elasticsearch(
-aggregate_report: OrderedDict[str, Any],
+aggregate_report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
@@ -388,7 +387,7 @@ def save_aggregate_report_to_elasticsearch(
Saves a parsed DMARC aggregate report to Elasticsearch
Args:
-aggregate_report (OrderedDict): A parsed forensic report
+aggregate_report (dict): A parsed forensic report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily indexes
@@ -539,7 +538,7 @@ def save_aggregate_report_to_elasticsearch(
def save_forensic_report_to_elasticsearch(
-forensic_report: OrderedDict[str, Any],
+forensic_report: dict[str, Any],
index_suffix: Optional[Any] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
@@ -550,7 +549,7 @@ def save_forensic_report_to_elasticsearch(
Saves a parsed DMARC forensic report to Elasticsearch
Args:
-forensic_report (OrderedDict): A parsed forensic report
+forensic_report (dict): A parsed forensic report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily
@@ -570,7 +569,7 @@ def save_forensic_report_to_elasticsearch(
sample_date = forensic_report["parsed_sample"]["date"]
sample_date = human_timestamp_to_datetime(sample_date)
original_headers = forensic_report["parsed_sample"]["headers"]
-headers = OrderedDict()
+headers = dict()
for original_header in original_headers:
headers[original_header.lower()] = original_headers[original_header]
@@ -706,7 +705,7 @@ def save_forensic_report_to_elasticsearch(
def save_smtp_tls_report_to_elasticsearch(
-report: OrderedDict[str, Any],
+report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
@@ -717,7 +716,7 @@ def save_smtp_tls_report_to_elasticsearch(
Saves a parsed SMTP TLS report to Elasticsearch
Args:
-report (OrderedDict): A parsed SMTP TLS report
+report (dict): A parsed SMTP TLS report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily indexes
@@ -8,7 +8,6 @@ import logging
import logging.handlers
import json
import threading
-from collections import OrderedDict
from parsedmarc import (
parsed_aggregate_reports_to_csv_rows,
@@ -54,7 +53,7 @@ class GelfClient(object):
self.logger.addHandler(self.handler)
def save_aggregate_report_to_gelf(
-self, aggregate_reports: list[OrderedDict[str, Any]]
+self, aggregate_reports: list[dict[str, Any]]
):
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
for row in rows:
@@ -64,13 +63,13 @@ class GelfClient(object):
log_context_data.parsedmarc = None
def save_forensic_report_to_gelf(
-self, forensic_reports: list[OrderedDict[str, Any]]
+self, forensic_reports: list[dict[str, Any]]
):
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
for row in rows:
self.logger.info(json.dumps(row))
-def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: OrderedDict[str, Any]):
+def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: dict[str, Any]):
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
for row in rows:
self.logger.info(json.dumps(row))
@@ -10,7 +10,6 @@ from ssl import create_default_context
from kafka import KafkaProducer
from kafka.errors import NoBrokersAvailable, UnknownTopicOrPartitionError
-from collections import OrderedDict
from parsedmarc.utils import human_timestamp_to_datetime
from parsedmarc import __version__
@@ -66,7 +65,7 @@ class KafkaClient(object):
raise KafkaError("No Kafka brokers available")
@staticmethod
-def strip_metadata(report: OrderedDict[str, Any]):
+def strip_metadata(report: dict[str, Any]):
"""
Duplicates org_name, org_email and report_id into JSON root
and removes report_metadata key to bring it more inline
@@ -80,7 +79,7 @@ class KafkaClient(object):
return report
@staticmethod
-def generate_date_range(report: OrderedDict[str, Any]):
+def generate_date_range(report: dict[str, Any]):
"""
Creates a date_range timestamp with format YYYY-MM-DD-T-HH:MM:SS
based on begin and end dates for easier parsing in Kibana.
@@ -99,7 +98,7 @@ class KafkaClient(object):
def save_aggregate_reports_to_kafka(
self,
-aggregate_reports: Union[OrderedDict[str, Any], list[OrderedDict[str, Any]]],
+aggregate_reports: Union[dict[str, Any], list[dict[str, Any]]],
aggregate_topic: str,
):
"""
@@ -111,9 +110,7 @@ class KafkaClient(object):
aggregate_topic (str): The name of the Kafka topic
"""
-if isinstance(aggregate_reports, dict) or isinstance(
-aggregate_reports, OrderedDict
-):
+if isinstance(aggregate_reports, dict):
aggregate_reports = [aggregate_reports]
if len(aggregate_reports) < 1:
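The collapsed `isinstance` check above is safe because `OrderedDict` is a subclass of `dict`, so a single `isinstance(..., dict)` already covers callers that still pass an `OrderedDict`:

```python
from collections import OrderedDict

legacy_report = OrderedDict([("org_name", "example.com")])
print(isinstance(legacy_report, dict))  # True - the second branch was redundant
```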
@@ -146,7 +143,7 @@ class KafkaClient(object):
def save_forensic_reports_to_kafka(
self,
-forensic_reports: Union[OrderedDict[str, Any], list[OrderedDict[str, Any]]],
+forensic_reports: Union[dict[str, Any], list[dict[str, Any]]],
forensic_topic: str,
):
"""
@@ -180,7 +177,7 @@ class KafkaClient(object):
def save_smtp_tls_reports_to_kafka(
self,
-smtp_tls_reports: Union[list[OrderedDict[str, Any]], OrderedDict[str, Any]],
+smtp_tls_reports: Union[list[dict[str, Any]], dict[str, Any]],
smtp_tls_topic: str,
):
"""
@@ -3,7 +3,6 @@
from __future__ import annotations
from typing import Any
-from collections import OrderedDict
from parsedmarc.log import logger
from azure.core.exceptions import HttpResponseError
@@ -133,7 +132,7 @@ class LogAnalyticsClient(object):
def publish_results(
self,
-results: OrderedDict[str, OrderedDict[str, Any]],
+results: dict[str, dict[str, Any]],
save_aggregate: bool,
save_forensic: bool,
save_smtp_tls: bool,
@@ -4,7 +4,6 @@ from __future__ import annotations
from typing import Optional, Union, Any
-from collections import OrderedDict
from opensearchpy import (
Q,
@@ -377,7 +376,7 @@ def migrate_indexes(
def save_aggregate_report_to_opensearch(
-aggregate_report: OrderedDict[str, Any],
+aggregate_report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
@@ -388,7 +387,7 @@ def save_aggregate_report_to_opensearch(
Saves a parsed DMARC aggregate report to OpenSearch
Args:
-aggregate_report (OrderedDict): A parsed forensic report
+aggregate_report (dict): A parsed forensic report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily indexes
@@ -539,7 +538,7 @@ def save_aggregate_report_to_opensearch(
def save_forensic_report_to_opensearch(
-forensic_report: OrderedDict[str, Any],
+forensic_report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
@@ -550,7 +549,7 @@ def save_forensic_report_to_opensearch(
Saves a parsed DMARC forensic report to OpenSearch
Args:
-forensic_report (OrderedDict): A parsed forensic report
+forensic_report (dict): A parsed forensic report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily
@@ -570,7 +569,7 @@ def save_forensic_report_to_opensearch(
sample_date = forensic_report["parsed_sample"]["date"]
sample_date = human_timestamp_to_datetime(sample_date)
original_headers = forensic_report["parsed_sample"]["headers"]
-headers = OrderedDict()
+headers = dict()
for original_header in original_headers:
headers[original_header.lower()] = original_headers[original_header]
@@ -706,7 +705,7 @@ def save_forensic_report_to_opensearch(
def save_smtp_tls_report_to_opensearch(
-report: OrderedDict[str, Any],
+report: dict[str, Any],
index_suffix: Optional[str] = None,
index_prefix: Optional[str] = None,
monthly_indexes: Optional[bool] = False,
@@ -717,7 +716,7 @@ def save_smtp_tls_report_to_opensearch(
Saves a parsed SMTP TLS report to OpenSearch
Args:
-report (OrderedDict): A parsed SMTP TLS report
+report (dict): A parsed SMTP TLS report
index_suffix (str): The suffix of the name of the index to save to
index_prefix (str): The prefix of the name of the index to save to
monthly_indexes (bool): Use monthly indexes instead of daily indexes
@@ -7,8 +7,6 @@ from typing import Any
import json
import boto3
-from collections import OrderedDict
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_datetime
@@ -53,18 +51,18 @@ class S3Client(object):
aws_access_key_id=access_key_id,
aws_secret_access_key=secret_access_key,
)
-self.bucket = self.s3.Bucket(self.bucket_name) # type: ignore
+self.bucket: Any = self.s3.Bucket(self.bucket_name)
-def save_aggregate_report_to_s3(self, report: OrderedDict[str, Any]):
+def save_aggregate_report_to_s3(self, report: dict[str, Any]):
self.save_report_to_s3(report, "aggregate")
-def save_forensic_report_to_s3(self, report: OrderedDict[str, Any]):
+def save_forensic_report_to_s3(self, report: dict[str, Any]):
self.save_report_to_s3(report, "forensic")
-def save_smtp_tls_report_to_s3(self, report: OrderedDict[str, Any]):
+def save_smtp_tls_report_to_s3(self, report: dict[str, Any]):
self.save_report_to_s3(report, "smtp_tls")
-def save_report_to_s3(self, report: OrderedDict[str, Any], report_type: str):
+def save_report_to_s3(self, report: dict[str, Any], report_type: str):
if report_type == "smtp_tls":
report_date = report["begin_date"]
report_id = report["report_id"]
@@ -4,7 +4,6 @@ from __future__ import annotations
from typing import Any, Union
-from collections import OrderedDict
from urllib.parse import urlparse
import socket
@@ -73,7 +72,7 @@ class HECClient(object):
def save_aggregate_reports_to_splunk(
self,
-aggregate_reports: Union[list[OrderedDict[str, Any]], OrderedDict[str, Any]],
+aggregate_reports: Union[list[dict[str, Any]], dict[str, Any]],
):
"""
Saves aggregate DMARC reports to Splunk
@@ -139,7 +138,7 @@ class HECClient(object):
def save_forensic_reports_to_splunk(
self,
-forensic_reports: Union[list[OrderedDict[str, Any]], OrderedDict[str, Any]],
+forensic_reports: Union[list[dict[str, Any]], dict[str, Any]],
):
"""
Saves forensic DMARC reports to Splunk
@@ -175,7 +174,7 @@ class HECClient(object):
raise SplunkError(response["text"])
def save_smtp_tls_reports_to_splunk(
-self, reports: Union[list[OrderedDict[str, Any]], OrderedDict[str, Any]]
+self, reports: Union[list[dict[str, Any]], dict[str, Any]]
):
"""
Saves aggregate DMARC reports to Splunk
@@ -8,7 +8,6 @@ import logging.handlers
from typing import Any
-from collections import OrderedDict
import json
@@ -37,21 +36,21 @@ class SyslogClient(object):
self.logger.addHandler(log_handler)
def save_aggregate_report_to_syslog(
-self, aggregate_reports: list[OrderedDict[str, Any]]
+self, aggregate_reports: list[dict[str, Any]]
):
rows = parsed_aggregate_reports_to_csv_rows(aggregate_reports)
for row in rows:
self.logger.info(json.dumps(row))
def save_forensic_report_to_syslog(
-self, forensic_reports: list[OrderedDict[str, Any]]
+self, forensic_reports: list[dict[str, Any]]
):
rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
for row in rows:
self.logger.info(json.dumps(row))
def save_smtp_tls_report_to_syslog(
-self, smtp_tls_reports: list[OrderedDict[str, Any]]
+self, smtp_tls_reports: list[dict[str, Any]]
):
rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
for row in rows:
@@ -4,14 +4,13 @@
from __future__ import annotations
-from typing import Optional, Union
+from typing import Optional, Union, TypedDict, Any
import logging
import os
from datetime import datetime
from datetime import timezone
from datetime import timedelta
-from collections import OrderedDict
from expiringdict import ExpiringDict
import tempfile
import subprocess
@@ -67,7 +66,15 @@ class DownloadError(RuntimeError):
"""Raised when an error occurs when downloading a file"""
-def decode_base64(data) -> bytes:
+class EmailAddress(TypedDict):
+    """Parsed email address information"""
+    display_name: Optional[str]
+    address: str
+    local: Optional[str]
+    domain: Optional[str]
+def decode_base64(data: str) -> bytes:
"""
Decodes a base64 string, with padding being optional
@@ -335,14 +342,14 @@ def get_ip_address_country(ip_address: str, *, db_path: Optional[str] = None) ->
def get_service_from_reverse_dns_base_domain(
-base_domain,
+base_domain: str,
*,
always_use_local_file: Optional[bool] = False,
-local_file_path: Optional[bool] = None,
-url: Optional[bool] = None,
+local_file_path: Optional[str] = None,
+url: Optional[str] = None,
offline: Optional[bool] = False,
-reverse_dns_map: Optional[bool] = None,
-) -> str:
+reverse_dns_map: Optional[dict[str, Any]] = None,
+) -> dict[str, Any]:
"""
Returns the service name of a given base domain name from reverse DNS.
@@ -411,18 +418,18 @@ def get_service_from_reverse_dns_base_domain(
def get_ip_address_info(
-ip_address,
+ip_address: str,
*,
ip_db_path: Optional[str] = None,
reverse_dns_map_path: Optional[str] = None,
always_use_local_files: Optional[bool] = False,
reverse_dns_map_url: Optional[str] = None,
cache: Optional[ExpiringDict] = None,
-reverse_dns_map: Optional[dict] = None,
+reverse_dns_map: Optional[dict[str, Any]] = None,
offline: Optional[bool] = False,
nameservers: Optional[list[str]] = None,
timeout: Optional[float] = 2.0,
-) -> OrderedDict[str, str]:
+) -> dict[str, Any]:
"""
Returns reverse DNS and country information for the given IP address
@@ -440,7 +447,7 @@ def get_ip_address_info(
timeout (float): Sets the DNS timeout in seconds
Returns:
-OrderedDict: ``ip_address``, ``reverse_dns``, ``country``
+dict: ``ip_address``, ``reverse_dns``, ``country``
"""
ip_address = ip_address.lower()
@@ -449,7 +456,7 @@ def get_ip_address_info(
if info:
logger.debug(f"IP address {ip_address} was found in cache")
return info
-info = OrderedDict()
+info = dict()
info["ip_address"] = ip_address
if offline:
reverse_dns = None
@@ -487,7 +494,7 @@ def get_ip_address_info(
return info
-def parse_email_address(original_address: str) -> OrderedDict[str, str]:
+def parse_email_address(original_address: str) -> EmailAddress:
if original_address[0] == "":
display_name = None
else:
@@ -500,14 +507,12 @@ def parse_email_address(original_address: str) -> OrderedDict[str, str]:
local = address_parts[0].lower()
domain = address_parts[-1].lower()
-return OrderedDict(
-[
-("display_name", display_name),
-("address", address),
-("local", local),
-("domain", domain),
-]
-)
+return {
+"display_name": display_name,
+"address": address,
+"local": local,
+"domain": domain,
+}
def get_filename_safe_string(string: str) -> str:
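With `parse_email_address` returning the `EmailAddress` TypedDict defined earlier in this diff, downstream key typos become static errors instead of runtime `KeyError`s. A small, hypothetical consumer:

```python
def format_address(parsed: EmailAddress) -> str:
    # mypy/pyright would flag parsed["adress"] (typo) and remind the caller
    # that "display_name" is Optional[str], not a guaranteed string.
    if parsed["display_name"]:
        return f'{parsed["display_name"]} <{parsed["address"]}>'
    return parsed["address"]
```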
@@ -553,7 +558,7 @@ def is_mbox(path: str) -> bool:
return _is_mbox
-def is_outlook_msg(content) -> bool:
+def is_outlook_msg(content: Union[bytes, Any]) -> bool:
"""
Checks if the given content is an Outlook msg OLE/MSG file
@@ -586,13 +591,14 @@ def convert_outlook_msg(msg_bytes: bytes) -> str:
os.chdir(tmp_dir)
with open("sample.msg", "wb") as msg_file:
msg_file.write(msg_bytes)
+rfc822_bytes: bytes
try:
subprocess.check_call(
["msgconvert", "sample.msg"], stdout=null_file, stderr=null_file
)
eml_path = "sample.eml"
with open(eml_path, "rb") as eml_file:
-rfc822 = eml_file.read()
+rfc822_bytes = eml_file.read()
except FileNotFoundError:
raise EmailParserError(
"Failed to convert Outlook MSG: msgconvert utility not found"
@@ -601,12 +607,12 @@ def convert_outlook_msg(msg_bytes: bytes) -> str:
os.chdir(orig_dir)
shutil.rmtree(tmp_dir)
-return rfc822
+return rfc822_bytes.decode("utf-8", errors="replace")
def parse_email(
data: Union[bytes, str], *, strip_attachment_payloads: Optional[bool] = False
-):
+) -> dict[str, Any]:
"""
A simplified email parser
@@ -621,7 +627,8 @@ def parse_email(
if isinstance(data, bytes):
if is_outlook_msg(data):
data = convert_outlook_msg(data)
-data = data.decode("utf-8", errors="replace")
+else:
+data = data.decode("utf-8", errors="replace")
parsed_email = mailparser.parse_from_string(data)
headers = json.loads(parsed_email.headers_json).copy()
parsed_email = json.loads(parsed_email.mail_json).copy()