mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-04-03 20:28:52 +00:00
4.1.8 - Be more forgiving of weird XML
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -106,7 +106,6 @@ ENV/
|
||||
.idea/
|
||||
|
||||
# I/O files
|
||||
|
||||
output/
|
||||
*.zip
|
||||
*.gz
|
||||
@@ -121,3 +120,6 @@ output/
|
||||
# Data files
|
||||
*.dat
|
||||
*.mmdb
|
||||
|
||||
# Temp files
|
||||
tmp/
|
||||
@@ -1,3 +1,8 @@
|
||||
4.1.8
|
||||
-----
|
||||
|
||||
- Be more forgiving of weird XML
|
||||
|
||||
4.1.7
|
||||
-----
|
||||
|
||||
|
||||
@@ -44,13 +44,14 @@ import imapclient.exceptions
|
||||
import dateparser
|
||||
import mailparser
|
||||
|
||||
__version__ = "4.1.7"
|
||||
__version__ = "4.1.8"
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.ERROR)
|
||||
|
||||
feedback_report_regex = re.compile(r"^([\w\-]+): (.+)$", re.MULTILINE)
|
||||
xml_schema_regex = re.compile(r"\s*<xs:schema.*>", re.MULTILINE)
|
||||
xml_header_regex = re.compile(r"^<\?xml .*$", re.MULTILINE)
|
||||
xml_schema_regex = re.compile(r"<\/?xs:schema.>", re.MULTILINE)
|
||||
|
||||
MAGIC_ZIP = b"\x50\x4B\x03\x04"
|
||||
MAGIC_GZIP = b"\x1F\x8B"
|
||||
@@ -424,16 +425,19 @@ def _parse_report_record(record, nameservers=None, timeout=2.0):
|
||||
new_record["auth_results"]["spf"].append(new_result)
|
||||
|
||||
if "envelope_from" not in new_record["identifiers"]:
|
||||
envelope_from = new_record["auth_results"]["spf"][-1]["domain"]
|
||||
envelope_from = None
|
||||
if len(auth_results["spf"]) > 0:
|
||||
envelope_from = new_record["auth_results"]["spf"][-1]["domain"]
|
||||
if envelope_from is not None:
|
||||
envelope_from = str(envelope_from).lower()
|
||||
new_record["identifiers"]["envelope_from"] = envelope_from
|
||||
|
||||
elif new_record["identifiers"]["envelope_from"] is None:
|
||||
envelope_from = new_record["auth_results"]["spf"][-1]["domain"]
|
||||
if envelope_from is not None:
|
||||
envelope_from = str(envelope_from).lower()
|
||||
new_record["identifiers"]["envelope_from"] = envelope_from
|
||||
if len(auth_results["spf"]) > 0:
|
||||
envelope_from = new_record["auth_results"]["spf"][-1]["domain"]
|
||||
if envelope_from is not None:
|
||||
envelope_from = str(envelope_from).lower()
|
||||
new_record["identifiers"]["envelope_from"] = envelope_from
|
||||
|
||||
envelope_to = None
|
||||
if "envelope_to" in new_record["identifiers"]:
|
||||
@@ -457,9 +461,20 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0):
|
||||
Returns:
|
||||
OrderedDict: The parsed aggregate DMARC report
|
||||
"""
|
||||
errors = []
|
||||
|
||||
try:
|
||||
xmltodict.parse(xml)["feedback"]
|
||||
except Exception as e:
|
||||
errors.append(e.__str__())
|
||||
|
||||
try:
|
||||
# Replace XML header (sometimes they are invalid)
|
||||
xml = xml_header_regex.sub("", xml)
|
||||
|
||||
# Remove invalid schema tags
|
||||
xml = xml_schema_regex.sub("", xml)
|
||||
xml = xml_schema_regex.sub('<?xml version="1.0"?>', xml)
|
||||
|
||||
report = xmltodict.parse(xml)["feedback"]
|
||||
report_metadata = report["report_metadata"]
|
||||
schema = "draft"
|
||||
@@ -467,7 +482,13 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0):
|
||||
schema = report["version"]
|
||||
new_report = OrderedDict([("xml_schema", schema)])
|
||||
new_report_metadata = OrderedDict()
|
||||
org_name = _get_base_domain(report_metadata["org_name"])
|
||||
if report_metadata["org_name"] is None:
|
||||
if report_metadata["email"] is not None:
|
||||
report_metadata["org_name"] = report_metadata[
|
||||
"email"].split("@")[-1]
|
||||
org_name = report_metadata["org_name"]
|
||||
if org_name is not None:
|
||||
org_name = _get_base_domain(org_name)
|
||||
new_report_metadata["org_name"] = org_name
|
||||
new_report_metadata["org_email"] = report_metadata["email"]
|
||||
extra = None
|
||||
@@ -484,7 +505,6 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0):
|
||||
date_range["end"] = _timestamp_to_human(date_range["end"])
|
||||
new_report_metadata["begin_date"] = date_range["begin"]
|
||||
new_report_metadata["end_date"] = date_range["end"]
|
||||
errors = []
|
||||
if "error" in report["report_metadata"]:
|
||||
if type(report["report_metadata"]["error"]) != list:
|
||||
errors = [report["report_metadata"]["error"]]
|
||||
@@ -526,13 +546,16 @@ def parse_aggregate_report_xml(xml, nameservers=None, timeout=2.0):
|
||||
|
||||
if type(report["record"]) == list:
|
||||
for record in report["record"]:
|
||||
records.append(_parse_report_record(record,
|
||||
nameservers=nameservers,
|
||||
timeout=timeout))
|
||||
report_record = _parse_report_record(record,
|
||||
nameservers=nameservers,
|
||||
timeout=timeout)
|
||||
records.append(report_record)
|
||||
|
||||
else:
|
||||
records.append(_parse_report_record(report["record"],
|
||||
nameservers=nameservers))
|
||||
report_record = _parse_report_record(report["record"],
|
||||
nameservers=nameservers,
|
||||
timeout=timeout)
|
||||
records.append(report_record)
|
||||
|
||||
new_report["records"] = records
|
||||
|
||||
|
||||
40
samples/!example.com!1538204542!1538463818.xml.sample
Normal file
40
samples/!example.com!1538204542!1538463818.xml.sample
Normal file
@@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<feedback>
|
||||
<report_metadata>
|
||||
<org_name></org_name>
|
||||
<email>administrator@accurateplastics.com</email>
|
||||
<report_id>example.com:1538463741</report_id>
|
||||
<date_range>
|
||||
<begin>1538413632</begin>
|
||||
<end>1538413632</end>
|
||||
</date_range>
|
||||
</report_metadata>
|
||||
<policy_published>
|
||||
<domain>example.com</domain>
|
||||
<adkim>r</adkim>
|
||||
<aspf>r</aspf>
|
||||
<p>none</p>
|
||||
<sp>reject</sp>
|
||||
<pct>100</pct>
|
||||
</policy_published>
|
||||
<record>
|
||||
<row>
|
||||
<source_ip>12.20.127.122</source_ip>
|
||||
<count>1</count>
|
||||
<policy_evaluated>
|
||||
<disposition>none</disposition>
|
||||
<dkim>fail</dkim>
|
||||
<spf>fail</spf>
|
||||
</policy_evaluated>
|
||||
</row>
|
||||
<identifiers>
|
||||
<header_from>example.com</header_from>
|
||||
</identifiers>
|
||||
<auth_results>
|
||||
<spf>
|
||||
<domain></domain>
|
||||
<result>none</result>
|
||||
</spf>
|
||||
</auth_results>
|
||||
</record>
|
||||
</feedback>
|
||||
@@ -0,0 +1,34 @@
|
||||
<?xml version="1.0"?>
|
||||
<feedback>
|
||||
<report_metadata>
|
||||
<report_id>2940</report_id>
|
||||
<org_name>XYZ Corporation</org_name>
|
||||
<email>admin@estadocuenta1.infonacot.gob.mx</email>
|
||||
<extra_contact_info>http://estadocuenta1.infonacot.gob.mx</extra_contact_info>
|
||||
<date_range>
|
||||
<begin>1536853302</begin>
|
||||
<end>1536939702</end>
|
||||
</date_range>
|
||||
</report_metadata>
|
||||
<policy_published>
|
||||
<domain>example.com</domain>
|
||||
<p>none</p>
|
||||
</policy_published>
|
||||
<record>
|
||||
<row>
|
||||
<source_ip>148.243.137.254</source_ip>
|
||||
<count>1</count>
|
||||
<policy_evaluated>
|
||||
<disposition>none</disposition>
|
||||
<dkim>fail</dkim>
|
||||
<spf>fail</spf>
|
||||
</policy_evaluated>
|
||||
</row>
|
||||
<identifiers>
|
||||
<envelope_to>estadocuenta1.infonacot.gob.mx</envelope_to>
|
||||
<header_from>example.com</header_from>
|
||||
</identifiers>
|
||||
<auth_results>
|
||||
</auth_results>
|
||||
</record>
|
||||
</feedback>
|
||||
@@ -10,7 +10,7 @@
|
||||
</date_range>
|
||||
</report_metadata>
|
||||
<policy_published>
|
||||
<domain>xxxxxx.de</domain>
|
||||
<domain>example.de</domain>
|
||||
<adkim>r</adkim>
|
||||
<aspf>r</aspf>
|
||||
<p>none</p>
|
||||
@@ -29,12 +29,12 @@
|
||||
</policy_evaluated>
|
||||
</row>
|
||||
<identifiers>
|
||||
<header_from>xxxxxx.de</header_from>
|
||||
<envelope_from>xxxxxx.de</envelope_from>
|
||||
<header_from>example.de</header_from>
|
||||
<envelope_from>example.de</envelope_from>
|
||||
</identifiers>
|
||||
<auth_results>
|
||||
<dkim>
|
||||
<domain>xxxxxx.de</domain>
|
||||
<domain>example.de</domain>
|
||||
<result>pass</result>
|
||||
</dkim>
|
||||
<spf>
|
||||
@@ -0,0 +1,56 @@
|
||||
<?xml version="1.0"?>
|
||||
<feedback>
|
||||
<version>1.0</version>
|
||||
<report_metadata>
|
||||
<org_name>usssa.com</org_name>
|
||||
<email>postmaster@usssa.com</email>
|
||||
<report_id>8953b4d4a4ee4218b6ac0e2cb2667ee1</report_id>
|
||||
<date_range>
|
||||
<begin>1538784000</begin>
|
||||
<end>1538870399</end>
|
||||
</date_range>
|
||||
</report_metadata>
|
||||
<policy_published>
|
||||
<domain>example.com</domain>
|
||||
<adkim>r</adkim>
|
||||
<aspf>r</aspf>
|
||||
<p>none</p>
|
||||
<sp>none</sp>
|
||||
<pct>100</pct>
|
||||
<fo>0</fo>
|
||||
</policy_published>
|
||||
<record>
|
||||
<row>
|
||||
<source_ip>12.20.127.40</source_ip>
|
||||
<count>1</count>
|
||||
<policy_evaluated>
|
||||
<disposition>none</disposition>
|
||||
<dkim>fail</dkim>
|
||||
<spf>fail</spf>
|
||||
</policy_evaluated>
|
||||
</row>
|
||||
<identifiers>
|
||||
<envelope_from></envelope_from>
|
||||
<header_from>example.com</header_from>
|
||||
</identifiers>
|
||||
<auth_results>
|
||||
</auth_results>
|
||||
</record>
|
||||
<record>
|
||||
<row>
|
||||
<source_ip>199.230.200.36</source_ip>
|
||||
<count>1</count>
|
||||
<policy_evaluated>
|
||||
<disposition>none</disposition>
|
||||
<dkim>fail</dkim>
|
||||
<spf>fail</spf>
|
||||
</policy_evaluated>
|
||||
</row>
|
||||
<identifiers>
|
||||
<envelope_from></envelope_from>
|
||||
<header_from>example.com</header_from>
|
||||
</identifiers>
|
||||
<auth_results>
|
||||
</auth_results>
|
||||
</record>
|
||||
</feedback>
|
||||
Reference in New Issue
Block a user