diff --git a/.gitignore b/.gitignore index fd61433..d4e54e8 100644 --- a/.gitignore +++ b/.gitignore @@ -128,3 +128,7 @@ tmp/ prod*.ini stage*.ini dev*.ini + +# Private samples + +samples/private \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 1de5a4c..8581aab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ Changelog ========= +8.2.0 +----- + +- Support non-standard, text-based forensic reports sent by some mail hosts +- Set forensic report version to `None` (`null` in JSON) if the report was in a non-standard format and/or is missing a version number + 8.1.1 ----- diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 5002676..54c22c1 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -34,7 +34,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg from parsedmarc.utils import parse_email from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime -__version__ = "8.1.1" +__version__ = "8.2.0" formatter = logging.Formatter( fmt='%(levelname)8s:%(filename)s:%(lineno)d:%(message)s', @@ -49,6 +49,7 @@ logger.debug("parsedmarc v{0}".format(__version__)) feedback_report_regex = re.compile(r"^([\w\-]+): (.+)$", re.MULTILINE) xml_header_regex = re.compile(r"^<\?xml .*?>", re.MULTILINE) xml_schema_regex = re.compile(r"", re.MULTILINE) +text_report_regex = re.compile(r"\s*([a-zA-Z\s]+):\s(.+)", re.MULTILINE) MAGIC_ZIP = b"\x50\x4B\x03\x04" MAGIC_GZIP = b"\x1F\x8B" @@ -608,7 +609,7 @@ def parse_forensic_report(feedback_report, sample, msg_date, parsed_report["arrival_date"] = msg_date.isoformat() if "version" not in parsed_report: - parsed_report["version"] = 1 + parsed_report["version"] = None if "user_agent" not in parsed_report: parsed_report["user_agent"] = None @@ -829,6 +830,21 @@ def parse_report_email(input_, offline=False, ip_db_path=None, sample = payload elif content_type == "message/rfc822": sample = payload + elif content_type == "text/plain": + if "A message claiming to be from you has failed" in payload: + parts = payload.split("detected.") + field_matches = text_report_regex.findall(parts[0]) + fields = dict() + for match in field_matches: + field_name = match[0].lower().replace(" ", "-") + fields[field_name] = match[1].strip() + feedback_report = "Arrival-Date: {}\n" \ + "Source-IP: {}".format( + fields["received-date"], + fields["sender-ip-address"]) + sample = parts[1].lstrip() + sample = sample.replace("=\r\n", "") + logger.debug(sample) else: try: payload = b64decode(payload) @@ -857,7 +873,7 @@ def parse_report_email(input_, offline=False, ip_db_path=None, 'aggregate DMARC report: {1}'.format(subject, e) raise InvalidAggregateReport(error) - except FileNotFoundError as e: + except Exception as e: error = 'Unable to parse message with ' \ 'subject "{0}": {1}'.format(subject, e) raise InvalidDMARCReport(error) diff --git a/setup.py b/setup.py index 6bbf6d2..328a08c 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ from setuptools import setup from codecs import open from os import path -__version__ = "8.1.1" +__version__ = "8.2.0" description = "A Python package and CLI for parsing aggregate and " \ "forensic DMARC reports"