diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 8d94b42..9d46e2c 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -591,14 +591,19 @@ def extract_report(input_): str: The extracted text """ + def is_base64(s): + base64_regex = re.compile(r'^[A-Za-z0-9+/=]+\Z') + return bool(base64_regex.match(s)) + try: - file_object = BytesIO() - if type(input_) is str: - try: - file_object = BytesIO(b64decode(input_)) - except binascii.Error: - pass - if file_object is None: + file_object = None + if isinstance(input_, str): + if is_base64(input_): + try: + file_object = BytesIO(b64decode(input_)) + except binascii.Error: + pass + else: file_object = open(input_, "rb") elif type(input_) is bytes: file_object = BytesIO(input_) @@ -613,7 +618,7 @@ def extract_report(input_): errors='ignore') elif header.startswith(MAGIC_GZIP): report = zlib.decompress( - file_object.getvalue(), + file_object.read(), zlib.MAX_WBITS | 16).decode(errors='ignore') elif header.startswith(MAGIC_XML) or header.startswith(MAGIC_JSON): report = file_object.read().decode(errors='ignore') diff --git a/samples/extract_report/changed-input.xml b/samples/extract_report/changed-input.xml new file mode 100644 index 0000000..31eacf5 --- /dev/null +++ b/samples/extract_report/changed-input.xml @@ -0,0 +1,592 @@ + + + + fred.com + noreply-dmarc-support@google.com + https://support.google.com/a/answer/2466580 + 11038226378739404135 + + 1718236800 + 1718323199 + + + + example.com + r + r +

none

+ none + 100 + none +
+ + + 209.85.220.69 + 1 + + none + fail + pass + + + + example.com + + + + example.com + pass + + + + + + 209.85.220.41 + 2 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + gmail.com + pass + + + + + + 54.240.48.90 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.31 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.33 + 33 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.92 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.110 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.41 + 12 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + connectivityu.com + pass + + + + + + 2607:f8b0:4864:20::132 + 1 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.8.83 + 36 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.96 + 27 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.95 + 25 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.69 + 2252 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.48.94 + 46 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.88 + 37 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.55 + 1 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.48.93 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.41 + 23 + + none + pass + pass + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + example.com + pass + + + + + + 209.85.220.41 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + rphvac.com + none + + + + + + 209.85.220.41 + 359 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + +
diff --git a/samples/extract_report/nice-input.xml b/samples/extract_report/nice-input.xml new file mode 100644 index 0000000..ce28c8e --- /dev/null +++ b/samples/extract_report/nice-input.xml @@ -0,0 +1,592 @@ + + + + google.com + noreply-dmarc-support@google.com + https://support.google.com/a/answer/2466580 + 11038226378739404135 + + 1718236800 + 1718323199 + + + + example.com + r + r +

none

+ none + 100 + none +
+ + + 209.85.220.69 + 1 + + none + fail + pass + + + + example.com + + + + example.com + pass + + + + + + 209.85.220.41 + 2 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + gmail.com + pass + + + + + + 54.240.48.90 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.31 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.33 + 33 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.92 + 40 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.110 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.41 + 12 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + connectivityu.com + pass + + + + + + 2607:f8b0:4864:20::132 + 1 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.8.83 + 36 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.96 + 27 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.48.95 + 25 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.69 + 2252 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.48.94 + 46 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 54.240.8.88 + 37 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.55 + 1 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + + + + 54.240.48.93 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + amazonses.com + pass + + + + + + 209.85.220.41 + 23 + + none + pass + pass + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + example.com + pass + + + + + + 209.85.220.41 + 24 + + none + pass + fail + + + + example.com + + + + example.com + pass + awbr2rp4egb35wbg4umq4e5dcoe5kc4n + + + amazonses.com + pass + ug7nbtf4gccmlpwj322ax3p6ow6yfsug + + + rphvac.com + none + + + + + + 209.85.220.41 + 359 + + none + pass + pass + + + + example.com + + + + example.com + pass + google + + + example.com + pass + + + +
diff --git a/samples/extract_report/nice-input.xml.gz b/samples/extract_report/nice-input.xml.gz new file mode 100644 index 0000000..ed74d0e Binary files /dev/null and b/samples/extract_report/nice-input.xml.gz differ diff --git a/samples/extract_report/nice-input.xml.zip b/samples/extract_report/nice-input.xml.zip new file mode 100644 index 0000000..ec507d4 Binary files /dev/null and b/samples/extract_report/nice-input.xml.zip differ diff --git a/tests.py b/tests.py index 71ca0b9..ff4fb41 100644 --- a/tests.py +++ b/tests.py @@ -1,13 +1,26 @@ -from __future__ import print_function, unicode_literals, absolute_import +from __future__ import absolute_import, print_function, unicode_literals +import os import unittest from glob import glob -import os + +from lxml import etree import parsedmarc import parsedmarc.utils +def minify_xml(xml_string): + parser = etree.XMLParser(remove_blank_text=True) + tree = etree.fromstring(xml_string.encode('utf-8'), parser) + return etree.tostring(tree, pretty_print=False).decode('utf-8') + +def compare_xml(xml1, xml2): + parser = etree.XMLParser(remove_blank_text=True) + tree1 = etree.fromstring(xml1.encode('utf-8'), parser) + tree2 = etree.fromstring(xml2.encode('utf-8'), parser) + return etree.tostring(tree1) == etree.tostring(tree2) + class Test(unittest.TestCase): def testBase64Decoding(self): """Test base64 decoding""" @@ -26,6 +39,66 @@ class Test(unittest.TestCase): result = parsedmarc.utils.get_base_domain(subdomain) assert result == "c.akamaiedge.net" + def testExtractReportXMLComparator(self): + """Test XML comparator function""" + print() + xmlnice = open("samples/extract_report/nice-input.xml").read() + print(xmlnice) + xmlchanged = minify_xml(open("samples/extract_report/changed-input.xml").read()) + print(xmlchanged) + self.assertTrue(compare_xml(xmlnice, xmlnice)) + self.assertTrue(compare_xml(xmlchanged, xmlchanged)) + self.assertFalse(compare_xml(xmlnice, xmlchanged)) + self.assertFalse(compare_xml(xmlchanged, xmlnice)) + print("Passed!") + + def testExtractReportBytes(self): + """Test extract report function for bytes string input""" + print() + file = "samples/extract_report/nice-input.xml" + with open(file, 'rb') as f: + data = f.read() + print("Testing {0}: " .format(file), end="") + xmlout = parsedmarc.extract_report(data) + xmlin = open("samples/extract_report/nice-input.xml").read() + self.assertTrue(compare_xml(xmlout, xmlin)) + print("Passed!") + + def testExtractReportXML(self): + """Test extract report function for XML input""" + print() + file = "samples/extract_report/nice-input.xml" + print("Testing {0}: " .format(file), end="") + xmlout = parsedmarc.extract_report(file) + xmlin = open("samples/extract_report/nice-input.xml").read() + self.assertTrue(compare_xml(xmlout, xmlin)) + print("Passed!") + + def testExtractReportGZip(self): + """Test extract report function for gzip input""" + print() + file = "samples/extract_report/nice-input.xml.gz" + print("Testing {0}: " .format(file), end="") + xmlout = parsedmarc.extract_report(file) + xmlin = open("samples/extract_report/nice-input.xml").read() + self.assertTrue(compare_xml(xmlout, xmlin)) + print("Passed!") + + def testExtractReportZip(self): + """Test extract report function for zip input""" + print() + file = "samples/extract_report/nice-input.xml.zip" + print("Testing {0}: " .format(file), end="") + xmlout = parsedmarc.extract_report(file) + print(xmlout) + xmlin = minify_xml(open("samples/extract_report/nice-input.xml").read()) + print(xmlin) + self.assertTrue(compare_xml(xmlout, xmlin)) + xmlin = minify_xml(open("samples/extract_report/changed-input.xml").read()) + print(xmlin) + self.assertFalse(compare_xml(xmlout, xmlin)) + print("Passed!") + def testAggregateSamples(self): """Test sample aggregate/rua DMARC reports""" print()