diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 8d94b42..9d46e2c 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -591,14 +591,19 @@ def extract_report(input_):
str: The extracted text
"""
+    def is_base64(s):
+        """Return True if s is made up solely of base64 characters.
+
+        Only the character set is checked (A-Z, a-z, 0-9, ``+``, ``/``
+        and ``=``, with at least one character required); length and
+        padding are not validated here.
+        """
+        return re.match(r'^[A-Za-z0-9+/=]+\Z', s) is not None
+
try:
- file_object = BytesIO()
- if type(input_) is str:
- try:
- file_object = BytesIO(b64decode(input_))
- except binascii.Error:
- pass
- if file_object is None:
+ file_object = None
+ if isinstance(input_, str):
+ if is_base64(input_):
+ try:
+ file_object = BytesIO(b64decode(input_))
+ except binascii.Error:
+ pass
+ else:
file_object = open(input_, "rb")
elif type(input_) is bytes:
file_object = BytesIO(input_)
@@ -613,7 +618,7 @@ def extract_report(input_):
errors='ignore')
elif header.startswith(MAGIC_GZIP):
report = zlib.decompress(
- file_object.getvalue(),
+ file_object.read(),
zlib.MAX_WBITS | 16).decode(errors='ignore')
elif header.startswith(MAGIC_XML) or header.startswith(MAGIC_JSON):
report = file_object.read().decode(errors='ignore')
diff --git a/samples/extract_report/changed-input.xml b/samples/extract_report/changed-input.xml
new file mode 100644
index 0000000..31eacf5
--- /dev/null
+++ b/samples/extract_report/changed-input.xml
@@ -0,0 +1,592 @@
+
+
+
+ fred.com
+ noreply-dmarc-support@google.com
+ https://support.google.com/a/answer/2466580
+ 11038226378739404135
+
+ 1718236800
+ 1718323199
+
+
+
+ example.com
+ r
+ r
+ none
+ none
+ 100
+ none
+
+
+
+ 209.85.220.69
+ 1
+
+ none
+ fail
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+
+
+
+
+
+ 209.85.220.41
+ 2
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ gmail.com
+ pass
+
+
+
+
+
+ 54.240.48.90
+ 40
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.8.31
+ 40
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.8.33
+ 33
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.48.92
+ 40
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.48.110
+ 24
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 209.85.220.41
+ 12
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ connectivityu.com
+ pass
+
+
+
+
+
+ 2607:f8b0:4864:20::132
+ 1
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ google
+
+
+ example.com
+ pass
+
+
+
+
+
+ 54.240.8.83
+ 36
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.8.96
+ 27
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.48.95
+ 25
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 209.85.220.69
+ 2252
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ google
+
+
+ example.com
+ pass
+
+
+
+
+
+ 54.240.48.94
+ 46
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.8.88
+ 37
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 209.85.220.55
+ 1
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ google
+
+
+ example.com
+ pass
+
+
+
+
+
+ 54.240.48.93
+ 24
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 209.85.220.41
+ 23
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ example.com
+ pass
+
+
+
+
+
+ 209.85.220.41
+ 24
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ rphvac.com
+ none
+
+
+
+
+
+ 209.85.220.41
+ 359
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ google
+
+
+ example.com
+ pass
+
+
+
+
diff --git a/samples/extract_report/nice-input.xml b/samples/extract_report/nice-input.xml
new file mode 100644
index 0000000..ce28c8e
--- /dev/null
+++ b/samples/extract_report/nice-input.xml
@@ -0,0 +1,592 @@
+
+
+
+ google.com
+ noreply-dmarc-support@google.com
+ https://support.google.com/a/answer/2466580
+ 11038226378739404135
+
+ 1718236800
+ 1718323199
+
+
+
+ example.com
+ r
+ r
+ none
+ none
+ 100
+ none
+
+
+
+ 209.85.220.69
+ 1
+
+ none
+ fail
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+
+
+
+
+
+ 209.85.220.41
+ 2
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ gmail.com
+ pass
+
+
+
+
+
+ 54.240.48.90
+ 40
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.8.31
+ 40
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.8.33
+ 33
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.48.92
+ 40
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.48.110
+ 24
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 209.85.220.41
+ 12
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ connectivityu.com
+ pass
+
+
+
+
+
+ 2607:f8b0:4864:20::132
+ 1
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ google
+
+
+ example.com
+ pass
+
+
+
+
+
+ 54.240.8.83
+ 36
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.8.96
+ 27
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.48.95
+ 25
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 209.85.220.69
+ 2252
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ google
+
+
+ example.com
+ pass
+
+
+
+
+
+ 54.240.48.94
+ 46
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 54.240.8.88
+ 37
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 209.85.220.55
+ 1
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ google
+
+
+ example.com
+ pass
+
+
+
+
+
+ 54.240.48.93
+ 24
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ amazonses.com
+ pass
+
+
+
+
+
+ 209.85.220.41
+ 23
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ example.com
+ pass
+
+
+
+
+
+ 209.85.220.41
+ 24
+
+ none
+ pass
+ fail
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ awbr2rp4egb35wbg4umq4e5dcoe5kc4n
+
+
+ amazonses.com
+ pass
+ ug7nbtf4gccmlpwj322ax3p6ow6yfsug
+
+
+ rphvac.com
+ none
+
+
+
+
+
+ 209.85.220.41
+ 359
+
+ none
+ pass
+ pass
+
+
+
+ example.com
+
+
+
+ example.com
+ pass
+ google
+
+
+ example.com
+ pass
+
+
+
+
diff --git a/samples/extract_report/nice-input.xml.gz b/samples/extract_report/nice-input.xml.gz
new file mode 100644
index 0000000..ed74d0e
Binary files /dev/null and b/samples/extract_report/nice-input.xml.gz differ
diff --git a/samples/extract_report/nice-input.xml.zip b/samples/extract_report/nice-input.xml.zip
new file mode 100644
index 0000000..ec507d4
Binary files /dev/null and b/samples/extract_report/nice-input.xml.zip differ
diff --git a/tests.py b/tests.py
index 71ca0b9..ff4fb41 100644
--- a/tests.py
+++ b/tests.py
@@ -1,13 +1,26 @@
-from __future__ import print_function, unicode_literals, absolute_import
+from __future__ import absolute_import, print_function, unicode_literals
+import os
import unittest
from glob import glob
-import os
+
+from lxml import etree
import parsedmarc
import parsedmarc.utils
+def minify_xml(xml_string):
+    """Re-serialize an XML string compactly.
+
+    The text is encoded as UTF-8 and parsed with an lxml parser created
+    with ``remove_blank_text=True``; the resulting tree is serialized
+    without pretty-printing and decoded back to text.
+
+    Args:
+        xml_string (str): XML document text
+
+    Returns:
+        str: The re-serialized XML
+    """
+    raw_xml = xml_string.encode('utf-8')
+    root = etree.fromstring(raw_xml, etree.XMLParser(remove_blank_text=True))
+    compact = etree.tostring(root, pretty_print=False)
+    return compact.decode('utf-8')
+
+def compare_xml(xml1, xml2):
+    """Check whether two XML strings serialize identically once parsed.
+
+    Both inputs are encoded as UTF-8, parsed with the same lxml parser
+    (``remove_blank_text=True``) and serialized again; the serialized
+    results are then compared for equality.
+
+    Args:
+        xml1 (str): First XML document text
+        xml2 (str): Second XML document text
+
+    Returns:
+        bool: True if both serializations are equal
+    """
+    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)
+    serialized = [
+        etree.tostring(etree.fromstring(doc.encode('utf-8'),
+                                        blank_stripping_parser))
+        for doc in (xml1, xml2)
+    ]
+    return serialized[0] == serialized[1]
+
class Test(unittest.TestCase):
def testBase64Decoding(self):
"""Test base64 decoding"""
@@ -26,6 +39,66 @@ class Test(unittest.TestCase):
result = parsedmarc.utils.get_base_domain(subdomain)
assert result == "c.akamaiedge.net"
+    def testExtractReportXMLComparator(self):
+        """Test XML comparator function
+
+        Each sample must compare equal to itself, and the two sample
+        reports must compare unequal in either argument order.
+        """
+        print()
+        # Context managers close the sample files deterministically
+        # instead of leaving the handles to the garbage collector.
+        with open("samples/extract_report/nice-input.xml") as nice_file:
+            xmlnice = nice_file.read()
+        print(xmlnice)
+        with open("samples/extract_report/changed-input.xml") as changed_file:
+            xmlchanged = minify_xml(changed_file.read())
+        print(xmlchanged)
+        self.assertTrue(compare_xml(xmlnice, xmlnice))
+        self.assertTrue(compare_xml(xmlchanged, xmlchanged))
+        self.assertFalse(compare_xml(xmlnice, xmlchanged))
+        self.assertFalse(compare_xml(xmlchanged, xmlnice))
+        print("Passed!")
+
+    def testExtractReportBytes(self):
+        """Test extract report function for bytes string input
+
+        The sample XML is read as bytes, passed to extract_report, and
+        the result is compared with the same file read as text.
+        """
+        print()
+        # Named sample_path rather than "file" to avoid shadowing the
+        # Python 2 builtin.
+        sample_path = "samples/extract_report/nice-input.xml"
+        with open(sample_path, 'rb') as f:
+            data = f.read()
+        print("Testing {0}: " .format(sample_path), end="")
+        xmlout = parsedmarc.extract_report(data)
+        # Read the expected text inside a with-block so the handle is
+        # closed instead of leaked.
+        with open(sample_path) as f:
+            xmlin = f.read()
+        self.assertTrue(compare_xml(xmlout, xmlin))
+        print("Passed!")
+
+    def testExtractReportXML(self):
+        """Test extract report function for XML input
+
+        The path of the sample XML file is passed to extract_report and
+        the result is compared with the file's own text.
+        """
+        print()
+        # Named sample_path rather than "file" to avoid shadowing the
+        # Python 2 builtin.
+        sample_path = "samples/extract_report/nice-input.xml"
+        print("Testing {0}: " .format(sample_path), end="")
+        xmlout = parsedmarc.extract_report(sample_path)
+        # Read the expected text inside a with-block so the handle is
+        # closed instead of leaked.
+        with open(sample_path) as f:
+            xmlin = f.read()
+        self.assertTrue(compare_xml(xmlout, xmlin))
+        print("Passed!")
+
+    def testExtractReportGZip(self):
+        """Test extract report function for gzip input
+
+        The path of the gzip sample is passed to extract_report and the
+        result is compared with the text of the plain XML sample.
+        """
+        print()
+        # Named sample_path rather than "file" to avoid shadowing the
+        # Python 2 builtin.
+        sample_path = "samples/extract_report/nice-input.xml.gz"
+        print("Testing {0}: " .format(sample_path), end="")
+        xmlout = parsedmarc.extract_report(sample_path)
+        # Read the expected text inside a with-block so the handle is
+        # closed instead of leaked.
+        with open("samples/extract_report/nice-input.xml") as f:
+            xmlin = f.read()
+        self.assertTrue(compare_xml(xmlout, xmlin))
+        print("Passed!")
+
+    def testExtractReportZip(self):
+        """Test extract report function for zip input
+
+        The path of the zip sample is passed to extract_report; the
+        result must match the plain XML sample and must not match the
+        changed sample.
+        """
+        print()
+        # Named sample_path rather than "file" to avoid shadowing the
+        # Python 2 builtin.
+        sample_path = "samples/extract_report/nice-input.xml.zip"
+        print("Testing {0}: " .format(sample_path), end="")
+        xmlout = parsedmarc.extract_report(sample_path)
+        print(xmlout)
+        # Both reference files are read inside with-blocks so their
+        # handles are closed instead of leaked.
+        with open("samples/extract_report/nice-input.xml") as f:
+            xmlin = minify_xml(f.read())
+        print(xmlin)
+        self.assertTrue(compare_xml(xmlout, xmlin))
+        with open("samples/extract_report/changed-input.xml") as f:
+            xmlin = minify_xml(f.read())
+        print(xmlin)
+        self.assertFalse(compare_xml(xmlout, xmlin))
+        print("Passed!")
+
def testAggregateSamples(self):
"""Test sample aggregate/rua DMARC reports"""
print()