mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-02-25 18:46:25 +00:00
Compare commits
9 Commits
copilot/fi
...
copilot/op
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2192d0bfd4 | ||
|
|
f1c6ebef1d | ||
|
|
221bc332ef | ||
|
|
a2a75f7a81 | ||
|
|
50fcb51577 | ||
|
|
dd9ef90773 | ||
|
|
0e3a4b0f06 | ||
|
|
343b53ef18 | ||
|
|
792079a3e8 |
2
.github/workflows/python-tests.yml
vendored
2
.github/workflows/python-tests.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v5
|
||||||
|
|||||||
10
CHANGELOG.md
10
CHANGELOG.md
@@ -1,5 +1,15 @@
|
|||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## 9.0.10
|
||||||
|
|
||||||
|
- Support Python 3.14+
|
||||||
|
|
||||||
|
## 9.0.9
|
||||||
|
|
||||||
|
### Fixes
|
||||||
|
|
||||||
|
- Validate that a string is base64-encoded before trying to base64 decode it. (PRs #648 and #649)
|
||||||
|
|
||||||
## 9.0.8
|
## 9.0.8
|
||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|||||||
@@ -61,4 +61,4 @@ for RHEL or Debian.
|
|||||||
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
||||||
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
||||||
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
||||||
| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618)|
|
| 3.14 | ✅ | Actively maintained |
|
||||||
|
|||||||
1
ci.ini
1
ci.ini
@@ -3,6 +3,7 @@ save_aggregate = True
|
|||||||
save_forensic = True
|
save_forensic = True
|
||||||
save_smtp_tls = True
|
save_smtp_tls = True
|
||||||
debug = True
|
debug = True
|
||||||
|
offline = True
|
||||||
|
|
||||||
[elasticsearch]
|
[elasticsearch]
|
||||||
hosts = http://localhost:9200
|
hosts = http://localhost:9200
|
||||||
|
|||||||
@@ -29,14 +29,3 @@ token_file = /etc/example/token.json
|
|||||||
include_spam_trash = True
|
include_spam_trash = True
|
||||||
paginate_messages = True
|
paginate_messages = True
|
||||||
scopes = https://www.googleapis.com/auth/gmail.modify
|
scopes = https://www.googleapis.com/auth/gmail.modify
|
||||||
|
|
||||||
[msgraph]
|
|
||||||
auth_method = ClientSecret
|
|
||||||
client_id = 12345678-90ab-cdef-1234-567890abcdef
|
|
||||||
client_secret = your-client-secret-here
|
|
||||||
tenant_id = 12345678-90ab-cdef-1234-567890abcdef
|
|
||||||
mailbox = dmarc-reports@example.com
|
|
||||||
# Use standard folder names - they work across all locales
|
|
||||||
# and avoid "Default folder Root not found" errors
|
|
||||||
reports_folder = Inbox
|
|
||||||
archive_folder = Archive
|
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ for RHEL or Debian.
|
|||||||
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
|
||||||
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
|
||||||
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
|
||||||
| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618)|
|
| 3.14 | ✅ | Actively maintained |
|
||||||
|
|
||||||
```{toctree}
|
```{toctree}
|
||||||
:caption: 'Contents'
|
:caption: 'Contents'
|
||||||
|
|||||||
@@ -229,18 +229,6 @@ The full set of configuration options are:
|
|||||||
username, you must grant the app `Mail.ReadWrite.Shared`.
|
username, you must grant the app `Mail.ReadWrite.Shared`.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
:::{tip}
|
|
||||||
When configuring folder names (e.g., `reports_folder`, `archive_folder`),
|
|
||||||
you can use standard folder names like `Inbox`, `Archive`, `Sent Items`, etc.
|
|
||||||
These will be automatically mapped to Microsoft Graph's well-known folder names,
|
|
||||||
which works reliably across different mailbox locales and avoids issues with
|
|
||||||
uninitialized or shared mailboxes. Supported folder names include:
|
|
||||||
- English: Inbox, Sent Items, Deleted Items, Drafts, Junk Email, Archive, Outbox
|
|
||||||
- German: Posteingang, Gesendete Elemente, Gelöschte Elemente, Entwürfe, Junk-E-Mail, Archiv
|
|
||||||
- French: Boîte de réception, Éléments envoyés, Éléments supprimés, Brouillons, Courrier indésirable, Archives
|
|
||||||
- Spanish: Bandeja de entrada, Elementos enviados, Elementos eliminados, Borradores, Correo no deseado
|
|
||||||
:::
|
|
||||||
|
|
||||||
:::{warning}
|
:::{warning}
|
||||||
If you are using the `ClientSecret` auth method, you need to
|
If you are using the `ClientSecret` auth method, you need to
|
||||||
grant the `Mail.ReadWrite` (application) permission to the
|
grant the `Mail.ReadWrite` (application) permission to the
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -751,8 +751,8 @@ def parse_aggregate_report_xml(
|
|||||||
new_report_metadata["report_id"] = report_id
|
new_report_metadata["report_id"] = report_id
|
||||||
date_range = report["report_metadata"]["date_range"]
|
date_range = report["report_metadata"]["date_range"]
|
||||||
|
|
||||||
begin_ts = int(date_range["begin"])
|
begin_ts = int(date_range["begin"].split(".")[0])
|
||||||
end_ts = int(date_range["end"])
|
end_ts = int(date_range["end"].split(".")[0])
|
||||||
span_seconds = end_ts - begin_ts
|
span_seconds = end_ts - begin_ts
|
||||||
|
|
||||||
normalize_timespan = span_seconds > normalize_timespan_threshold_hours * 3600
|
normalize_timespan = span_seconds > normalize_timespan_threshold_hours * 3600
|
||||||
@@ -892,7 +892,11 @@ def extract_report(content: Union[bytes, str, BinaryIO]) -> str:
|
|||||||
try:
|
try:
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
try:
|
try:
|
||||||
file_object = BytesIO(b64decode(content))
|
file_object = BytesIO(
|
||||||
|
b64decode(
|
||||||
|
content.replace("\n", "").replace("\r", ""), validate=True
|
||||||
|
)
|
||||||
|
)
|
||||||
except binascii.Error:
|
except binascii.Error:
|
||||||
return content
|
return content
|
||||||
header = file_object.read(6)
|
header = file_object.read(6)
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
__version__ = "9.0.8"
|
__version__ = "9.0.10"
|
||||||
|
|
||||||
USER_AGENT = f"parsedmarc/{__version__}"
|
USER_AGENT = f"parsedmarc/{__version__}"
|
||||||
|
|||||||
@@ -20,59 +20,6 @@ from msgraph.core import GraphClient
|
|||||||
from parsedmarc.log import logger
|
from parsedmarc.log import logger
|
||||||
from parsedmarc.mail.mailbox_connection import MailboxConnection
|
from parsedmarc.mail.mailbox_connection import MailboxConnection
|
||||||
|
|
||||||
# Mapping of common folder names to Microsoft Graph well-known folder names
|
|
||||||
# This avoids the "Default folder Root not found" error on uninitialized mailboxes
|
|
||||||
WELL_KNOWN_FOLDER_MAP = {
|
|
||||||
# English names
|
|
||||||
"inbox": "inbox",
|
|
||||||
"sent items": "sentitems",
|
|
||||||
"sent": "sentitems",
|
|
||||||
"sentitems": "sentitems",
|
|
||||||
"deleted items": "deleteditems",
|
|
||||||
"deleted": "deleteditems",
|
|
||||||
"deleteditems": "deleteditems",
|
|
||||||
"trash": "deleteditems",
|
|
||||||
"drafts": "drafts",
|
|
||||||
"junk email": "junkemail",
|
|
||||||
"junk": "junkemail",
|
|
||||||
"junkemail": "junkemail",
|
|
||||||
"spam": "junkemail",
|
|
||||||
"archive": "archive",
|
|
||||||
"outbox": "outbox",
|
|
||||||
"conversation history": "conversationhistory",
|
|
||||||
"conversationhistory": "conversationhistory",
|
|
||||||
# German names
|
|
||||||
"posteingang": "inbox",
|
|
||||||
"gesendete elemente": "sentitems",
|
|
||||||
"gesendet": "sentitems",
|
|
||||||
"gelöschte elemente": "deleteditems",
|
|
||||||
"gelöscht": "deleteditems",
|
|
||||||
"entwürfe": "drafts",
|
|
||||||
"junk-e-mail": "junkemail",
|
|
||||||
"archiv": "archive",
|
|
||||||
"postausgang": "outbox",
|
|
||||||
# French names
|
|
||||||
"boîte de réception": "inbox",
|
|
||||||
"éléments envoyés": "sentitems",
|
|
||||||
"envoyés": "sentitems",
|
|
||||||
"éléments supprimés": "deleteditems",
|
|
||||||
"supprimés": "deleteditems",
|
|
||||||
"brouillons": "drafts",
|
|
||||||
"courrier indésirable": "junkemail",
|
|
||||||
"archives": "archive",
|
|
||||||
"boîte d'envoi": "outbox",
|
|
||||||
# Spanish names
|
|
||||||
"bandeja de entrada": "inbox",
|
|
||||||
"elementos enviados": "sentitems",
|
|
||||||
"enviados": "sentitems",
|
|
||||||
"elementos eliminados": "deleteditems",
|
|
||||||
"eliminados": "deleteditems",
|
|
||||||
"borradores": "drafts",
|
|
||||||
"correo no deseado": "junkemail",
|
|
||||||
"archivar": "archive",
|
|
||||||
"bandeja de salida": "outbox",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class AuthMethod(Enum):
|
class AuthMethod(Enum):
|
||||||
DeviceCode = 1
|
DeviceCode = 1
|
||||||
@@ -183,13 +130,6 @@ class MSGraphConnection(MailboxConnection):
|
|||||||
self.mailbox_name = mailbox
|
self.mailbox_name = mailbox
|
||||||
|
|
||||||
def create_folder(self, folder_name: str):
|
def create_folder(self, folder_name: str):
|
||||||
# Check if this is a well-known folder - they already exist and cannot be created
|
|
||||||
if "/" not in folder_name:
|
|
||||||
well_known_name = WELL_KNOWN_FOLDER_MAP.get(folder_name.lower())
|
|
||||||
if well_known_name:
|
|
||||||
logger.debug(f"Folder '{folder_name}' is a well-known folder, skipping creation")
|
|
||||||
return
|
|
||||||
|
|
||||||
sub_url = ""
|
sub_url = ""
|
||||||
path_parts = folder_name.split("/")
|
path_parts = folder_name.split("/")
|
||||||
if len(path_parts) > 1: # Folder is a subFolder
|
if len(path_parts) > 1: # Folder is a subFolder
|
||||||
@@ -306,12 +246,6 @@ class MSGraphConnection(MailboxConnection):
|
|||||||
parent_folder_id = folder_id
|
parent_folder_id = folder_id
|
||||||
return self._find_folder_id_with_parent(path_parts[-1], parent_folder_id)
|
return self._find_folder_id_with_parent(path_parts[-1], parent_folder_id)
|
||||||
else:
|
else:
|
||||||
# Check if this is a well-known folder name (case-insensitive)
|
|
||||||
well_known_name = WELL_KNOWN_FOLDER_MAP.get(folder_name.lower())
|
|
||||||
if well_known_name:
|
|
||||||
# Use well-known folder name directly to avoid querying uninitialized mailboxes
|
|
||||||
logger.debug(f"Using well-known folder name '{well_known_name}' for '{folder_name}'")
|
|
||||||
return well_known_name
|
|
||||||
return self._find_folder_id_with_parent(folder_name, None)
|
return self._find_folder_id_with_parent(folder_name, None)
|
||||||
|
|
||||||
def _find_folder_id_with_parent(
|
def _find_folder_id_with_parent(
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ classifiers = [
|
|||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
"Programming Language :: Python :: 3"
|
"Programming Language :: Python :: 3"
|
||||||
]
|
]
|
||||||
requires-python = ">=3.9, <3.14"
|
requires-python = ">=3.9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"azure-identity>=1.8.0",
|
"azure-identity>=1.8.0",
|
||||||
"azure-monitor-ingestion>=1.0.0",
|
"azure-monitor-ingestion>=1.0.0",
|
||||||
@@ -48,7 +48,7 @@ dependencies = [
|
|||||||
"imapclient>=2.1.0",
|
"imapclient>=2.1.0",
|
||||||
"kafka-python-ng>=2.2.2",
|
"kafka-python-ng>=2.2.2",
|
||||||
"lxml>=4.4.0",
|
"lxml>=4.4.0",
|
||||||
"mailsuite>=1.11.1",
|
"mailsuite>=1.11.2",
|
||||||
"msgraph-core==0.2.2",
|
"msgraph-core==0.2.2",
|
||||||
"opensearch-py>=2.4.2,<=3.0.0",
|
"opensearch-py>=2.4.2,<=3.0.0",
|
||||||
"publicsuffixlist>=0.10.0",
|
"publicsuffixlist>=0.10.0",
|
||||||
|
|||||||
45
tests.py
45
tests.py
@@ -12,6 +12,9 @@ from lxml import etree
|
|||||||
import parsedmarc
|
import parsedmarc
|
||||||
import parsedmarc.utils
|
import parsedmarc.utils
|
||||||
|
|
||||||
|
# Detect if running in GitHub Actions to skip DNS lookups
|
||||||
|
OFFLINE_MODE = os.environ.get("GITHUB_ACTIONS", "false").lower() == "true"
|
||||||
|
|
||||||
|
|
||||||
def minify_xml(xml_string):
|
def minify_xml(xml_string):
|
||||||
parser = etree.XMLParser(remove_blank_text=True)
|
parser = etree.XMLParser(remove_blank_text=True)
|
||||||
@@ -121,7 +124,7 @@ class Test(unittest.TestCase):
|
|||||||
continue
|
continue
|
||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
parsed_report = parsedmarc.parse_report_file(
|
parsed_report = parsedmarc.parse_report_file(
|
||||||
sample_path, always_use_local_files=True
|
sample_path, always_use_local_files=True, offline=OFFLINE_MODE
|
||||||
)["report"]
|
)["report"]
|
||||||
parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
|
parsedmarc.parsed_aggregate_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
@@ -129,7 +132,7 @@ class Test(unittest.TestCase):
|
|||||||
def testEmptySample(self):
|
def testEmptySample(self):
|
||||||
"""Test empty/unparasable report"""
|
"""Test empty/unparasable report"""
|
||||||
with self.assertRaises(parsedmarc.ParserError):
|
with self.assertRaises(parsedmarc.ParserError):
|
||||||
parsedmarc.parse_report_file("samples/empty.xml")
|
parsedmarc.parse_report_file("samples/empty.xml", offline=OFFLINE_MODE)
|
||||||
|
|
||||||
def testForensicSamples(self):
|
def testForensicSamples(self):
|
||||||
"""Test sample forensic/ruf/failure DMARC reports"""
|
"""Test sample forensic/ruf/failure DMARC reports"""
|
||||||
@@ -139,8 +142,12 @@ class Test(unittest.TestCase):
|
|||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
with open(sample_path) as sample_file:
|
with open(sample_path) as sample_file:
|
||||||
sample_content = sample_file.read()
|
sample_content = sample_file.read()
|
||||||
parsed_report = parsedmarc.parse_report_email(sample_content)["report"]
|
parsed_report = parsedmarc.parse_report_email(
|
||||||
parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
|
sample_content, offline=OFFLINE_MODE
|
||||||
|
)["report"]
|
||||||
|
parsed_report = parsedmarc.parse_report_file(
|
||||||
|
sample_path, offline=OFFLINE_MODE
|
||||||
|
)["report"]
|
||||||
parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
|
parsedmarc.parsed_forensic_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
@@ -152,36 +159,12 @@ class Test(unittest.TestCase):
|
|||||||
if os.path.isdir(sample_path):
|
if os.path.isdir(sample_path):
|
||||||
continue
|
continue
|
||||||
print("Testing {0}: ".format(sample_path), end="")
|
print("Testing {0}: ".format(sample_path), end="")
|
||||||
parsed_report = parsedmarc.parse_report_file(sample_path)["report"]
|
parsed_report = parsedmarc.parse_report_file(
|
||||||
|
sample_path, offline=OFFLINE_MODE
|
||||||
|
)["report"]
|
||||||
parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
|
parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
|
||||||
print("Passed!")
|
print("Passed!")
|
||||||
|
|
||||||
def testMSGraphWellKnownFolders(self):
|
|
||||||
"""Test MSGraph well-known folder name mapping"""
|
|
||||||
from parsedmarc.mail.graph import WELL_KNOWN_FOLDER_MAP
|
|
||||||
|
|
||||||
# Test English folder names
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("inbox") == "inbox"
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("sent items") == "sentitems"
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("deleted items") == "deleteditems"
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("archive") == "archive"
|
|
||||||
|
|
||||||
# Test case insensitivity - simulating how the code actually uses it
|
|
||||||
# This is what happens when user config has "reports_folder = Inbox"
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("inbox") == "inbox"
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("Inbox".lower()) == "inbox" # User's exact config
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("INBOX".lower()) == "inbox"
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("Archive".lower()) == "archive"
|
|
||||||
|
|
||||||
# Test German folder names
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("posteingang") == "inbox"
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("Posteingang".lower()) == "inbox" # Capitalized
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("archiv") == "archive"
|
|
||||||
|
|
||||||
# Test that custom folders don't match
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("custom_folder") is None
|
|
||||||
assert WELL_KNOWN_FOLDER_MAP.get("my_reports") is None
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main(verbosity=2)
|
unittest.main(verbosity=2)
|
||||||
|
|||||||
Reference in New Issue
Block a user