Mirror of https://github.com/domainaware/parsedmarc.git (synced 2026-02-18 07:26:25 +00:00)

Compare commits: 9.0.6 ... copilot/fi (14 commits)
| SHA1 |
|---|
| eb2218b6fc |
| 3f2fc5f727 |
| f94c28c770 |
| c0f05b81b8 |
| 9c9ef2fa50 |
| 1f3a1fc843 |
| 34fa0c145d |
| 6719a06388 |
| eafa435868 |
| 5d772c3b36 |
| 72cabbef23 |
| 3d74cd6ac0 |
| d1ac59a016 |
| 7fdd53008f |
CHANGELOG.md (15 lines changed)
@@ -1,5 +1,18 @@
# Changelog

## 9.0.8

### Fixes

- Fix logging configuration not propagating to child parser processes (#646).
- Update the `mailsuite` dependency to `>=1.11.1` to solve issues with iCloud IMAP (#493).

## 9.0.7

### Fixes

- Fix the IMAP `since` option (PR #645 closes issues #581 and #643).

## 9.0.6

### Fixes
@@ -11,7 +24,7 @@

### Improvements

- Improve type hints across the library (Pylance/Pyright friendliness) and reduce false-positive linter errors.
- Emails in Microsoft 365 are now marked as read as they are processed. This provides consistency with other mailbox types and gives an indication of progress when emails are processed in batches.
- Emails in Microsoft 365 are now marked as read as they are processed. This provides consistency with other mailbox types and gives an indication of progress when emails are processed in batches. (Closes #625)

### Compatibility / Dependencies
@@ -61,4 +61,4 @@ for RHEL or Debian.

| 3.11 | ✅ | Actively maintained; supported until June 2028 (Debian 12) |
| 3.12 | ✅ | Actively maintained; supported until May 2035 (RHEL 10) |
| 3.13 | ✅ | Actively maintained; supported until June 2030 (Debian 13) |
| 3.14 | ❌ | Not currently supported due to Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618) |
| 3.14 | ❌ | Not currently supported due to [this imapclient bug](https://github.com/mjs/imapclient/issues/618) |
@@ -29,3 +29,14 @@ token_file = /etc/example/token.json

include_spam_trash = True
paginate_messages = True
scopes = https://www.googleapis.com/auth/gmail.modify

[msgraph]
auth_method = ClientSecret
client_id = 12345678-90ab-cdef-1234-567890abcdef
client_secret = your-client-secret-here
tenant_id = 12345678-90ab-cdef-1234-567890abcdef
mailbox = dmarc-reports@example.com
# Use standard folder names - they work across all locales
# and avoid "Default folder Root not found" errors
reports_folder = Inbox
archive_folder = Archive
@@ -229,6 +229,18 @@ The full set of configuration options are:

username, you must grant the app `Mail.ReadWrite.Shared`.
:::

:::{tip}
When configuring folder names (e.g., `reports_folder`, `archive_folder`),
you can use standard folder names like `Inbox`, `Archive`, `Sent Items`, etc.
These will be automatically mapped to Microsoft Graph's well-known folder names,
which works reliably across different mailbox locales and avoids issues with
uninitialized or shared mailboxes. Supported folder names include:

- English: Inbox, Sent Items, Deleted Items, Drafts, Junk Email, Archive, Outbox
- German: Posteingang, Gesendete Elemente, Gelöschte Elemente, Entwürfe, Junk-E-Mail, Archiv
- French: Boîte de réception, Éléments envoyés, Éléments supprimés, Brouillons, Courrier indésirable, Archives
- Spanish: Bandeja de entrada, Elementos enviados, Elementos eliminados, Borradores, Correo no deseado
:::

:::{warning}
If you are using the `ClientSecret` auth method, you need to
grant the `Mail.ReadWrite` (application) permission to the
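The folder-name handling described in the tip above is a case-insensitive dictionary lookup against the `WELL_KNOWN_FOLDER_MAP` added further down in this diff. A minimal sketch of that behavior (the map is abridged here, and the `resolve_folder` helper is illustrative rather than part of parsedmarc):

```python
# Abridged from WELL_KNOWN_FOLDER_MAP in parsedmarc/mail/graph.py; the full map
# also covers the German, French, and Spanish folder names listed in the tip.
WELL_KNOWN_FOLDER_MAP = {
    "inbox": "inbox",
    "sent items": "sentitems",
    "archive": "archive",
    "posteingang": "inbox",
}


def resolve_folder(folder_name: str) -> str:
    """Map a configured folder name to a Graph well-known name when possible."""
    well_known = WELL_KNOWN_FOLDER_MAP.get(folder_name.lower())
    # Unknown names fall back to the normal folder search by display name.
    return well_known if well_known else folder_name


print(resolve_folder("Inbox"))        # -> "inbox" (well-known folder)
print(resolve_folder("Posteingang"))  # -> "inbox" (German locale)
print(resolve_folder("my_reports"))   # -> "my_reports" (custom folder)
```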
@@ -595,9 +595,12 @@ def parsed_smtp_tls_reports_to_csv_rows(

        if "mx_host_patterns" in policy:
            record["mx_host_patterns"] = "|".join(policy["mx_host_patterns"])
        successful_record = record.copy()
        successful_record["policy_domain"] = policy["policy_domain"]
        successful_record["policy_type"] = policy["policy_type"]
        successful_record["successful_session_count"] = policy[
            "successful_session_count"
        ]
        successful_record["failed_session_count"] = policy["failed_session_count"]
        rows.append(successful_record)
        if "failure_details" in policy:
            for failure_details in policy["failure_details"]:
@@ -1939,8 +1942,10 @@ def get_dmarc_reports_from_mailbox(

                "Only days and weeks values in 'since' option are \
                considered for IMAP connections. Examples: 2d or 1w"
            )
        since = (datetime.now(timezone.utc) - timedelta(minutes=_since)).date()
        current_time = datetime.now(timezone.utc).date()
        since = (datetime.now(timezone.utc) - timedelta(minutes=_since)).strftime(
            "%d-%b-%Y"
        )
        current_time = datetime.now(timezone.utc).strftime("%d-%b-%Y")
    elif isinstance(connection, MSGraphConnection):
        since = (
            datetime.now(timezone.utc) - timedelta(minutes=_since)
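The hunk above is the `since` fix noted in the 9.0.7 changelog entry: instead of passing `date` objects, the IMAP cutoff is now formatted as the `DD-Mon-YYYY` string form used by IMAP `SEARCH SINCE`. A small sketch of the conversion, assuming a `2d` value already normalized to minutes (the `_since` variable in the code above):

```python
from datetime import datetime, timedelta, timezone

_since = 2 * 24 * 60  # "2d" expressed in minutes, matching timedelta(minutes=_since)
since = (datetime.now(timezone.utc) - timedelta(minutes=_since)).strftime("%d-%b-%Y")
current_time = datetime.now(timezone.utc).strftime("%d-%b-%Y")
print(since, current_time)  # e.g. "16-Feb-2026 18-Feb-2026"
```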
@@ -46,6 +46,7 @@ from parsedmarc.mail import (

    MSGraphConnection,
)
from parsedmarc.mail.graph import AuthMethod
from parsedmarc.types import ParsingResults
from parsedmarc.utils import get_base_domain, get_reverse_dns, is_mbox

# Increase the max header limit for very large emails. `_MAXHEADERS` is a
@@ -67,6 +68,48 @@ def _str_to_list(s):

    return list(map(lambda i: i.lstrip(), _list))


def _configure_logging(log_level, log_file=None):
    """
    Configure logging for the current process.
    This is needed for child processes to properly log messages.

    Args:
        log_level: The logging level (e.g., logging.DEBUG, logging.WARNING)
        log_file: Optional path to log file
    """
    # Get the logger
    from parsedmarc.log import logger

    # Set the log level
    logger.setLevel(log_level)

    # Add StreamHandler with formatter if not already present
    # Check if we already have a StreamHandler to avoid duplicates
    # Use exact type check to distinguish from FileHandler subclass
    has_stream_handler = any(type(h) is logging.StreamHandler for h in logger.handlers)

    if not has_stream_handler:
        formatter = logging.Formatter(
            fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
            datefmt="%Y-%m-%d:%H:%M:%S",
        )
        handler = logging.StreamHandler()
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    # Add FileHandler if log_file is specified
    if log_file:
        try:
            fh = logging.FileHandler(log_file, "a")
            formatter = logging.Formatter(
                "%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
            )
            fh.setFormatter(formatter)
            logger.addHandler(fh)
        except (IOError, OSError, PermissionError) as error:
            logger.warning("Unable to write to log file: {}".format(error))


def cli_parse(
    file_path,
    sa,
@@ -79,8 +122,29 @@ cli_parse(

    reverse_dns_map_url,
    normalize_timespan_threshold_hours,
    conn,
    log_level=logging.ERROR,
    log_file=None,
):
    """Separated this function for multiprocessing"""
    """Separated this function for multiprocessing

    Args:
        file_path: Path to the report file
        sa: Strip attachment payloads flag
        nameservers: List of nameservers
        dns_timeout: DNS timeout
        ip_db_path: Path to IP database
        offline: Offline mode flag
        always_use_local_files: Always use local files flag
        reverse_dns_map_path: Path to reverse DNS map
        reverse_dns_map_url: URL to reverse DNS map
        normalize_timespan_threshold_hours: Timespan threshold
        conn: Pipe connection for IPC
        log_level: Logging level for this process
        log_file: Optional path to log file
    """
    # Configure logging in this child process
    _configure_logging(log_level, log_file)

    try:
        file_results = parse_report_file(
            file_path,
@@ -1461,6 +1525,10 @@ def _main():

        if n_procs < 1:
            n_procs = 1

        # Capture the current log level to pass to child processes
        current_log_level = logger.level
        current_log_file = opts.log_file

        for batch_index in range((len(file_paths) + n_procs - 1) // n_procs):
            processes = []
            connections = []
@@ -1486,6 +1554,8 @@ def _main():

                        opts.reverse_dns_map_url,
                        opts.normalize_timespan_threshold_hours,
                        child_conn,
                        current_log_level,
                        current_log_file,
                    ),
                )
                processes.append(process)
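The two new arguments are how the fix for #646 propagates logging settings: a worker process may start with a fresh logging setup (for example under the `spawn` start method), so the parent's level and optional log file are passed in and re-applied inside each child by `_configure_logging`, as `cli_parse` does above. A minimal self-contained sketch of the same pattern, with hypothetical names:

```python
import logging
import multiprocessing


def worker(log_level, log_file=None):
    # Re-create handlers in this process; the parent's configuration is not
    # guaranteed to be inherited by the child.
    logger = logging.getLogger("parsedmarc")
    logger.setLevel(log_level)
    if not logger.handlers:
        logger.addHandler(logging.StreamHandler())
    if log_file:
        logger.addHandler(logging.FileHandler(log_file, "a"))
    logger.debug("child logging configured")


if __name__ == "__main__":
    process = multiprocessing.Process(target=worker, args=(logging.DEBUG, None))
    process.start()
    process.join()
```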
@@ -1688,13 +1758,13 @@ def _main():

            logger.exception("Mailbox Error")
            exit(1)

    results = {
    parsing_results: ParsingResults = {
        "aggregate_reports": aggregate_reports,
        "forensic_reports": forensic_reports,
        "smtp_tls_reports": smtp_tls_reports,
    }

    process_reports(results)
    process_reports(parsing_results)

    if opts.smtp_host:
        try:

@@ -1708,7 +1778,7 @@ def _main():

                else _str_to_list(str(opts.smtp_to))
            )
            email_results(
                results,
                parsing_results,
                opts.smtp_host,
                opts.smtp_from,
                smtp_to_value,
@@ -1,3 +1,3 @@
__version__ = "9.0.6"
__version__ = "9.0.8"

USER_AGENT = f"parsedmarc/{__version__}"
@@ -2,7 +2,6 @@

from __future__ import annotations

import json
import logging
import logging.handlers
import threading

@@ -62,9 +61,11 @@ class GelfClient(object):

    def save_forensic_report_to_gelf(self, forensic_reports: list[dict[str, Any]]):
        rows = parsed_forensic_reports_to_csv_rows(forensic_reports)
        for row in rows:
            self.logger.info(json.dumps(row))
            log_context_data.parsedmarc = row
            self.logger.info("parsedmarc forensic report")

    def save_smtp_tls_report_to_gelf(self, smtp_tls_reports: dict[str, Any]):
        rows = parsed_smtp_tls_reports_to_csv_rows(smtp_tls_reports)
        for row in rows:
            self.logger.info(json.dumps(row))
            log_context_data.parsedmarc = row
            self.logger.info("parsedmarc smtptls report")
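The hunk above moves away from logging each row as a JSON string and instead stores the row in a per-thread context before logging a fixed message, which lets the GELF handler attach the row as structured fields. A generic sketch of that pattern using a thread-local and a `logging.Filter` (names are illustrative, not necessarily parsedmarc's exact implementation):

```python
import logging
import threading

log_context_data = threading.local()


class ContextFilter(logging.Filter):
    def filter(self, record):
        # Copy the current row onto the log record so a structured handler
        # (e.g. GELF) can emit it as fields rather than as message text.
        record.parsedmarc = getattr(log_context_data, "parsedmarc", None)
        return True


logger = logging.getLogger("gelf_example")
logger.setLevel(logging.INFO)
logger.addFilter(ContextFilter())
logger.addHandler(logging.StreamHandler())

log_context_data.parsedmarc = {"source_ip_address": "192.0.2.1", "count": 3}
logger.info("parsedmarc forensic report")  # record.parsedmarc carries the row
```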
@@ -20,6 +20,59 @@ from msgraph.core import GraphClient

from parsedmarc.log import logger
from parsedmarc.mail.mailbox_connection import MailboxConnection

# Mapping of common folder names to Microsoft Graph well-known folder names
# This avoids the "Default folder Root not found" error on uninitialized mailboxes
WELL_KNOWN_FOLDER_MAP = {
    # English names
    "inbox": "inbox",
    "sent items": "sentitems",
    "sent": "sentitems",
    "sentitems": "sentitems",
    "deleted items": "deleteditems",
    "deleted": "deleteditems",
    "deleteditems": "deleteditems",
    "trash": "deleteditems",
    "drafts": "drafts",
    "junk email": "junkemail",
    "junk": "junkemail",
    "junkemail": "junkemail",
    "spam": "junkemail",
    "archive": "archive",
    "outbox": "outbox",
    "conversation history": "conversationhistory",
    "conversationhistory": "conversationhistory",
    # German names
    "posteingang": "inbox",
    "gesendete elemente": "sentitems",
    "gesendet": "sentitems",
    "gelöschte elemente": "deleteditems",
    "gelöscht": "deleteditems",
    "entwürfe": "drafts",
    "junk-e-mail": "junkemail",
    "archiv": "archive",
    "postausgang": "outbox",
    # French names
    "boîte de réception": "inbox",
    "éléments envoyés": "sentitems",
    "envoyés": "sentitems",
    "éléments supprimés": "deleteditems",
    "supprimés": "deleteditems",
    "brouillons": "drafts",
    "courrier indésirable": "junkemail",
    "archives": "archive",
    "boîte d'envoi": "outbox",
    # Spanish names
    "bandeja de entrada": "inbox",
    "elementos enviados": "sentitems",
    "enviados": "sentitems",
    "elementos eliminados": "deleteditems",
    "eliminados": "deleteditems",
    "borradores": "drafts",
    "correo no deseado": "junkemail",
    "archivar": "archive",
    "bandeja de salida": "outbox",
}


class AuthMethod(Enum):
    DeviceCode = 1
@@ -130,6 +183,13 @@ class MSGraphConnection(MailboxConnection):

        self.mailbox_name = mailbox

    def create_folder(self, folder_name: str):
        # Check if this is a well-known folder - they already exist and cannot be created
        if "/" not in folder_name:
            well_known_name = WELL_KNOWN_FOLDER_MAP.get(folder_name.lower())
            if well_known_name:
                logger.debug(
                    f"Folder '{folder_name}' is a well-known folder, skipping creation"
                )
                return

        sub_url = ""
        path_parts = folder_name.split("/")
        if len(path_parts) > 1:  # Folder is a subFolder
@@ -246,6 +306,12 @@ class MSGraphConnection(MailboxConnection):

                parent_folder_id = folder_id
            return self._find_folder_id_with_parent(path_parts[-1], parent_folder_id)
        else:
            # Check if this is a well-known folder name (case-insensitive)
            well_known_name = WELL_KNOWN_FOLDER_MAP.get(folder_name.lower())
            if well_known_name:
                # Use well-known folder name directly to avoid querying uninitialized mailboxes
                logger.debug(
                    f"Using well-known folder name '{well_known_name}' for '{folder_name}'"
                )
                return well_known_name
            return self._find_folder_id_with_parent(folder_name, None)

    def _find_folder_id_with_parent(
@@ -48,7 +48,7 @@ dependencies = [

    "imapclient>=2.1.0",
    "kafka-python-ng>=2.2.2",
    "lxml>=4.4.0",
    "mailsuite>=1.11.0",
    "mailsuite>=1.11.1",
    "msgraph-core==0.2.2",
    "opensearch-py>=2.4.2,<=3.0.0",
    "publicsuffixlist>=0.10.0",
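With the constraint tightened to `mailsuite>=1.11.1`, a quick way to confirm what is installed in an existing environment:

```python
from importlib.metadata import version

# Should print 1.11.1 or newer once the updated dependency is installed
print(version("mailsuite"))
```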
tests.py (26 lines changed)
@@ -156,6 +156,32 @@ class Test(unittest.TestCase):

        parsedmarc.parsed_smtp_tls_reports_to_csv(parsed_report)
        print("Passed!")

    def testMSGraphWellKnownFolders(self):
        """Test MSGraph well-known folder name mapping"""
        from parsedmarc.mail.graph import WELL_KNOWN_FOLDER_MAP

        # Test English folder names
        assert WELL_KNOWN_FOLDER_MAP.get("inbox") == "inbox"
        assert WELL_KNOWN_FOLDER_MAP.get("sent items") == "sentitems"
        assert WELL_KNOWN_FOLDER_MAP.get("deleted items") == "deleteditems"
        assert WELL_KNOWN_FOLDER_MAP.get("archive") == "archive"

        # Test case insensitivity - simulating how the code actually uses it
        # This is what happens when user config has "reports_folder = Inbox"
        assert WELL_KNOWN_FOLDER_MAP.get("inbox") == "inbox"
        assert WELL_KNOWN_FOLDER_MAP.get("Inbox".lower()) == "inbox"  # User's exact config
        assert WELL_KNOWN_FOLDER_MAP.get("INBOX".lower()) == "inbox"
        assert WELL_KNOWN_FOLDER_MAP.get("Archive".lower()) == "archive"

        # Test German folder names
        assert WELL_KNOWN_FOLDER_MAP.get("posteingang") == "inbox"
        assert WELL_KNOWN_FOLDER_MAP.get("Posteingang".lower()) == "inbox"  # Capitalized
        assert WELL_KNOWN_FOLDER_MAP.get("archiv") == "archive"

        # Test that custom folders don't match
        assert WELL_KNOWN_FOLDER_MAP.get("custom_folder") is None
        assert WELL_KNOWN_FOLDER_MAP.get("my_reports") is None


if __name__ == "__main__":
    unittest.main(verbosity=2)
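The new test only exercises the module-level mapping, so it can be run on its own once parsedmarc and its dependencies are importable; one way to do that with the standard library loader, from the repository root:

```python
import unittest

# Load and run only the new folder-mapping test from tests.py
suite = unittest.defaultTestLoader.loadTestsFromName(
    "tests.Test.testMSGraphWellKnownFolders"
)
unittest.TextTestRunner(verbosity=2).run(suite)
```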