Compare commits

...

11 Commits

Author SHA1 Message Date
Sean Whalen
5273948be0 Make build.sh usable without the gh-pages branch 2025-02-18 09:17:12 -05:00
Sean Whalen
b51756b8bd 8.18.1
- Add missing `https://` to the default Microsoft Graph URL
2025-02-17 12:41:57 -05:00
Sean Whalen
7fa7c24cb8 Merge branch 'master' of https://github.com/domainaware/parsedmarc 2025-02-17 12:31:47 -05:00
Sean Whalen
972237ae7e Fix default Microsoft Graph URL 2025-02-17 12:31:39 -05:00
Sean Whalen
6e5333a342 Style fixes 2025-02-03 16:11:21 -05:00
Sean Whalen
47b074c80b Merge branch 'master' of https://github.com/domainaware/parsedmarc 2025-02-03 16:11:01 -05:00
Sean Whalen
a1cfeb3081 8.18.0
- Add support for Microsoft national clouds via Graph API base URL (PR #590)
- Avoid stopping processing when an invalid DMARC report is encountered (PR #587)
- Increase `http.client._MAXHEADERS` from `100` to `200` to avoid errors connecting to Elasticsearch/OpenSearch (PR #589)
2025-02-03 16:10:51 -05:00
Paul Hecker
c7c451b1b1 Set http.client._MAXHEADERS to 200 (#589) 2025-02-03 15:26:15 -05:00
Kevin Goad
669deb9755 Add support for Microsoft national clouds via Graph API base URL (#590)
* adding support for Microsoft National Clouds

* Update usage.md
2025-02-03 15:25:15 -05:00
bendem
446c018920 do not stop processing when we encounter an invalid dmarc report (#587) 2025-02-03 15:20:52 -05:00
Sean Whalen
38c6f86973 Update CHANGELOG.md 2025-01-10 09:09:24 -05:00
9 changed files with 89 additions and 79 deletions

View File

@@ -1,10 +1,22 @@
Changelog
=========
8.18.1
------
- Add missing `https://` to the default Microsoft Graph URL
8.18.0
------
- Add support for Microsoft national clouds via Graph API base URL (PR #590)
- Avoid stopping processing when an invalid DMARC report is encountered (PR #587)
- Increase `http.client._MAXHEADERS` from `100` to `200` to avoid errors connecting to Elasticsearch/OpenSearch (PR #589)
8.17.0
------
- Ignore duplicate aggregate DMARC reports with the same `org_name` and `report_id` seen within the same hour (Fixes [#539](https://github.com/domainaware/parsedmarc/issues/539))
- Ignore duplicate aggregate DMARC reports with the same `org_name` and `report_id` seen within the same hour (Fixes #535)
- Fix saving SMTP TLS reports to OpenSearch (PR #585 closed issue #576)
- Add 303 entries to `base_reverse_dns_map.csv`

View File

@@ -14,7 +14,9 @@ cd docs
make clean
make html
touch build/html/.nojekyll
cp -rf build/html/* ../../parsedmarc-docs/
# Copy the freshly built HTML into the parsedmarc-docs directory only when
# that directory exists, so the build still works without it.
# The script has already changed into docs/, so the directory is two levels
# up; the existence test must use the same path as the copy destination.
if [ -d "../../parsedmarc-docs" ]; then
  cp -rf build/html/* ../../parsedmarc-docs/
fi
cd ..
./sortmaps.py
python3 tests.py

View File

@@ -208,6 +208,8 @@ The full set of configuration options are:
- `mailbox` - str: The mailbox name. This defaults to the
current user if using the UsernamePassword auth method, but
could be a shared mailbox if the user has access to the mailbox
- `graph_url` - str: Microsoft Graph URL. Allows the use of Microsoft national clouds (e.g., Azure Government)
(Default: `https://graph.microsoft.com`)
- `token_file` - str: Path to save the token file
(Default: `.token`)
- `allow_unencrypted_storage` - bool: Allows the Azure Identity

View File

@@ -39,7 +39,7 @@ from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
from parsedmarc.utils import parse_email
from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
__version__ = "8.17.0"
__version__ = "8.18.1"
logger.debug("parsedmarc v{0}".format(__version__))
@@ -272,7 +272,7 @@ def _parse_smtp_tls_failure_details(failure_details):
return new_failure_details
except KeyError as e:
raise InvalidSMTPTLSReport(f"Missing required failure details field:" f" {e}")
raise InvalidSMTPTLSReport(f"Missing required failure details field: {e}")
except Exception as e:
raise InvalidSMTPTLSReport(str(e))
@@ -284,7 +284,7 @@ def _parse_smtp_tls_report_policy(policy):
policy_type = policy["policy"]["policy-type"]
failure_details = []
if policy_type not in policy_types:
raise InvalidSMTPTLSReport(f"Invalid policy type " f"{policy_type}")
raise InvalidSMTPTLSReport(f"Invalid policy type {policy_type}")
new_policy = OrderedDict(policy_domain=policy_domain, policy_type=policy_type)
if "policy-string" in policy["policy"]:
if isinstance(policy["policy"]["policy-string"], list):
@@ -332,9 +332,7 @@ def parse_smtp_tls_report_json(report):
raise Exception(f"Missing required field: {required_field}]")
if not isinstance(report["policies"], list):
policies_type = type(report["policies"])
raise InvalidSMTPTLSReport(
f"policies must be a list, " f"not {policies_type}"
)
raise InvalidSMTPTLSReport(f"policies must be a list, not {policies_type}")
for policy in report["policies"]:
policies.append(_parse_smtp_tls_report_policy(policy))
@@ -1246,11 +1244,11 @@ def parse_report_email(
field_name = match[0].lower().replace(" ", "-")
fields[field_name] = match[1].strip()
feedback_report = "Arrival-Date: {}\n" "Source-IP: {}" "".format(
feedback_report = "Arrival-Date: {}\nSource-IP: {}".format(
fields["received-date"], fields["sender-ip-address"]
)
except Exception as e:
error = "Unable to parse message with " 'subject "{0}": {1}'.format(
error = 'Unable to parse message with subject "{0}": {1}'.format(
subject, e
)
raise InvalidDMARCReport(error)
@@ -1294,10 +1292,10 @@ def parse_report_email(
"is not a valid "
"aggregate DMARC report: {1}".format(subject, e)
)
raise ParserError(error)
raise InvalidDMARCReport(error)
except Exception as e:
error = "Unable to parse message with " 'subject "{0}": {1}'.format(
error = 'Unable to parse message with subject "{0}": {1}'.format(
subject, e
)
raise ParserError(error)
@@ -1331,7 +1329,7 @@ def parse_report_email(
return result
if result is None:
error = 'Message with subject "{0}" is ' "not a valid report".format(subject)
error = 'Message with subject "{0}" is not a valid report'.format(subject)
raise InvalidDMARCReport(error)
@@ -1666,7 +1664,7 @@ def get_dmarc_reports_from_mailbox(
aggregate_reports.append(parsed_email["report"])
else:
logger.debug(
"Skipping duplicate aggregate report " f"with ID: {report_id}"
f"Skipping duplicate aggregate report with ID: {report_id}"
)
aggregate_report_msg_uids.append(msg_uid)
elif parsed_email["report_type"] == "forensic":
@@ -1708,7 +1706,7 @@ def get_dmarc_reports_from_mailbox(
except Exception as e:
message = "Error deleting message UID"
e = "{0} {1}: " "{2}".format(message, msg_uid, e)
e = "{0} {1}: {2}".format(message, msg_uid, e)
logger.error("Mailbox error: {0}".format(e))
else:
if len(aggregate_report_msg_uids) > 0:

View File

@@ -14,6 +14,7 @@ import json
from ssl import CERT_NONE, create_default_context
from multiprocessing import Pipe, Process
import sys
import http.client
from tqdm import tqdm
from parsedmarc import (
@@ -48,6 +49,8 @@ from parsedmarc.log import logger
from parsedmarc.utils import is_mbox, get_reverse_dns
from parsedmarc import SEEN_AGGREGATE_REPORT_IDS
http.client._MAXHEADERS = 200 # pylint:disable=protected-access
formatter = logging.Formatter(
fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
datefmt="%Y-%m-%d:%H:%M:%S",
@@ -396,7 +399,7 @@ def _main():
arg_parser.add_argument(
"-c",
"--config-file",
help="a path to a configuration file " "(--silent implied)",
help="a path to a configuration file (--silent implied)",
)
arg_parser.add_argument(
"file_path",
@@ -404,7 +407,7 @@ def _main():
help="one or more paths to aggregate or forensic "
"report files, emails, or mbox files'",
)
strip_attachment_help = "remove attachment payloads from forensic " "report output"
strip_attachment_help = "remove attachment payloads from forensic report output"
arg_parser.add_argument(
"--strip-attachment-payloads", help=strip_attachment_help, action="store_true"
)
@@ -447,14 +450,14 @@ def _main():
arg_parser.add_argument(
"-t",
"--dns_timeout",
help="number of seconds to wait for an answer " "from DNS (default: 2.0)",
help="number of seconds to wait for an answer from DNS (default: 2.0)",
type=float,
default=2.0,
)
arg_parser.add_argument(
"--offline",
action="store_true",
help="do not make online queries for geolocation " " or DNS",
help="do not make online queries for geolocation or DNS",
)
arg_parser.add_argument(
"-s", "--silent", action="store_true", help="only print errors"
@@ -528,6 +531,7 @@ def _main():
graph_tenant_id=None,
graph_mailbox=None,
graph_allow_unencrypted_storage=False,
graph_url="https://graph.microsoft.com",
hec=None,
hec_token=None,
hec_index=None,
@@ -730,7 +734,7 @@ def _main():
if "host" in imap_config:
opts.imap_host = imap_config["host"]
else:
logger.error("host setting missing from the " "imap config section")
logger.error("host setting missing from the imap config section")
exit(-1)
if "port" in imap_config:
opts.imap_port = imap_config.getint("port")
@@ -746,14 +750,12 @@ def _main():
if "user" in imap_config:
opts.imap_user = imap_config["user"]
else:
logger.critical("user setting missing from the " "imap config section")
logger.critical("user setting missing from the imap config section")
exit(-1)
if "password" in imap_config:
opts.imap_password = imap_config["password"]
else:
logger.critical(
"password setting missing from the " "imap config section"
)
logger.critical("password setting missing from the imap config section")
exit(-1)
if "reports_folder" in imap_config:
opts.mailbox_reports_folder = imap_config["reports_folder"]
@@ -822,21 +824,20 @@ def _main():
opts.graph_user = graph_config["user"]
else:
logger.critical(
"user setting missing from the " "msgraph config section"
"user setting missing from the msgraph config section"
)
exit(-1)
if "password" in graph_config:
opts.graph_password = graph_config["password"]
else:
logger.critical(
"password setting missing from the " "msgraph config section"
"password setting missing from the msgraph config section"
)
if "client_secret" in graph_config:
opts.graph_client_secret = graph_config["client_secret"]
else:
logger.critical(
"client_secret setting missing from the "
"msgraph config section"
"client_secret setting missing from the msgraph config section"
)
exit(-1)
@@ -849,7 +850,7 @@ def _main():
opts.graph_tenant_id = graph_config["tenant_id"]
else:
logger.critical(
"tenant_id setting missing from the " "msgraph config section"
"tenant_id setting missing from the msgraph config section"
)
exit(-1)
@@ -858,8 +859,7 @@ def _main():
opts.graph_client_secret = graph_config["client_secret"]
else:
logger.critical(
"client_secret setting missing from the "
"msgraph config section"
"client_secret setting missing from the msgraph config section"
)
exit(-1)
@@ -867,7 +867,7 @@ def _main():
opts.graph_client_id = graph_config["client_id"]
else:
logger.critical(
"client_id setting missing from the " "msgraph config section"
"client_id setting missing from the msgraph config section"
)
exit(-1)
@@ -875,10 +875,13 @@ def _main():
opts.graph_mailbox = graph_config["mailbox"]
elif opts.graph_auth_method != AuthMethod.UsernamePassword.name:
logger.critical(
"mailbox setting missing from the " "msgraph config section"
"mailbox setting missing from the msgraph config section"
)
exit(-1)
if "graph_url" in graph_config:
opts.graph_url = graph_config["graph_url"]
if "allow_unencrypted_storage" in graph_config:
opts.graph_allow_unencrypted_storage = graph_config.getboolean(
"allow_unencrypted_storage"
@@ -890,7 +893,7 @@ def _main():
opts.elasticsearch_hosts = _str_to_list(elasticsearch_config["hosts"])
else:
logger.critical(
"hosts setting missing from the " "elasticsearch config section"
"hosts setting missing from the elasticsearch config section"
)
exit(-1)
if "timeout" in elasticsearch_config:
@@ -928,7 +931,7 @@ def _main():
opts.opensearch_hosts = _str_to_list(opensearch_config["hosts"])
else:
logger.critical(
"hosts setting missing from the " "opensearch config section"
"hosts setting missing from the opensearch config section"
)
exit(-1)
if "timeout" in opensearch_config:
@@ -964,21 +967,21 @@ def _main():
opts.hec = hec_config["url"]
else:
logger.critical(
"url setting missing from the " "splunk_hec config section"
"url setting missing from the splunk_hec config section"
)
exit(-1)
if "token" in hec_config:
opts.hec_token = hec_config["token"]
else:
logger.critical(
"token setting missing from the " "splunk_hec config section"
"token setting missing from the splunk_hec config section"
)
exit(-1)
if "index" in hec_config:
opts.hec_index = hec_config["index"]
else:
logger.critical(
"index setting missing from the " "splunk_hec config section"
"index setting missing from the splunk_hec config section"
)
exit(-1)
if "skip_certificate_verification" in hec_config:
@@ -991,9 +994,7 @@ def _main():
if "hosts" in kafka_config:
opts.kafka_hosts = _str_to_list(kafka_config["hosts"])
else:
logger.critical(
"hosts setting missing from the " "kafka config section"
)
logger.critical("hosts setting missing from the kafka config section")
exit(-1)
if "user" in kafka_config:
opts.kafka_username = kafka_config["user"]
@@ -1008,21 +1009,20 @@ def _main():
opts.kafka_aggregate_topic = kafka_config["aggregate_topic"]
else:
logger.critical(
"aggregate_topic setting missing from the " "kafka config section"
"aggregate_topic setting missing from the kafka config section"
)
exit(-1)
if "forensic_topic" in kafka_config:
opts.kafka_forensic_topic = kafka_config["forensic_topic"]
else:
logger.critical(
"forensic_topic setting missing from the " "kafka config section"
"forensic_topic setting missing from the kafka config section"
)
if "smtp_tls_topic" in kafka_config:
opts.kafka_smtp_tls_topic = kafka_config["smtp_tls_topic"]
else:
logger.critical(
"forensic_topic setting missing from the "
"splunk_hec config section"
"forensic_topic setting missing from the splunk_hec config section"
)
if "smtp" in config.sections():
@@ -1030,7 +1030,7 @@ def _main():
if "host" in smtp_config:
opts.smtp_host = smtp_config["host"]
else:
logger.critical("host setting missing from the " "smtp config section")
logger.critical("host setting missing from the smtp config section")
exit(-1)
if "port" in smtp_config:
opts.smtp_port = smtp_config.getint("port")
@@ -1042,23 +1042,21 @@ def _main():
if "user" in smtp_config:
opts.smtp_user = smtp_config["user"]
else:
logger.critical("user setting missing from the " "smtp config section")
logger.critical("user setting missing from the smtp config section")
exit(-1)
if "password" in smtp_config:
opts.smtp_password = smtp_config["password"]
else:
logger.critical(
"password setting missing from the " "smtp config section"
)
logger.critical("password setting missing from the smtp config section")
exit(-1)
if "from" in smtp_config:
opts.smtp_from = smtp_config["from"]
else:
logger.critical("from setting missing from the " "smtp config section")
logger.critical("from setting missing from the smtp config section")
if "to" in smtp_config:
opts.smtp_to = _str_to_list(smtp_config["to"])
else:
logger.critical("to setting missing from the " "smtp config section")
logger.critical("to setting missing from the smtp config section")
if "subject" in smtp_config:
opts.smtp_subject = smtp_config["subject"]
if "attachment" in smtp_config:
@@ -1071,7 +1069,7 @@ def _main():
if "bucket" in s3_config:
opts.s3_bucket = s3_config["bucket"]
else:
logger.critical("bucket setting missing from the " "s3 config section")
logger.critical("bucket setting missing from the s3 config section")
exit(-1)
if "path" in s3_config:
opts.s3_path = s3_config["path"]
@@ -1096,9 +1094,7 @@ def _main():
if "server" in syslog_config:
opts.syslog_server = syslog_config["server"]
else:
logger.critical(
"server setting missing from the " "syslog config section"
)
logger.critical("server setting missing from the syslog config section")
exit(-1)
if "port" in syslog_config:
opts.syslog_port = syslog_config["port"]
@@ -1149,17 +1145,17 @@ def _main():
if "host" in gelf_config:
opts.gelf_host = gelf_config["host"]
else:
logger.critical("host setting missing from the " "gelf config section")
logger.critical("host setting missing from the gelf config section")
exit(-1)
if "port" in gelf_config:
opts.gelf_port = gelf_config["port"]
else:
logger.critical("port setting missing from the " "gelf config section")
logger.critical("port setting missing from the gelf config section")
exit(-1)
if "mode" in gelf_config:
opts.gelf_mode = gelf_config["mode"]
else:
logger.critical("mode setting missing from the " "gelf config section")
logger.critical("mode setting missing from the gelf config section")
exit(-1)
if "webhook" in config.sections():
@@ -1185,8 +1181,7 @@ def _main():
try:
fh = logging.FileHandler(opts.log_file, "a")
formatter = logging.Formatter(
"%(asctime)s - "
"%(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
"%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
)
fh.setFormatter(formatter)
logger.addHandler(fh)
@@ -1294,7 +1289,7 @@ def _main():
if opts.hec:
if opts.hec_token is None or opts.hec_index is None:
logger.error("HEC token and HEC index are required when " "using HEC URL")
logger.error("HEC token and HEC index are required when using HEC URL")
exit(1)
verify = True
@@ -1457,7 +1452,7 @@ def _main():
try:
if opts.imap_user is None or opts.imap_password is None:
logger.error(
"IMAP user and password must be specified if" "host is specified"
"IMAP user and password must be specified ifhost is specified"
)
ssl = True
@@ -1496,6 +1491,7 @@ def _main():
password=opts.graph_password,
token_file=opts.graph_token_file,
allow_unencrypted_storage=opts.graph_allow_unencrypted_storage,
graph_url=opts.graph_url,
)
except Exception:

View File

@@ -63,9 +63,7 @@ class GmailConnection(MailboxConnection):
).execute()
except HttpError as e:
if e.status_code == 409:
logger.debug(
f"Folder {folder_name} already exists, " f"skipping creation"
)
logger.debug(f"Folder {folder_name} already exists, skipping creation")
else:
raise e

View File

@@ -89,6 +89,7 @@ class MSGraphConnection(MailboxConnection):
self,
auth_method: str,
mailbox: str,
graph_url: str,
client_id: str,
client_secret: str,
username: str,
@@ -108,7 +109,10 @@ class MSGraphConnection(MailboxConnection):
token_path=token_path,
allow_unencrypted_storage=allow_unencrypted_storage,
)
client_params = {"credential": credential}
client_params = {
"credential": credential,
"cloud": graph_url,
}
if not isinstance(credential, ClientSecretCredential):
scopes = ["Mail.ReadWrite"]
# Detect if mailbox is shared
@@ -137,16 +141,16 @@ class MSGraphConnection(MailboxConnection):
request_url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
resp = self._client.post(request_url, json=request_body)
if resp.status_code == 409:
logger.debug(f"Folder {folder_name} already exists, " f"skipping creation")
logger.debug(f"Folder {folder_name} already exists, skipping creation")
elif resp.status_code == 201:
logger.debug(f"Created folder {folder_name}")
else:
logger.warning(f"Unknown response " f"{resp.status_code} {resp.json()}")
logger.warning(f"Unknown response {resp.status_code} {resp.json()}")
def fetch_messages(self, folder_name: str, **kwargs) -> List[str]:
"""Returns a list of message UIDs in the specified folder"""
folder_id = self._find_folder_id_from_folder_path(folder_name)
url = f"/users/{self.mailbox_name}/mailFolders/" f"{folder_id}/messages"
url = f"/users/{self.mailbox_name}/mailFolders/{folder_id}/messages"
since = kwargs.get("since")
if not since:
since = None
@@ -185,7 +189,7 @@ class MSGraphConnection(MailboxConnection):
resp = self._client.patch(url, json={"isRead": "true"})
if resp.status_code != 200:
raise RuntimeWarning(
f"Failed to mark message read" f"{resp.status_code}: {resp.json()}"
f"Failed to mark message read{resp.status_code}: {resp.json()}"
)
def fetch_message(self, message_id: str, **kwargs):
@@ -193,7 +197,7 @@ class MSGraphConnection(MailboxConnection):
result = self._client.get(url)
if result.status_code != 200:
raise RuntimeWarning(
f"Failed to fetch message" f"{result.status_code}: {result.json()}"
f"Failed to fetch message{result.status_code}: {result.json()}"
)
mark_read = kwargs.get("mark_read")
if mark_read:
@@ -205,7 +209,7 @@ class MSGraphConnection(MailboxConnection):
resp = self._client.delete(url)
if resp.status_code != 204:
raise RuntimeWarning(
f"Failed to delete message " f"{resp.status_code}: {resp.json()}"
f"Failed to delete message {resp.status_code}: {resp.json()}"
)
def move_message(self, message_id: str, folder_name: str):
@@ -215,7 +219,7 @@ class MSGraphConnection(MailboxConnection):
resp = self._client.post(url, json=request_body)
if resp.status_code != 201:
raise RuntimeWarning(
f"Failed to move message " f"{resp.status_code}: {resp.json()}"
f"Failed to move message {resp.status_code}: {resp.json()}"
)
def keepalive(self):
@@ -250,7 +254,7 @@ class MSGraphConnection(MailboxConnection):
filter = f"?$filter=displayName eq '{folder_name}'"
folders_resp = self._client.get(url + filter)
if folders_resp.status_code != 200:
raise RuntimeWarning(f"Failed to list folders." f"{folders_resp.json()}")
raise RuntimeWarning(f"Failed to list folders.{folders_resp.json()}")
folders: list = folders_resp.json()["value"]
matched_folders = [
folder for folder in folders if folder["displayName"] == folder_name

View File

@@ -85,7 +85,5 @@ class IMAPConnection(MailboxConnection):
logger.warning("IMAP connection timeout. Reconnecting...")
sleep(check_timeout)
except Exception as e:
logger.warning(
"IMAP connection error. {0}. " "Reconnecting...".format(e)
)
logger.warning("IMAP connection error. {0}. Reconnecting...".format(e))
sleep(check_timeout)

View File

@@ -344,7 +344,7 @@ def get_service_from_reverse_dns_base_domain(
if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
try:
logger.debug(f"Trying to fetch " f"reverse DNS map from {url}...")
logger.debug(f"Trying to fetch reverse DNS map from {url}...")
csv_file.write(requests.get(url).text)
csv_file.seek(0)
load_csv(csv_file)