mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-02-18 23:46:25 +00:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
617b7c5b4a | ||
|
|
989bfd8f07 | ||
|
|
908cc2918c | ||
|
|
bd5774d71d | ||
|
|
8e9112bad3 | ||
|
|
40e041a8af | ||
|
|
7ba433cddb | ||
|
|
6d467c93f9 | ||
|
|
be38e83761 | ||
|
|
ef4e1ac8dc | ||
|
|
39e4c22ecc | ||
|
|
88ff3a2c23 | ||
|
|
d8aee569f7 | ||
|
|
debc28cc6e | ||
|
|
52ccf0536c | ||
|
|
f618f69c6c |
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@@ -101,6 +101,7 @@
|
||||
"sourcetype",
|
||||
"STARTTLS",
|
||||
"tasklist",
|
||||
"timespan",
|
||||
"tlsa",
|
||||
"tlsrpt",
|
||||
"toctree",
|
||||
|
||||
15
CHANGELOG.md
15
CHANGELOG.md
@@ -1,6 +1,21 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
8.16.0
|
||||
------
|
||||
|
||||
- Add a `since` option to only search for emails since a certain time (PR #527)
|
||||
|
||||
8.15.4
|
||||
------
|
||||
|
||||
- Fix crash if aggregate report timespan is > 24 hours
|
||||
|
||||
8.15.3
|
||||
------
|
||||
|
||||
- Ignore aggregate reports with a timespan of > 24 hours (Fixes #282)
|
||||
|
||||
8.15.2
|
||||
------
|
||||
|
||||
|
||||
1
build.sh
1
build.sh
@@ -9,6 +9,7 @@ fi
|
||||
. venv/bin/activate
|
||||
pip install .[build]
|
||||
ruff format .
|
||||
ruff check .
|
||||
cd docs
|
||||
make clean
|
||||
make html
|
||||
|
||||
@@ -166,6 +166,9 @@ The full set of configuration options are:
|
||||
- `check_timeout` - int: Number of seconds to wait for an IMAP
|
||||
IDLE response or the number of seconds until the next
|
||||
mail check (Default: `30`)
|
||||
- `since` - str: Search for messages since a certain time (Examples: `5m|3h|2d|1w`).
|
||||
Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
|
||||
Defaults to `1d` if an incorrect value is provided.
|
||||
- `imap`
|
||||
- `host` - str: The IMAP server hostname or IP address
|
||||
- `port` - int: The IMAP server port (Default: `993`)
|
||||
|
||||
@@ -17,7 +17,7 @@ import zlib
|
||||
from base64 import b64decode
|
||||
from collections import OrderedDict
|
||||
from csv import DictWriter
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from io import BytesIO, StringIO
|
||||
from typing import Callable
|
||||
|
||||
@@ -28,13 +28,18 @@ from lxml import etree
|
||||
from mailsuite.smtp import send_email
|
||||
|
||||
from parsedmarc.log import logger
|
||||
from parsedmarc.mail import MailboxConnection
|
||||
from parsedmarc.mail import (
|
||||
MailboxConnection,
|
||||
IMAPConnection,
|
||||
MSGraphConnection,
|
||||
GmailConnection,
|
||||
)
|
||||
from parsedmarc.utils import get_base_domain, get_ip_address_info
|
||||
from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
|
||||
from parsedmarc.utils import parse_email
|
||||
from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
|
||||
|
||||
__version__ = "8.15.2"
|
||||
__version__ = "8.16.0"
|
||||
|
||||
logger.debug("parsedmarc v{0}".format(__version__))
|
||||
|
||||
@@ -519,7 +524,7 @@ def parse_aggregate_report_xml(
|
||||
date_range = report["report_metadata"]["date_range"]
|
||||
if int(date_range["end"]) - int(date_range["begin"]) > 2 * 86400:
|
||||
_error = "Time span > 24 hours - RFC 7489 section 7.2"
|
||||
errors.append(_error)
|
||||
raise InvalidAggregateReport(_error)
|
||||
date_range["begin"] = timestamp_to_human(date_range["begin"])
|
||||
date_range["end"] = timestamp_to_human(date_range["end"])
|
||||
new_report_metadata["begin_date"] = date_range["begin"]
|
||||
@@ -1499,6 +1504,7 @@ def get_dmarc_reports_from_mailbox(
|
||||
strip_attachment_payloads=False,
|
||||
results=None,
|
||||
batch_size=10,
|
||||
since=None,
|
||||
create_folders=True,
|
||||
):
|
||||
"""
|
||||
@@ -1522,6 +1528,8 @@ def get_dmarc_reports_from_mailbox(
|
||||
results (dict): Results from the previous run
|
||||
batch_size (int): Number of messages to read and process before saving
|
||||
(use 0 for no limit)
|
||||
since: Search for messages since certain time
|
||||
(units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"})
|
||||
create_folders (bool): Whether to create the destination folders
|
||||
(not used in watch)
|
||||
|
||||
@@ -1534,6 +1542,9 @@ def get_dmarc_reports_from_mailbox(
|
||||
if connection is None:
|
||||
raise ValueError("Must supply a connection")
|
||||
|
||||
# current_time useful to fetch_messages later in the program
|
||||
current_time = None
|
||||
|
||||
aggregate_reports = []
|
||||
forensic_reports = []
|
||||
smtp_tls_reports = []
|
||||
@@ -1557,11 +1568,50 @@ def get_dmarc_reports_from_mailbox(
|
||||
connection.create_folder(smtp_tls_reports_folder)
|
||||
connection.create_folder(invalid_reports_folder)
|
||||
|
||||
messages = connection.fetch_messages(reports_folder, batch_size=batch_size)
|
||||
if since:
|
||||
_since = 1440 # default one day
|
||||
if re.match(r"\d+[mhd]$", since):
|
||||
s = re.split(r"(\d+)", since)
|
||||
if s[2] == "m":
|
||||
_since = int(s[1])
|
||||
elif s[2] == "h":
|
||||
_since = int(s[1]) * 60
|
||||
elif s[2] == "d":
|
||||
_since = int(s[1]) * 60 * 24
|
||||
elif s[2] == "w":
|
||||
_since = int(s[1]) * 60 * 24 * 7
|
||||
else:
|
||||
logger.warning(
|
||||
"Incorrect format for 'since' option. \
|
||||
Provided value:{0}, Expected values:(5m|3h|2d|1w). \
|
||||
Ignoring option, fetching messages for last 24hrs"
|
||||
"SMTP does not support a time or timezone in since."
|
||||
"See https://www.rfc-editor.org/rfc/rfc3501#page-52".format(since)
|
||||
)
|
||||
|
||||
if isinstance(connection, IMAPConnection):
|
||||
logger.debug(
|
||||
"Only days and weeks values in 'since' option are \
|
||||
considered for IMAP conections. Examples: 2d or 1w"
|
||||
)
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).date()
|
||||
current_time = datetime.utcnow().date()
|
||||
elif isinstance(connection, MSGraphConnection):
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).isoformat() + "Z"
|
||||
current_time = datetime.utcnow().isoformat() + "Z"
|
||||
elif isinstance(connection, GmailConnection):
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).strftime("%s")
|
||||
current_time = datetime.utcnow().strftime("%s")
|
||||
else:
|
||||
pass
|
||||
|
||||
messages = connection.fetch_messages(
|
||||
reports_folder, batch_size=batch_size, since=since
|
||||
)
|
||||
total_messages = len(messages)
|
||||
logger.debug("Found {0} messages in {1}".format(len(messages), reports_folder))
|
||||
|
||||
if batch_size:
|
||||
if batch_size and not since:
|
||||
message_limit = min(total_messages, batch_size)
|
||||
else:
|
||||
message_limit = total_messages
|
||||
@@ -1575,7 +1625,13 @@ def get_dmarc_reports_from_mailbox(
|
||||
i + 1, message_limit, msg_uid
|
||||
)
|
||||
)
|
||||
msg_content = connection.fetch_message(msg_uid)
|
||||
if isinstance(mailbox, MSGraphConnection):
|
||||
if test:
|
||||
msg_content = connection.fetch_message(msg_uid, mark_read=False)
|
||||
else:
|
||||
msg_content = connection.fetch_message(msg_uid, mark_read=True)
|
||||
else:
|
||||
msg_content = connection.fetch_message(msg_uid)
|
||||
try:
|
||||
sa = strip_attachment_payloads
|
||||
parsed_email = parse_report_email(
|
||||
@@ -1706,7 +1762,12 @@ def get_dmarc_reports_from_mailbox(
|
||||
]
|
||||
)
|
||||
|
||||
total_messages = len(connection.fetch_messages(reports_folder))
|
||||
if current_time:
|
||||
total_messages = len(
|
||||
connection.fetch_messages(reports_folder, since=current_time)
|
||||
)
|
||||
else:
|
||||
total_messages = len(connection.fetch_messages(reports_folder))
|
||||
|
||||
if not test and not batch_size and total_messages > 0:
|
||||
# Process emails that came in during the last run
|
||||
@@ -1725,6 +1786,7 @@ def get_dmarc_reports_from_mailbox(
|
||||
reverse_dns_map_path=reverse_dns_map_path,
|
||||
reverse_dns_map_url=reverse_dns_map_url,
|
||||
offline=offline,
|
||||
since=current_time,
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
@@ -510,6 +510,7 @@ def _main():
|
||||
mailbox_test=False,
|
||||
mailbox_batch_size=10,
|
||||
mailbox_check_timeout=30,
|
||||
mailbox_since=None,
|
||||
imap_host=None,
|
||||
imap_skip_certificate_verification=False,
|
||||
imap_ssl=True,
|
||||
@@ -714,6 +715,8 @@ def _main():
|
||||
opts.mailbox_batch_size = mailbox_config.getint("batch_size")
|
||||
if "check_timeout" in mailbox_config:
|
||||
opts.mailbox_check_timeout = mailbox_config.getint("check_timeout")
|
||||
if "since" in mailbox_config:
|
||||
opts.mailbox_since = mailbox_config["since"]
|
||||
|
||||
if "imap" in config.sections():
|
||||
imap_config = config["imap"]
|
||||
@@ -1540,6 +1543,7 @@ def _main():
|
||||
nameservers=opts.nameservers,
|
||||
test=opts.mailbox_test,
|
||||
strip_attachment_payloads=opts.strip_attachment_payloads,
|
||||
since=opts.mailbox_since,
|
||||
)
|
||||
|
||||
aggregate_reports += reports["aggregate_reports"]
|
||||
|
||||
@@ -69,18 +69,32 @@ class GmailConnection(MailboxConnection):
|
||||
else:
|
||||
raise e
|
||||
|
||||
def _fetch_all_message_ids(self, reports_label_id, page_token=None):
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
def _fetch_all_message_ids(self, reports_label_id, page_token=None, since=None):
|
||||
if since:
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
q=f"after:{since}",
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
else:
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
messages = results.get("messages", [])
|
||||
for message in messages:
|
||||
yield message["id"]
|
||||
@@ -92,7 +106,13 @@ class GmailConnection(MailboxConnection):
|
||||
|
||||
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
|
||||
reports_label_id = self._find_label_id_for_label(reports_folder)
|
||||
return [id for id in self._fetch_all_message_ids(reports_label_id)]
|
||||
since = kwargs.get("since")
|
||||
if since:
|
||||
return [
|
||||
id for id in self._fetch_all_message_ids(reports_label_id, since=since)
|
||||
]
|
||||
else:
|
||||
return [id for id in self._fetch_all_message_ids(reports_label_id)]
|
||||
|
||||
def fetch_message(self, message_id):
|
||||
msg = (
|
||||
|
||||
@@ -147,15 +147,20 @@ class MSGraphConnection(MailboxConnection):
|
||||
"""Returns a list of message UIDs in the specified folder"""
|
||||
folder_id = self._find_folder_id_from_folder_path(folder_name)
|
||||
url = f"/users/{self.mailbox_name}/mailFolders/" f"{folder_id}/messages"
|
||||
since = kwargs.get("since")
|
||||
if not since:
|
||||
since = None
|
||||
batch_size = kwargs.get("batch_size")
|
||||
if not batch_size:
|
||||
batch_size = 0
|
||||
emails = self._get_all_messages(url, batch_size)
|
||||
emails = self._get_all_messages(url, batch_size, since)
|
||||
return [email["id"] for email in emails]
|
||||
|
||||
def _get_all_messages(self, url, batch_size):
|
||||
def _get_all_messages(self, url, batch_size, since):
|
||||
messages: list
|
||||
params = {"$select": "id"}
|
||||
if since:
|
||||
params["$filter"] = f"receivedDateTime ge {since}"
|
||||
if batch_size and batch_size > 0:
|
||||
params["$top"] = batch_size
|
||||
else:
|
||||
@@ -166,7 +171,7 @@ class MSGraphConnection(MailboxConnection):
|
||||
messages = result.json()["value"]
|
||||
# Loop if next page is present and not obtained message limit.
|
||||
while "@odata.nextLink" in result.json() and (
|
||||
batch_size == 0 or batch_size - len(messages) > 0
|
||||
since is not None or (batch_size == 0 or batch_size - len(messages) > 0)
|
||||
):
|
||||
result = self._client.get(result.json()["@odata.nextLink"])
|
||||
if result.status_code != 200:
|
||||
@@ -183,14 +188,16 @@ class MSGraphConnection(MailboxConnection):
|
||||
f"Failed to mark message read" f"{resp.status_code}: {resp.json()}"
|
||||
)
|
||||
|
||||
def fetch_message(self, message_id: str):
|
||||
def fetch_message(self, message_id: str, **kwargs):
|
||||
url = f"/users/{self.mailbox_name}/messages/{message_id}/$value"
|
||||
result = self._client.get(url)
|
||||
if result.status_code != 200:
|
||||
raise RuntimeWarning(
|
||||
f"Failed to fetch message" f"{result.status_code}: {result.json()}"
|
||||
)
|
||||
self.mark_message_read(message_id)
|
||||
mark_read = kwargs.get("mark_read")
|
||||
if mark_read:
|
||||
self.mark_message_read(message_id)
|
||||
return result.text
|
||||
|
||||
def delete_message(self, message_id: str):
|
||||
|
||||
@@ -39,7 +39,11 @@ class IMAPConnection(MailboxConnection):
|
||||
|
||||
def fetch_messages(self, reports_folder: str, **kwargs):
|
||||
self._client.select_folder(reports_folder)
|
||||
return self._client.search()
|
||||
since = kwargs.get("since")
|
||||
if since:
|
||||
return self._client.search(["SINCE", since])
|
||||
else:
|
||||
return self._client.search()
|
||||
|
||||
def fetch_message(self, message_id):
|
||||
return self._client.fetch_message(message_id, parse=False)
|
||||
|
||||
Reference in New Issue
Block a user