mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-02-20 16:26:24 +00:00
Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
989bfd8f07 | ||
|
|
908cc2918c | ||
|
|
bd5774d71d | ||
|
|
8e9112bad3 | ||
|
|
40e041a8af | ||
|
|
7ba433cddb | ||
|
|
6d467c93f9 | ||
|
|
be38e83761 | ||
|
|
ef4e1ac8dc | ||
|
|
39e4c22ecc | ||
|
|
88ff3a2c23 | ||
|
|
d8aee569f7 | ||
|
|
debc28cc6e | ||
|
|
52ccf0536c | ||
|
|
976a3274e6 | ||
|
|
bb722e651a | ||
|
|
ab280d7a34 | ||
|
|
92b12eaacf | ||
|
|
8444053476 | ||
|
|
f618f69c6c |
7
.github/workflows/python-tests.yml
vendored
7
.github/workflows/python-tests.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
pip install .[build]
|
||||
- name: Test building documentation
|
||||
run: |
|
||||
cd docs
|
||||
@@ -49,8 +49,7 @@ jobs:
|
||||
ruff check .
|
||||
- name: Run unit tests
|
||||
run: |
|
||||
coverage run tests.py
|
||||
coverage json
|
||||
pytest --cov --cov-report=xml tests.py
|
||||
- name: Test sample DMARC reports
|
||||
run: |
|
||||
pip install -e .
|
||||
@@ -61,3 +60,5 @@ jobs:
|
||||
hatch build
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v4
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@@ -101,6 +101,7 @@
|
||||
"sourcetype",
|
||||
"STARTTLS",
|
||||
"tasklist",
|
||||
"timespan",
|
||||
"tlsa",
|
||||
"tlsrpt",
|
||||
"toctree",
|
||||
|
||||
18
CHANGELOG.md
18
CHANGELOG.md
@@ -1,6 +1,24 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
8.15.4
|
||||
------
|
||||
|
||||
- Fix crash if aggregate report timespan is > 24 hours
|
||||
|
||||
8.15.3
|
||||
------
|
||||
|
||||
- Ignore aggregate reports with a timespan of > 24 hours (Fixes #282)
|
||||
|
||||
8.15.2
|
||||
------
|
||||
|
||||
- Require `mailsuite>=1.9.18`
|
||||
- Pins `mail-parser` version at `3.15.0` due to a parsing regression in mail-parser `4.0.0`
|
||||
- Parse aggregate reports with empty `<auth_results>`
|
||||
- Do not overwrite the log on each run (PR #569 fixes issue #565)
|
||||
|
||||
8.15.1
|
||||
------
|
||||
|
||||
|
||||
31
Dockerfile
31
Dockerfile
@@ -1,12 +1,35 @@
|
||||
FROM python:3.9-slim
|
||||
ARG BASE_IMAGE=python:3.9-slim
|
||||
ARG USERNAME=parsedmarc
|
||||
ARG USER_UID=1000
|
||||
ARG USER_GID=$USER_UID
|
||||
|
||||
## build
|
||||
|
||||
FROM $BASE_IMAGE AS build
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN pip install hatch
|
||||
|
||||
COPY parsedmarc/ parsedmarc/
|
||||
COPY README.md pyproject.toml ./
|
||||
|
||||
RUN pip install -U pip
|
||||
RUN pip install hatch
|
||||
RUN hatch build
|
||||
RUN pip install dist/*.whl
|
||||
|
||||
## image
|
||||
|
||||
FROM $BASE_IMAGE
|
||||
ARG USERNAME
|
||||
ARG USER_UID
|
||||
ARG USER_GID
|
||||
|
||||
COPY --from=build /app/dist/*.whl /tmp/dist/
|
||||
RUN set -ex; \
|
||||
groupadd --gid ${USER_GID} ${USERNAME}; \
|
||||
useradd --uid ${USER_UID} --gid ${USER_GID} -m ${USERNAME}; \
|
||||
pip install /tmp/dist/*.whl; \
|
||||
rm -rf /tmp/dist
|
||||
|
||||
USER $USERNAME
|
||||
|
||||
ENTRYPOINT ["parsedmarc"]
|
||||
|
||||
3
build.sh
3
build.sh
@@ -7,8 +7,9 @@ if [ ! -d "venv" ]; then
|
||||
fi
|
||||
|
||||
. venv/bin/activate
|
||||
pip install -U -r requirements.txt
|
||||
pip install .[build]
|
||||
ruff format .
|
||||
ruff check .
|
||||
cd docs
|
||||
make clean
|
||||
make html
|
||||
|
||||
@@ -166,6 +166,9 @@ The full set of configuration options are:
|
||||
- `check_timeout` - int: Number of seconds to wait for an IMAP
|
||||
IDLE response or the number of seconds until the next
|
||||
mail check (Default: `30`)
|
||||
- `since` - str: Search for messages since a certain time. (Examples: `5m|3h|2d|1w`)
|
||||
Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}.
|
||||
Defaults to `1d` if an incorrect value is provided.
|
||||
- `imap`
|
||||
- `host` - str: The IMAP server hostname or IP address
|
||||
- `port` - int: The IMAP server port (Default: `993`)
|
||||
|
||||
@@ -17,7 +17,7 @@ import zlib
|
||||
from base64 import b64decode
|
||||
from collections import OrderedDict
|
||||
from csv import DictWriter
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from io import BytesIO, StringIO
|
||||
from typing import Callable
|
||||
|
||||
@@ -28,13 +28,18 @@ from lxml import etree
|
||||
from mailsuite.smtp import send_email
|
||||
|
||||
from parsedmarc.log import logger
|
||||
from parsedmarc.mail import MailboxConnection
|
||||
from parsedmarc.mail import (
|
||||
MailboxConnection,
|
||||
IMAPConnection,
|
||||
MSGraphConnection,
|
||||
GmailConnection,
|
||||
)
|
||||
from parsedmarc.utils import get_base_domain, get_ip_address_info
|
||||
from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
|
||||
from parsedmarc.utils import parse_email
|
||||
from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
|
||||
|
||||
__version__ = "8.15.1"
|
||||
__version__ = "8.15.4"
|
||||
|
||||
logger.debug("parsedmarc v{0}".format(__version__))
|
||||
|
||||
@@ -169,7 +174,7 @@ def _parse_report_record(
|
||||
else:
|
||||
lowered_from = ""
|
||||
new_record["identifiers"]["header_from"] = lowered_from
|
||||
if record["auth_results"] is not None:
|
||||
if isinstance(record["auth_results"], dict):
|
||||
auth_results = record["auth_results"].copy()
|
||||
if "spf" not in auth_results:
|
||||
auth_results["spf"] = []
|
||||
@@ -519,7 +524,7 @@ def parse_aggregate_report_xml(
|
||||
date_range = report["report_metadata"]["date_range"]
|
||||
if int(date_range["end"]) - int(date_range["begin"]) > 2 * 86400:
|
||||
_error = "Time span > 24 hours - RFC 7489 section 7.2"
|
||||
errors.append(_error)
|
||||
raise InvalidAggregateReport(_error)
|
||||
date_range["begin"] = timestamp_to_human(date_range["begin"])
|
||||
date_range["end"] = timestamp_to_human(date_range["end"])
|
||||
new_report_metadata["begin_date"] = date_range["begin"]
|
||||
@@ -1499,6 +1504,7 @@ def get_dmarc_reports_from_mailbox(
|
||||
strip_attachment_payloads=False,
|
||||
results=None,
|
||||
batch_size=10,
|
||||
since=None,
|
||||
create_folders=True,
|
||||
):
|
||||
"""
|
||||
@@ -1522,6 +1528,8 @@ def get_dmarc_reports_from_mailbox(
|
||||
results (dict): Results from the previous run
|
||||
batch_size (int): Number of messages to read and process before saving
|
||||
(use 0 for no limit)
|
||||
since: Search for messages since certain time
|
||||
(units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"})
|
||||
create_folders (bool): Whether to create the destination folders
|
||||
(not used in watch)
|
||||
|
||||
@@ -1534,6 +1542,9 @@ def get_dmarc_reports_from_mailbox(
|
||||
if connection is None:
|
||||
raise ValueError("Must supply a connection")
|
||||
|
||||
# current_time useful to fetch_messages later in the program
|
||||
current_time = None
|
||||
|
||||
aggregate_reports = []
|
||||
forensic_reports = []
|
||||
smtp_tls_reports = []
|
||||
@@ -1557,11 +1568,50 @@ def get_dmarc_reports_from_mailbox(
|
||||
connection.create_folder(smtp_tls_reports_folder)
|
||||
connection.create_folder(invalid_reports_folder)
|
||||
|
||||
messages = connection.fetch_messages(reports_folder, batch_size=batch_size)
|
||||
if since:
|
||||
_since = 1440 # default one day
|
||||
if re.match(r"\d+[mhd]$", since):
|
||||
s = re.split(r"(\d+)", since)
|
||||
if s[2] == "m":
|
||||
_since = int(s[1])
|
||||
elif s[2] == "h":
|
||||
_since = int(s[1]) * 60
|
||||
elif s[2] == "d":
|
||||
_since = int(s[1]) * 60 * 24
|
||||
elif s[2] == "w":
|
||||
_since = int(s[1]) * 60 * 24 * 7
|
||||
else:
|
||||
logger.warning(
|
||||
"Incorrect format for 'since' option. \
|
||||
Provided value:{0}, Expected values:(5m|3h|2d|1w). \
|
||||
Ignoring option, fetching messages for last 24hrs"
|
||||
"SMTP does not support a time or timezone in since."
|
||||
"See https://www.rfc-editor.org/rfc/rfc3501#page-52".format(since)
|
||||
)
|
||||
|
||||
if isinstance(connection, IMAPConnection):
|
||||
logger.debug(
|
||||
"Only days and weeks values in 'since' option are \
|
||||
considered for IMAP conections. Examples: 2d or 1w"
|
||||
)
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).date()
|
||||
current_time = datetime.utcnow().date()
|
||||
elif isinstance(connection, MSGraphConnection):
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).isoformat() + "Z"
|
||||
current_time = datetime.utcnow().isoformat() + "Z"
|
||||
elif isinstance(connection, GmailConnection):
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).strftime("%s")
|
||||
current_time = datetime.utcnow().strftime("%s")
|
||||
else:
|
||||
pass
|
||||
|
||||
messages = connection.fetch_messages(
|
||||
reports_folder, batch_size=batch_size, since=since
|
||||
)
|
||||
total_messages = len(messages)
|
||||
logger.debug("Found {0} messages in {1}".format(len(messages), reports_folder))
|
||||
|
||||
if batch_size:
|
||||
if batch_size and not since:
|
||||
message_limit = min(total_messages, batch_size)
|
||||
else:
|
||||
message_limit = total_messages
|
||||
@@ -1575,7 +1625,13 @@ def get_dmarc_reports_from_mailbox(
|
||||
i + 1, message_limit, msg_uid
|
||||
)
|
||||
)
|
||||
msg_content = connection.fetch_message(msg_uid)
|
||||
if isinstance(mailbox, MSGraphConnection):
|
||||
if test:
|
||||
msg_content = connection.fetch_message(msg_uid, mark_read=False)
|
||||
else:
|
||||
msg_content = connection.fetch_message(msg_uid, mark_read=True)
|
||||
else:
|
||||
msg_content = connection.fetch_message(msg_uid)
|
||||
try:
|
||||
sa = strip_attachment_payloads
|
||||
parsed_email = parse_report_email(
|
||||
@@ -1706,7 +1762,12 @@ def get_dmarc_reports_from_mailbox(
|
||||
]
|
||||
)
|
||||
|
||||
total_messages = len(connection.fetch_messages(reports_folder))
|
||||
if current_time:
|
||||
total_messages = len(
|
||||
connection.fetch_messages(reports_folder, since=current_time)
|
||||
)
|
||||
else:
|
||||
total_messages = len(connection.fetch_messages(reports_folder))
|
||||
|
||||
if not test and not batch_size and total_messages > 0:
|
||||
# Process emails that came in during the last run
|
||||
@@ -1725,6 +1786,7 @@ def get_dmarc_reports_from_mailbox(
|
||||
reverse_dns_map_path=reverse_dns_map_path,
|
||||
reverse_dns_map_url=reverse_dns_map_url,
|
||||
offline=offline,
|
||||
since=current_time,
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
@@ -510,6 +510,7 @@ def _main():
|
||||
mailbox_test=False,
|
||||
mailbox_batch_size=10,
|
||||
mailbox_check_timeout=30,
|
||||
mailbox_since=None,
|
||||
imap_host=None,
|
||||
imap_skip_certificate_verification=False,
|
||||
imap_ssl=True,
|
||||
@@ -714,6 +715,8 @@ def _main():
|
||||
opts.mailbox_batch_size = mailbox_config.getint("batch_size")
|
||||
if "check_timeout" in mailbox_config:
|
||||
opts.mailbox_check_timeout = mailbox_config.getint("check_timeout")
|
||||
if "since" in mailbox_config:
|
||||
opts.mailbox_since = mailbox_config["since"]
|
||||
|
||||
if "imap" in config.sections():
|
||||
imap_config = config["imap"]
|
||||
@@ -1179,9 +1182,7 @@ def _main():
|
||||
logger.setLevel(logging.DEBUG)
|
||||
if opts.log_file:
|
||||
try:
|
||||
log_file = open(opts.log_file, "w")
|
||||
log_file.close()
|
||||
fh = logging.FileHandler(opts.log_file)
|
||||
fh = logging.FileHandler(opts.log_file, "a")
|
||||
formatter = logging.Formatter(
|
||||
"%(asctime)s - "
|
||||
"%(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
|
||||
@@ -1542,6 +1543,7 @@ def _main():
|
||||
nameservers=opts.nameservers,
|
||||
test=opts.mailbox_test,
|
||||
strip_attachment_payloads=opts.strip_attachment_payloads,
|
||||
since=opts.mailbox_since,
|
||||
)
|
||||
|
||||
aggregate_reports += reports["aggregate_reports"]
|
||||
|
||||
@@ -69,18 +69,32 @@ class GmailConnection(MailboxConnection):
|
||||
else:
|
||||
raise e
|
||||
|
||||
def _fetch_all_message_ids(self, reports_label_id, page_token=None):
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
def _fetch_all_message_ids(self, reports_label_id, page_token=None, since=None):
|
||||
if since:
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
q=f"after:{since}",
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
else:
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
messages = results.get("messages", [])
|
||||
for message in messages:
|
||||
yield message["id"]
|
||||
@@ -92,7 +106,13 @@ class GmailConnection(MailboxConnection):
|
||||
|
||||
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
|
||||
reports_label_id = self._find_label_id_for_label(reports_folder)
|
||||
return [id for id in self._fetch_all_message_ids(reports_label_id)]
|
||||
since = kwargs.get("since")
|
||||
if since:
|
||||
return [
|
||||
id for id in self._fetch_all_message_ids(reports_label_id, since=since)
|
||||
]
|
||||
else:
|
||||
return [id for id in self._fetch_all_message_ids(reports_label_id)]
|
||||
|
||||
def fetch_message(self, message_id):
|
||||
msg = (
|
||||
|
||||
@@ -147,15 +147,20 @@ class MSGraphConnection(MailboxConnection):
|
||||
"""Returns a list of message UIDs in the specified folder"""
|
||||
folder_id = self._find_folder_id_from_folder_path(folder_name)
|
||||
url = f"/users/{self.mailbox_name}/mailFolders/" f"{folder_id}/messages"
|
||||
since = kwargs.get("since")
|
||||
if not since:
|
||||
since = None
|
||||
batch_size = kwargs.get("batch_size")
|
||||
if not batch_size:
|
||||
batch_size = 0
|
||||
emails = self._get_all_messages(url, batch_size)
|
||||
emails = self._get_all_messages(url, batch_size, since)
|
||||
return [email["id"] for email in emails]
|
||||
|
||||
def _get_all_messages(self, url, batch_size):
|
||||
def _get_all_messages(self, url, batch_size, since):
|
||||
messages: list
|
||||
params = {"$select": "id"}
|
||||
if since:
|
||||
params["$filter"] = f"receivedDateTime ge {since}"
|
||||
if batch_size and batch_size > 0:
|
||||
params["$top"] = batch_size
|
||||
else:
|
||||
@@ -166,7 +171,7 @@ class MSGraphConnection(MailboxConnection):
|
||||
messages = result.json()["value"]
|
||||
# Loop if next page is present and not obtained message limit.
|
||||
while "@odata.nextLink" in result.json() and (
|
||||
batch_size == 0 or batch_size - len(messages) > 0
|
||||
since is not None or (batch_size == 0 or batch_size - len(messages) > 0)
|
||||
):
|
||||
result = self._client.get(result.json()["@odata.nextLink"])
|
||||
if result.status_code != 200:
|
||||
@@ -183,14 +188,16 @@ class MSGraphConnection(MailboxConnection):
|
||||
f"Failed to mark message read" f"{resp.status_code}: {resp.json()}"
|
||||
)
|
||||
|
||||
def fetch_message(self, message_id: str):
|
||||
def fetch_message(self, message_id: str, **kwargs):
|
||||
url = f"/users/{self.mailbox_name}/messages/{message_id}/$value"
|
||||
result = self._client.get(url)
|
||||
if result.status_code != 200:
|
||||
raise RuntimeWarning(
|
||||
f"Failed to fetch message" f"{result.status_code}: {result.json()}"
|
||||
)
|
||||
self.mark_message_read(message_id)
|
||||
mark_read = kwargs.get("mark_read")
|
||||
if mark_read:
|
||||
self.mark_message_read(message_id)
|
||||
return result.text
|
||||
|
||||
def delete_message(self, message_id: str):
|
||||
|
||||
@@ -39,7 +39,11 @@ class IMAPConnection(MailboxConnection):
|
||||
|
||||
def fetch_messages(self, reports_folder: str, **kwargs):
|
||||
self._client.select_folder(reports_folder)
|
||||
return self._client.search()
|
||||
since = kwargs.get("since")
|
||||
if since:
|
||||
return self._client.search(["SINCE", since])
|
||||
else:
|
||||
return self._client.search()
|
||||
|
||||
def fetch_message(self, message_id):
|
||||
return self._client.fetch_message(message_id, parse=False)
|
||||
|
||||
@@ -46,15 +46,27 @@ dependencies = [
|
||||
"imapclient>=2.1.0",
|
||||
"kafka-python-ng>=2.2.2",
|
||||
"lxml>=4.4.0",
|
||||
"mailsuite>=1.9.17",
|
||||
"mailsuite>=1.9.18",
|
||||
"msgraph-core==0.2.2",
|
||||
"opensearch-py>=2.4.2,<=3.0.0",
|
||||
"publicsuffixlist>=0.10.0",
|
||||
"pygelf>=0.4.2",
|
||||
"requests>=2.22.0",
|
||||
"tqdm>=4.31.1",
|
||||
"urllib3>=1.25.7",
|
||||
"xmltodict>=0.12.0",
|
||||
"pygelf>=0.4.2",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
build = [
|
||||
"hatch",
|
||||
"myst-parser[linkify]",
|
||||
"nose",
|
||||
"pytest",
|
||||
"pytest-cov",
|
||||
"ruff",
|
||||
"sphinx",
|
||||
"sphinx_rtd_theme",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
tqdm>=4.31.1
|
||||
pygments>=2.11.1
|
||||
dnspython>=2.0.0
|
||||
expiringdict>=1.1.4
|
||||
urllib3>=1.25.7
|
||||
requests>=2.22.0
|
||||
publicsuffixlist>=0.10.0
|
||||
xmltodict>=0.12.0
|
||||
geoip2>=3.0.0
|
||||
imapclient>=2.1.0
|
||||
dateparser>=1.1.1
|
||||
elasticsearch<7.14.0
|
||||
elasticsearch-dsl>=7.4.0
|
||||
opensearch-py>=2.4.2,<=3.0.0
|
||||
kafka-python-ng>=2.2.2
|
||||
mailsuite>=1.9.17
|
||||
pygelf
|
||||
nose>=1.3.7
|
||||
wheel>=0.37.0
|
||||
ruff
|
||||
jinja2>=2.10.1
|
||||
packaging>=19.1
|
||||
imagesize>=1.1.0
|
||||
alabaster>=0.7.12
|
||||
Babel>=2.7.0
|
||||
docutils<0.18,>=0.14
|
||||
sphinx>=1.0.5
|
||||
sphinx_rtd_theme>=0.4.3
|
||||
codecov>=2.0.15
|
||||
lxml>=4.4.0
|
||||
boto3>=1.16.63
|
||||
msgraph-core==0.2.2
|
||||
azure-identity>=1.8.0
|
||||
azure-monitor-ingestion>=1.0.0
|
||||
google-api-core>=2.4.0
|
||||
google-api-python-client>=2.35.0
|
||||
google-auth>=2.3.3
|
||||
google-auth-httplib2>=0.1.0
|
||||
google-auth-oauthlib>=0.4.6
|
||||
hatch>=1.5.0
|
||||
myst-parser>=0.18.0
|
||||
myst-parser[linkify]
|
||||
requests
|
||||
bs4
|
||||
pytest
|
||||
|
||||
Reference in New Issue
Block a user