mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-02-18 15:36:24 +00:00
Compare commits
86 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
918501ccb5 | ||
|
|
036c372ea3 | ||
|
|
a969d83137 | ||
|
|
e299f7d161 | ||
|
|
4c04418dae | ||
|
|
2ca9373ed0 | ||
|
|
961ef6d804 | ||
|
|
573ba1e3e9 | ||
|
|
1d8af3ccff | ||
|
|
8426daa26b | ||
|
|
d1531b86f2 | ||
|
|
8bb046798c | ||
|
|
d64e12548a | ||
|
|
380479cbf1 | ||
|
|
ace21c8084 | ||
|
|
1a1aef21ad | ||
|
|
532dbbdb7e | ||
|
|
45738ae688 | ||
|
|
9d77bd64bc | ||
|
|
140290221d | ||
|
|
187d61b770 | ||
|
|
0443b7365e | ||
|
|
d7b887a835 | ||
|
|
a805733221 | ||
|
|
9552c3ac92 | ||
|
|
5273948be0 | ||
|
|
b51756b8bd | ||
|
|
7fa7c24cb8 | ||
|
|
972237ae7e | ||
|
|
6e5333a342 | ||
|
|
47b074c80b | ||
|
|
a1cfeb3081 | ||
|
|
c7c451b1b1 | ||
|
|
669deb9755 | ||
|
|
446c018920 | ||
|
|
38c6f86973 | ||
|
|
62ccc11925 | ||
|
|
c32ca3cae3 | ||
|
|
010f1f84a7 | ||
|
|
7da57c6382 | ||
|
|
d08e29a306 | ||
|
|
e1e53ad4cb | ||
|
|
4670e9687d | ||
|
|
7f8a2c08cd | ||
|
|
e9c05dd0bf | ||
|
|
9348a474dd | ||
|
|
e0decaba8c | ||
|
|
26a651cded | ||
|
|
bcfcd93fc6 | ||
|
|
54d5ed3543 | ||
|
|
1efbc87e0e | ||
|
|
e78e7f64af | ||
|
|
ad9de65b99 | ||
|
|
b9df12700b | ||
|
|
20843b920f | ||
|
|
e5ae89fedf | ||
|
|
f148cff11c | ||
|
|
4583769e04 | ||
|
|
0ecb80b27c | ||
|
|
b8e62e6d3b | ||
|
|
c67953a2c5 | ||
|
|
27dff4298c | ||
|
|
f2133aacd4 | ||
|
|
31917e58a9 | ||
|
|
bffb98d217 | ||
|
|
1f93b3a7ea | ||
|
|
88debb9729 | ||
|
|
a8a5564780 | ||
|
|
1e26f95b7b | ||
|
|
82b48e4d01 | ||
|
|
617b7c5b4a | ||
|
|
989bfd8f07 | ||
|
|
908cc2918c | ||
|
|
bd5774d71d | ||
|
|
8e9112bad3 | ||
|
|
40e041a8af | ||
|
|
7ba433cddb | ||
|
|
6d467c93f9 | ||
|
|
be38e83761 | ||
|
|
ef4e1ac8dc | ||
|
|
39e4c22ecc | ||
|
|
88ff3a2c23 | ||
|
|
d8aee569f7 | ||
|
|
debc28cc6e | ||
|
|
52ccf0536c | ||
|
|
f618f69c6c |
24
.github/workflows/python-tests.yml
vendored
24
.github/workflows/python-tests.yml
vendored
@@ -11,13 +11,26 @@ on:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
services:
|
||||
elasticsearch:
|
||||
image: elasticsearch:8.18.2
|
||||
env:
|
||||
discovery.type: single-node
|
||||
cluster.name: parsedmarc-cluster
|
||||
discovery.seed_hosts: elasticsearch
|
||||
bootstrap.memory_lock: true
|
||||
xpack.security.enabled: false
|
||||
xpack.license.self_generated.type: basic
|
||||
ports:
|
||||
- 9200:9200
|
||||
- 9300:9300
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
|
||||
python-version: ["3.9", "3.10", "3.11", "3.12"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -29,13 +42,6 @@ jobs:
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y libemail-outlook-message-perl
|
||||
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elasticsearch-keyring.gpg
|
||||
sudo apt-get install apt-transport-https
|
||||
echo "deb [signed-by=/usr/share/keyrings/elasticsearch-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee /etc/apt/sources.list.d/elastic-8.x.list
|
||||
sudo apt-get update && sudo apt-get install elasticsearch
|
||||
sudo sed -i 's/xpack.security.enabled: true/xpack.security.enabled: false/' /etc/elasticsearch/elasticsearch.yml
|
||||
sudo systemctl restart elasticsearch
|
||||
sudo systemctl --no-pager status elasticsearch
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -136,3 +136,9 @@ samples/private
|
||||
|
||||
*.html
|
||||
*.sqlite-journal
|
||||
|
||||
parsedmarc.ini
|
||||
scratch.py
|
||||
|
||||
parsedmarc/resources/maps/base_reverse_dns.csv
|
||||
parsedmarc/resources/maps/unknown_base_reverse_dns.csv
|
||||
|
||||
2
.vscode/settings.json
vendored
2
.vscode/settings.json
vendored
@@ -70,6 +70,7 @@
|
||||
"modindex",
|
||||
"msgconvert",
|
||||
"msgraph",
|
||||
"MSSP",
|
||||
"Munge",
|
||||
"ndjson",
|
||||
"newkey",
|
||||
@@ -101,6 +102,7 @@
|
||||
"sourcetype",
|
||||
"STARTTLS",
|
||||
"tasklist",
|
||||
"timespan",
|
||||
"tlsa",
|
||||
"tlsrpt",
|
||||
"toctree",
|
||||
|
||||
52
CHANGELOG.md
52
CHANGELOG.md
@@ -1,6 +1,58 @@
|
||||
Changelog
|
||||
=========
|
||||
|
||||
8.18.2
|
||||
------
|
||||
|
||||
- Merged PR #603
|
||||
- Fixes issue #595 - CI test fails for Elasticsearch
|
||||
- Moved Elasticsearch to a separate Docker service container for CI testing
|
||||
- Dropped Python 3.8 from CI testing
|
||||
- Fixes lookup and saving of DMARC forensic reports in Elasticsearch and OpenSearch
|
||||
- Updated fallback `base_reverse_dns_map.csv`, which now includes over 1,400 lines
|
||||
- Updated included `dbip-country-lite.mmdb` to the June 2025 release
|
||||
- Automatically fall back to the internal `base_reverse_dns_map.csv` if the received file is not valid (Fixes #602)
|
||||
- Print the received data to the debug log
|
||||
|
||||
8.18.1
|
||||
------
|
||||
|
||||
- Add missing `https://` to the default Microsoft Graph URL
|
||||
|
||||
8.18.0
|
||||
------
|
||||
|
||||
- Add support for Microsoft national clouds via Graph API base URL (PR #590)
|
||||
- Avoid stopping processing when an invalid DMARC report is encountered (PR #587)
|
||||
- Increase `http.client._MAXHEADERS` from `100` to `200` to avoid errors connecting to Elasticsearch/OpenSearch (PR #589)
|
||||
|
||||
8.17.0
|
||||
------
|
||||
|
||||
- Ignore duplicate aggregate DMARC reports with the same `org_name` and `report_id` seen within the same hour (Fixes #535)
|
||||
- Fix saving SMTP TLS reports to OpenSearch (PR #585 closed issue #576)
|
||||
- Add 303 entries to `base_reverse_dns_map.csv`
|
||||
|
||||
8.16.1
|
||||
------
|
||||
|
||||
- Failed attempt to ignore aggregate DMARC reports seen within a period of one hour (#535)
|
||||
|
||||
8.16.0
|
||||
------
|
||||
|
||||
- Add a `since` option to only search for emails since a certain time (PR #527)
|
||||
|
||||
8.15.4
|
||||
------
|
||||
|
||||
- Fix crash if aggregate report timespan is > 24 hours
|
||||
|
||||
8.15.3
|
||||
------
|
||||
|
||||
- Ignore aggregate reports with a timespan of > 24 hours (Fixes #282)
|
||||
|
||||
8.15.2
|
||||
------
|
||||
|
||||
|
||||
@@ -42,6 +42,6 @@ Thanks to all
|
||||
- Consistent data structures
|
||||
- Simple JSON and/or CSV output
|
||||
- Optionally email the results
|
||||
- Optionally send the results to Elasticsearch and/or Splunk, for use
|
||||
- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for use
|
||||
with premade dashboards
|
||||
- Optionally send reports to Apache Kafka
|
||||
|
||||
7
build.sh
7
build.sh
@@ -9,12 +9,17 @@ fi
|
||||
. venv/bin/activate
|
||||
pip install .[build]
|
||||
ruff format .
|
||||
ruff check .
|
||||
cd docs
|
||||
make clean
|
||||
make html
|
||||
touch build/html/.nojekyll
|
||||
cp -rf build/html/* ../../parsedmarc-docs/
|
||||
if [ -d "./../parsedmarc-docs" ]; then
|
||||
cp -rf build/html/* ../../parsedmarc-docs/
|
||||
fi
|
||||
cd ..
|
||||
sort -o "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt" "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt"
|
||||
./sortmaps.py
|
||||
python3 tests.py
|
||||
rm -rf dist/ build/
|
||||
hatch build
|
||||
@@ -28,3 +28,30 @@ services:
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 24
|
||||
|
||||
opensearch:
|
||||
image: opensearchproject/opensearch:2.18.0
|
||||
environment:
|
||||
- network.host=127.0.0.1
|
||||
- http.host=0.0.0.0
|
||||
- node.name=opensearch
|
||||
- discovery.type=single-node
|
||||
- cluster.name=parsedmarc-cluster
|
||||
- discovery.seed_hosts=opensearch
|
||||
- bootstrap.memory_lock=true
|
||||
- OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}
|
||||
ports:
|
||||
- 127.0.0.1:9201:9200
|
||||
ulimits:
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
healthcheck:
|
||||
test:
|
||||
[
|
||||
"CMD-SHELL",
|
||||
"curl -s -XGET http://localhost:9201/_cluster/health?pretty | grep status | grep -q '\\(green\\|yellow\\)'"
|
||||
]
|
||||
interval: 10s
|
||||
timeout: 10s
|
||||
retries: 24
|
||||
|
||||
@@ -166,6 +166,9 @@ The full set of configuration options are:
|
||||
- `check_timeout` - int: Number of seconds to wait for a IMAP
|
||||
IDLE response or the number of seconds until the next
|
||||
mail check (Default: `30`)
|
||||
- `since` - str: Search for messages since certain time. (Examples: `5m|3h|2d|1w`)
|
||||
Acceptable units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"}).
|
||||
Defaults to `1d` if incorrect value is provided.
|
||||
- `imap`
|
||||
- `host` - str: The IMAP server hostname or IP address
|
||||
- `port` - int: The IMAP server port (Default: `993`)
|
||||
@@ -205,6 +208,8 @@ The full set of configuration options are:
|
||||
- `mailbox` - str: The mailbox name. This defaults to the
|
||||
current user if using the UsernamePassword auth method, but
|
||||
could be a shared mailbox if the user has access to the mailbox
|
||||
- `graph_url` - str: Microsoft Graph URL. Allows for use of National Clouds (ex Azure Gov)
|
||||
(Default: https://graph.microsoft.com)
|
||||
- `token_file` - str: Path to save the token file
|
||||
(Default: `.token`)
|
||||
- `allow_unencrypted_storage` - bool: Allows the Azure Identity
|
||||
|
||||
5901
grafana/Grafana-DMARC_Reports.json-new_panel.json
Normal file
5901
grafana/Grafana-DMARC_Reports.json-new_panel.json
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@@ -17,7 +17,7 @@ import zlib
|
||||
from base64 import b64decode
|
||||
from collections import OrderedDict
|
||||
from csv import DictWriter
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from io import BytesIO, StringIO
|
||||
from typing import Callable
|
||||
|
||||
@@ -28,13 +28,18 @@ from lxml import etree
|
||||
from mailsuite.smtp import send_email
|
||||
|
||||
from parsedmarc.log import logger
|
||||
from parsedmarc.mail import MailboxConnection
|
||||
from parsedmarc.mail import (
|
||||
MailboxConnection,
|
||||
IMAPConnection,
|
||||
MSGraphConnection,
|
||||
GmailConnection,
|
||||
)
|
||||
from parsedmarc.utils import get_base_domain, get_ip_address_info
|
||||
from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
|
||||
from parsedmarc.utils import parse_email
|
||||
from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
|
||||
|
||||
__version__ = "8.15.2"
|
||||
__version__ = "8.18.2"
|
||||
|
||||
logger.debug("parsedmarc v{0}".format(__version__))
|
||||
|
||||
@@ -49,6 +54,7 @@ MAGIC_XML = b"\x3c\x3f\x78\x6d\x6c\x20"
|
||||
MAGIC_JSON = b"\7b"
|
||||
|
||||
IP_ADDRESS_CACHE = ExpiringDict(max_len=10000, max_age_seconds=14400)
|
||||
SEEN_AGGREGATE_REPORT_IDS = ExpiringDict(max_len=100000000, max_age_seconds=3600)
|
||||
REVERSE_DNS_MAP = dict()
|
||||
|
||||
|
||||
@@ -266,7 +272,7 @@ def _parse_smtp_tls_failure_details(failure_details):
|
||||
return new_failure_details
|
||||
|
||||
except KeyError as e:
|
||||
raise InvalidSMTPTLSReport(f"Missing required failure details field:" f" {e}")
|
||||
raise InvalidSMTPTLSReport(f"Missing required failure details field: {e}")
|
||||
except Exception as e:
|
||||
raise InvalidSMTPTLSReport(str(e))
|
||||
|
||||
@@ -278,7 +284,7 @@ def _parse_smtp_tls_report_policy(policy):
|
||||
policy_type = policy["policy"]["policy-type"]
|
||||
failure_details = []
|
||||
if policy_type not in policy_types:
|
||||
raise InvalidSMTPTLSReport(f"Invalid policy type " f"{policy_type}")
|
||||
raise InvalidSMTPTLSReport(f"Invalid policy type {policy_type}")
|
||||
new_policy = OrderedDict(policy_domain=policy_domain, policy_type=policy_type)
|
||||
if "policy-string" in policy["policy"]:
|
||||
if isinstance(policy["policy"]["policy-string"], list):
|
||||
@@ -326,9 +332,7 @@ def parse_smtp_tls_report_json(report):
|
||||
raise Exception(f"Missing required field: {required_field}]")
|
||||
if not isinstance(report["policies"], list):
|
||||
policies_type = type(report["policies"])
|
||||
raise InvalidSMTPTLSReport(
|
||||
f"policies must be a list, " f"not {policies_type}"
|
||||
)
|
||||
raise InvalidSMTPTLSReport(f"policies must be a list, not {policies_type}")
|
||||
for policy in report["policies"]:
|
||||
policies.append(_parse_smtp_tls_report_policy(policy))
|
||||
|
||||
@@ -519,7 +523,7 @@ def parse_aggregate_report_xml(
|
||||
date_range = report["report_metadata"]["date_range"]
|
||||
if int(date_range["end"]) - int(date_range["begin"]) > 2 * 86400:
|
||||
_error = "Time span > 24 hours - RFC 7489 section 7.2"
|
||||
errors.append(_error)
|
||||
raise InvalidAggregateReport(_error)
|
||||
date_range["begin"] = timestamp_to_human(date_range["begin"])
|
||||
date_range["end"] = timestamp_to_human(date_range["end"])
|
||||
new_report_metadata["begin_date"] = date_range["begin"]
|
||||
@@ -1240,11 +1244,11 @@ def parse_report_email(
|
||||
field_name = match[0].lower().replace(" ", "-")
|
||||
fields[field_name] = match[1].strip()
|
||||
|
||||
feedback_report = "Arrival-Date: {}\n" "Source-IP: {}" "".format(
|
||||
feedback_report = "Arrival-Date: {}\nSource-IP: {}".format(
|
||||
fields["received-date"], fields["sender-ip-address"]
|
||||
)
|
||||
except Exception as e:
|
||||
error = "Unable to parse message with " 'subject "{0}": {1}'.format(
|
||||
error = 'Unable to parse message with subject "{0}": {1}'.format(
|
||||
subject, e
|
||||
)
|
||||
raise InvalidDMARCReport(error)
|
||||
@@ -1288,10 +1292,10 @@ def parse_report_email(
|
||||
"is not a valid "
|
||||
"aggregate DMARC report: {1}".format(subject, e)
|
||||
)
|
||||
raise ParserError(error)
|
||||
raise InvalidDMARCReport(error)
|
||||
|
||||
except Exception as e:
|
||||
error = "Unable to parse message with " 'subject "{0}": {1}'.format(
|
||||
error = 'Unable to parse message with subject "{0}": {1}'.format(
|
||||
subject, e
|
||||
)
|
||||
raise ParserError(error)
|
||||
@@ -1325,7 +1329,7 @@ def parse_report_email(
|
||||
return result
|
||||
|
||||
if result is None:
|
||||
error = 'Message with subject "{0}" is ' "not a valid report".format(subject)
|
||||
error = 'Message with subject "{0}" is not a valid report'.format(subject)
|
||||
raise InvalidDMARCReport(error)
|
||||
|
||||
|
||||
@@ -1465,7 +1469,17 @@ def get_dmarc_reports_from_mbox(
|
||||
strip_attachment_payloads=sa,
|
||||
)
|
||||
if parsed_email["report_type"] == "aggregate":
|
||||
aggregate_reports.append(parsed_email["report"])
|
||||
report_org = parsed_email["report"]["report_metadata"]["org_name"]
|
||||
report_id = parsed_email["report"]["report_metadata"]["report_id"]
|
||||
report_key = f"{report_org}_{report_id}"
|
||||
if report_key not in SEEN_AGGREGATE_REPORT_IDS:
|
||||
SEEN_AGGREGATE_REPORT_IDS[report_key] = True
|
||||
aggregate_reports.append(parsed_email["report"])
|
||||
else:
|
||||
logger.debug(
|
||||
"Skipping duplicate aggregate report "
|
||||
f"from {report_org} with ID: {report_id}"
|
||||
)
|
||||
elif parsed_email["report_type"] == "forensic":
|
||||
forensic_reports.append(parsed_email["report"])
|
||||
elif parsed_email["report_type"] == "smtp_tls":
|
||||
@@ -1499,6 +1513,7 @@ def get_dmarc_reports_from_mailbox(
|
||||
strip_attachment_payloads=False,
|
||||
results=None,
|
||||
batch_size=10,
|
||||
since=None,
|
||||
create_folders=True,
|
||||
):
|
||||
"""
|
||||
@@ -1522,6 +1537,8 @@ def get_dmarc_reports_from_mailbox(
|
||||
results (dict): Results from the previous run
|
||||
batch_size (int): Number of messages to read and process before saving
|
||||
(use 0 for no limit)
|
||||
since: Search for messages since certain time
|
||||
(units - {"m":"minutes", "h":"hours", "d":"days", "w":"weeks"})
|
||||
create_folders (bool): Whether to create the destination folders
|
||||
(not used in watch)
|
||||
|
||||
@@ -1534,6 +1551,9 @@ def get_dmarc_reports_from_mailbox(
|
||||
if connection is None:
|
||||
raise ValueError("Must supply a connection")
|
||||
|
||||
# current_time useful to fetch_messages later in the program
|
||||
current_time = None
|
||||
|
||||
aggregate_reports = []
|
||||
forensic_reports = []
|
||||
smtp_tls_reports = []
|
||||
@@ -1557,11 +1577,50 @@ def get_dmarc_reports_from_mailbox(
|
||||
connection.create_folder(smtp_tls_reports_folder)
|
||||
connection.create_folder(invalid_reports_folder)
|
||||
|
||||
messages = connection.fetch_messages(reports_folder, batch_size=batch_size)
|
||||
if since:
|
||||
_since = 1440 # default one day
|
||||
if re.match(r"\d+[mhd]$", since):
|
||||
s = re.split(r"(\d+)", since)
|
||||
if s[2] == "m":
|
||||
_since = int(s[1])
|
||||
elif s[2] == "h":
|
||||
_since = int(s[1]) * 60
|
||||
elif s[2] == "d":
|
||||
_since = int(s[1]) * 60 * 24
|
||||
elif s[2] == "w":
|
||||
_since = int(s[1]) * 60 * 24 * 7
|
||||
else:
|
||||
logger.warning(
|
||||
"Incorrect format for 'since' option. \
|
||||
Provided value:{0}, Expected values:(5m|3h|2d|1w). \
|
||||
Ignoring option, fetching messages for last 24hrs"
|
||||
"SMTP does not support a time or timezone in since."
|
||||
"See https://www.rfc-editor.org/rfc/rfc3501#page-52".format(since)
|
||||
)
|
||||
|
||||
if isinstance(connection, IMAPConnection):
|
||||
logger.debug(
|
||||
"Only days and weeks values in 'since' option are \
|
||||
considered for IMAP conections. Examples: 2d or 1w"
|
||||
)
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).date()
|
||||
current_time = datetime.utcnow().date()
|
||||
elif isinstance(connection, MSGraphConnection):
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).isoformat() + "Z"
|
||||
current_time = datetime.utcnow().isoformat() + "Z"
|
||||
elif isinstance(connection, GmailConnection):
|
||||
since = (datetime.utcnow() - timedelta(minutes=_since)).strftime("%s")
|
||||
current_time = datetime.utcnow().strftime("%s")
|
||||
else:
|
||||
pass
|
||||
|
||||
messages = connection.fetch_messages(
|
||||
reports_folder, batch_size=batch_size, since=since
|
||||
)
|
||||
total_messages = len(messages)
|
||||
logger.debug("Found {0} messages in {1}".format(len(messages), reports_folder))
|
||||
|
||||
if batch_size:
|
||||
if batch_size and not since:
|
||||
message_limit = min(total_messages, batch_size)
|
||||
else:
|
||||
message_limit = total_messages
|
||||
@@ -1575,7 +1634,13 @@ def get_dmarc_reports_from_mailbox(
|
||||
i + 1, message_limit, msg_uid
|
||||
)
|
||||
)
|
||||
msg_content = connection.fetch_message(msg_uid)
|
||||
if isinstance(mailbox, MSGraphConnection):
|
||||
if test:
|
||||
msg_content = connection.fetch_message(msg_uid, mark_read=False)
|
||||
else:
|
||||
msg_content = connection.fetch_message(msg_uid, mark_read=True)
|
||||
else:
|
||||
msg_content = connection.fetch_message(msg_uid)
|
||||
try:
|
||||
sa = strip_attachment_payloads
|
||||
parsed_email = parse_report_email(
|
||||
@@ -1591,7 +1656,16 @@ def get_dmarc_reports_from_mailbox(
|
||||
keep_alive=connection.keepalive,
|
||||
)
|
||||
if parsed_email["report_type"] == "aggregate":
|
||||
aggregate_reports.append(parsed_email["report"])
|
||||
report_org = parsed_email["report"]["report_metadata"]["org_name"]
|
||||
report_id = parsed_email["report"]["report_metadata"]["report_id"]
|
||||
report_key = f"{report_org}_{report_id}"
|
||||
if report_key not in SEEN_AGGREGATE_REPORT_IDS:
|
||||
SEEN_AGGREGATE_REPORT_IDS[report_key] = True
|
||||
aggregate_reports.append(parsed_email["report"])
|
||||
else:
|
||||
logger.debug(
|
||||
f"Skipping duplicate aggregate report with ID: {report_id}"
|
||||
)
|
||||
aggregate_report_msg_uids.append(msg_uid)
|
||||
elif parsed_email["report_type"] == "forensic":
|
||||
forensic_reports.append(parsed_email["report"])
|
||||
@@ -1632,7 +1706,7 @@ def get_dmarc_reports_from_mailbox(
|
||||
|
||||
except Exception as e:
|
||||
message = "Error deleting message UID"
|
||||
e = "{0} {1}: " "{2}".format(message, msg_uid, e)
|
||||
e = "{0} {1}: {2}".format(message, msg_uid, e)
|
||||
logger.error("Mailbox error: {0}".format(e))
|
||||
else:
|
||||
if len(aggregate_report_msg_uids) > 0:
|
||||
@@ -1706,7 +1780,12 @@ def get_dmarc_reports_from_mailbox(
|
||||
]
|
||||
)
|
||||
|
||||
total_messages = len(connection.fetch_messages(reports_folder))
|
||||
if current_time:
|
||||
total_messages = len(
|
||||
connection.fetch_messages(reports_folder, since=current_time)
|
||||
)
|
||||
else:
|
||||
total_messages = len(connection.fetch_messages(reports_folder))
|
||||
|
||||
if not test and not batch_size and total_messages > 0:
|
||||
# Process emails that came in during the last run
|
||||
@@ -1725,6 +1804,7 @@ def get_dmarc_reports_from_mailbox(
|
||||
reverse_dns_map_path=reverse_dns_map_path,
|
||||
reverse_dns_map_url=reverse_dns_map_url,
|
||||
offline=offline,
|
||||
since=current_time,
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
@@ -14,6 +14,7 @@ import json
|
||||
from ssl import CERT_NONE, create_default_context
|
||||
from multiprocessing import Pipe, Process
|
||||
import sys
|
||||
import http.client
|
||||
from tqdm import tqdm
|
||||
|
||||
from parsedmarc import (
|
||||
@@ -46,6 +47,9 @@ from parsedmarc.mail.graph import AuthMethod
|
||||
|
||||
from parsedmarc.log import logger
|
||||
from parsedmarc.utils import is_mbox, get_reverse_dns
|
||||
from parsedmarc import SEEN_AGGREGATE_REPORT_IDS
|
||||
|
||||
http.client._MAXHEADERS = 200 # pylint:disable=protected-access
|
||||
|
||||
formatter = logging.Formatter(
|
||||
fmt="%(levelname)8s:%(filename)s:%(lineno)d:%(message)s",
|
||||
@@ -395,7 +399,7 @@ def _main():
|
||||
arg_parser.add_argument(
|
||||
"-c",
|
||||
"--config-file",
|
||||
help="a path to a configuration file " "(--silent implied)",
|
||||
help="a path to a configuration file (--silent implied)",
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"file_path",
|
||||
@@ -403,7 +407,7 @@ def _main():
|
||||
help="one or more paths to aggregate or forensic "
|
||||
"report files, emails, or mbox files'",
|
||||
)
|
||||
strip_attachment_help = "remove attachment payloads from forensic " "report output"
|
||||
strip_attachment_help = "remove attachment payloads from forensic report output"
|
||||
arg_parser.add_argument(
|
||||
"--strip-attachment-payloads", help=strip_attachment_help, action="store_true"
|
||||
)
|
||||
@@ -446,14 +450,14 @@ def _main():
|
||||
arg_parser.add_argument(
|
||||
"-t",
|
||||
"--dns_timeout",
|
||||
help="number of seconds to wait for an answer " "from DNS (default: 2.0)",
|
||||
help="number of seconds to wait for an answer from DNS (default: 2.0)",
|
||||
type=float,
|
||||
default=2.0,
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"--offline",
|
||||
action="store_true",
|
||||
help="do not make online queries for geolocation " " or DNS",
|
||||
help="do not make online queries for geolocation or DNS",
|
||||
)
|
||||
arg_parser.add_argument(
|
||||
"-s", "--silent", action="store_true", help="only print errors"
|
||||
@@ -510,6 +514,7 @@ def _main():
|
||||
mailbox_test=False,
|
||||
mailbox_batch_size=10,
|
||||
mailbox_check_timeout=30,
|
||||
mailbox_since=None,
|
||||
imap_host=None,
|
||||
imap_skip_certificate_verification=False,
|
||||
imap_ssl=True,
|
||||
@@ -526,6 +531,7 @@ def _main():
|
||||
graph_tenant_id=None,
|
||||
graph_mailbox=None,
|
||||
graph_allow_unencrypted_storage=False,
|
||||
graph_url="https://graph.microsoft.com",
|
||||
hec=None,
|
||||
hec_token=None,
|
||||
hec_index=None,
|
||||
@@ -714,6 +720,8 @@ def _main():
|
||||
opts.mailbox_batch_size = mailbox_config.getint("batch_size")
|
||||
if "check_timeout" in mailbox_config:
|
||||
opts.mailbox_check_timeout = mailbox_config.getint("check_timeout")
|
||||
if "since" in mailbox_config:
|
||||
opts.mailbox_since = mailbox_config["since"]
|
||||
|
||||
if "imap" in config.sections():
|
||||
imap_config = config["imap"]
|
||||
@@ -726,7 +734,7 @@ def _main():
|
||||
if "host" in imap_config:
|
||||
opts.imap_host = imap_config["host"]
|
||||
else:
|
||||
logger.error("host setting missing from the " "imap config section")
|
||||
logger.error("host setting missing from the imap config section")
|
||||
exit(-1)
|
||||
if "port" in imap_config:
|
||||
opts.imap_port = imap_config.getint("port")
|
||||
@@ -742,14 +750,12 @@ def _main():
|
||||
if "user" in imap_config:
|
||||
opts.imap_user = imap_config["user"]
|
||||
else:
|
||||
logger.critical("user setting missing from the " "imap config section")
|
||||
logger.critical("user setting missing from the imap config section")
|
||||
exit(-1)
|
||||
if "password" in imap_config:
|
||||
opts.imap_password = imap_config["password"]
|
||||
else:
|
||||
logger.critical(
|
||||
"password setting missing from the " "imap config section"
|
||||
)
|
||||
logger.critical("password setting missing from the imap config section")
|
||||
exit(-1)
|
||||
if "reports_folder" in imap_config:
|
||||
opts.mailbox_reports_folder = imap_config["reports_folder"]
|
||||
@@ -818,21 +824,20 @@ def _main():
|
||||
opts.graph_user = graph_config["user"]
|
||||
else:
|
||||
logger.critical(
|
||||
"user setting missing from the " "msgraph config section"
|
||||
"user setting missing from the msgraph config section"
|
||||
)
|
||||
exit(-1)
|
||||
if "password" in graph_config:
|
||||
opts.graph_password = graph_config["password"]
|
||||
else:
|
||||
logger.critical(
|
||||
"password setting missing from the " "msgraph config section"
|
||||
"password setting missing from the msgraph config section"
|
||||
)
|
||||
if "client_secret" in graph_config:
|
||||
opts.graph_client_secret = graph_config["client_secret"]
|
||||
else:
|
||||
logger.critical(
|
||||
"client_secret setting missing from the "
|
||||
"msgraph config section"
|
||||
"client_secret setting missing from the msgraph config section"
|
||||
)
|
||||
exit(-1)
|
||||
|
||||
@@ -845,7 +850,7 @@ def _main():
|
||||
opts.graph_tenant_id = graph_config["tenant_id"]
|
||||
else:
|
||||
logger.critical(
|
||||
"tenant_id setting missing from the " "msgraph config section"
|
||||
"tenant_id setting missing from the msgraph config section"
|
||||
)
|
||||
exit(-1)
|
||||
|
||||
@@ -854,8 +859,7 @@ def _main():
|
||||
opts.graph_client_secret = graph_config["client_secret"]
|
||||
else:
|
||||
logger.critical(
|
||||
"client_secret setting missing from the "
|
||||
"msgraph config section"
|
||||
"client_secret setting missing from the msgraph config section"
|
||||
)
|
||||
exit(-1)
|
||||
|
||||
@@ -863,7 +867,7 @@ def _main():
|
||||
opts.graph_client_id = graph_config["client_id"]
|
||||
else:
|
||||
logger.critical(
|
||||
"client_id setting missing from the " "msgraph config section"
|
||||
"client_id setting missing from the msgraph config section"
|
||||
)
|
||||
exit(-1)
|
||||
|
||||
@@ -871,10 +875,13 @@ def _main():
|
||||
opts.graph_mailbox = graph_config["mailbox"]
|
||||
elif opts.graph_auth_method != AuthMethod.UsernamePassword.name:
|
||||
logger.critical(
|
||||
"mailbox setting missing from the " "msgraph config section"
|
||||
"mailbox setting missing from the msgraph config section"
|
||||
)
|
||||
exit(-1)
|
||||
|
||||
if "graph_url" in graph_config:
|
||||
opts.graph_url = graph_config["graph_url"]
|
||||
|
||||
if "allow_unencrypted_storage" in graph_config:
|
||||
opts.graph_allow_unencrypted_storage = graph_config.getboolean(
|
||||
"allow_unencrypted_storage"
|
||||
@@ -886,7 +893,7 @@ def _main():
|
||||
opts.elasticsearch_hosts = _str_to_list(elasticsearch_config["hosts"])
|
||||
else:
|
||||
logger.critical(
|
||||
"hosts setting missing from the " "elasticsearch config section"
|
||||
"hosts setting missing from the elasticsearch config section"
|
||||
)
|
||||
exit(-1)
|
||||
if "timeout" in elasticsearch_config:
|
||||
@@ -924,7 +931,7 @@ def _main():
|
||||
opts.opensearch_hosts = _str_to_list(opensearch_config["hosts"])
|
||||
else:
|
||||
logger.critical(
|
||||
"hosts setting missing from the " "opensearch config section"
|
||||
"hosts setting missing from the opensearch config section"
|
||||
)
|
||||
exit(-1)
|
||||
if "timeout" in opensearch_config:
|
||||
@@ -960,21 +967,21 @@ def _main():
|
||||
opts.hec = hec_config["url"]
|
||||
else:
|
||||
logger.critical(
|
||||
"url setting missing from the " "splunk_hec config section"
|
||||
"url setting missing from the splunk_hec config section"
|
||||
)
|
||||
exit(-1)
|
||||
if "token" in hec_config:
|
||||
opts.hec_token = hec_config["token"]
|
||||
else:
|
||||
logger.critical(
|
||||
"token setting missing from the " "splunk_hec config section"
|
||||
"token setting missing from the splunk_hec config section"
|
||||
)
|
||||
exit(-1)
|
||||
if "index" in hec_config:
|
||||
opts.hec_index = hec_config["index"]
|
||||
else:
|
||||
logger.critical(
|
||||
"index setting missing from the " "splunk_hec config section"
|
||||
"index setting missing from the splunk_hec config section"
|
||||
)
|
||||
exit(-1)
|
||||
if "skip_certificate_verification" in hec_config:
|
||||
@@ -987,9 +994,7 @@ def _main():
|
||||
if "hosts" in kafka_config:
|
||||
opts.kafka_hosts = _str_to_list(kafka_config["hosts"])
|
||||
else:
|
||||
logger.critical(
|
||||
"hosts setting missing from the " "kafka config section"
|
||||
)
|
||||
logger.critical("hosts setting missing from the kafka config section")
|
||||
exit(-1)
|
||||
if "user" in kafka_config:
|
||||
opts.kafka_username = kafka_config["user"]
|
||||
@@ -1004,21 +1009,20 @@ def _main():
|
||||
opts.kafka_aggregate_topic = kafka_config["aggregate_topic"]
|
||||
else:
|
||||
logger.critical(
|
||||
"aggregate_topic setting missing from the " "kafka config section"
|
||||
"aggregate_topic setting missing from the kafka config section"
|
||||
)
|
||||
exit(-1)
|
||||
if "forensic_topic" in kafka_config:
|
||||
opts.kafka_forensic_topic = kafka_config["forensic_topic"]
|
||||
else:
|
||||
logger.critical(
|
||||
"forensic_topic setting missing from the " "kafka config section"
|
||||
"forensic_topic setting missing from the kafka config section"
|
||||
)
|
||||
if "smtp_tls_topic" in kafka_config:
|
||||
opts.kafka_smtp_tls_topic = kafka_config["smtp_tls_topic"]
|
||||
else:
|
||||
logger.critical(
|
||||
"forensic_topic setting missing from the "
|
||||
"splunk_hec config section"
|
||||
"forensic_topic setting missing from the splunk_hec config section"
|
||||
)
|
||||
|
||||
if "smtp" in config.sections():
|
||||
@@ -1026,7 +1030,7 @@ def _main():
|
||||
if "host" in smtp_config:
|
||||
opts.smtp_host = smtp_config["host"]
|
||||
else:
|
||||
logger.critical("host setting missing from the " "smtp config section")
|
||||
logger.critical("host setting missing from the smtp config section")
|
||||
exit(-1)
|
||||
if "port" in smtp_config:
|
||||
opts.smtp_port = smtp_config.getint("port")
|
||||
@@ -1038,23 +1042,21 @@ def _main():
|
||||
if "user" in smtp_config:
|
||||
opts.smtp_user = smtp_config["user"]
|
||||
else:
|
||||
logger.critical("user setting missing from the " "smtp config section")
|
||||
logger.critical("user setting missing from the smtp config section")
|
||||
exit(-1)
|
||||
if "password" in smtp_config:
|
||||
opts.smtp_password = smtp_config["password"]
|
||||
else:
|
||||
logger.critical(
|
||||
"password setting missing from the " "smtp config section"
|
||||
)
|
||||
logger.critical("password setting missing from the smtp config section")
|
||||
exit(-1)
|
||||
if "from" in smtp_config:
|
||||
opts.smtp_from = smtp_config["from"]
|
||||
else:
|
||||
logger.critical("from setting missing from the " "smtp config section")
|
||||
logger.critical("from setting missing from the smtp config section")
|
||||
if "to" in smtp_config:
|
||||
opts.smtp_to = _str_to_list(smtp_config["to"])
|
||||
else:
|
||||
logger.critical("to setting missing from the " "smtp config section")
|
||||
logger.critical("to setting missing from the smtp config section")
|
||||
if "subject" in smtp_config:
|
||||
opts.smtp_subject = smtp_config["subject"]
|
||||
if "attachment" in smtp_config:
|
||||
@@ -1067,7 +1069,7 @@ def _main():
|
||||
if "bucket" in s3_config:
|
||||
opts.s3_bucket = s3_config["bucket"]
|
||||
else:
|
||||
logger.critical("bucket setting missing from the " "s3 config section")
|
||||
logger.critical("bucket setting missing from the s3 config section")
|
||||
exit(-1)
|
||||
if "path" in s3_config:
|
||||
opts.s3_path = s3_config["path"]
|
||||
@@ -1092,9 +1094,7 @@ def _main():
|
||||
if "server" in syslog_config:
|
||||
opts.syslog_server = syslog_config["server"]
|
||||
else:
|
||||
logger.critical(
|
||||
"server setting missing from the " "syslog config section"
|
||||
)
|
||||
logger.critical("server setting missing from the syslog config section")
|
||||
exit(-1)
|
||||
if "port" in syslog_config:
|
||||
opts.syslog_port = syslog_config["port"]
|
||||
@@ -1145,17 +1145,17 @@ def _main():
|
||||
if "host" in gelf_config:
|
||||
opts.gelf_host = gelf_config["host"]
|
||||
else:
|
||||
logger.critical("host setting missing from the " "gelf config section")
|
||||
logger.critical("host setting missing from the gelf config section")
|
||||
exit(-1)
|
||||
if "port" in gelf_config:
|
||||
opts.gelf_port = gelf_config["port"]
|
||||
else:
|
||||
logger.critical("port setting missing from the " "gelf config section")
|
||||
logger.critical("port setting missing from the gelf config section")
|
||||
exit(-1)
|
||||
if "mode" in gelf_config:
|
||||
opts.gelf_mode = gelf_config["mode"]
|
||||
else:
|
||||
logger.critical("mode setting missing from the " "gelf config section")
|
||||
logger.critical("mode setting missing from the gelf config section")
|
||||
exit(-1)
|
||||
|
||||
if "webhook" in config.sections():
|
||||
@@ -1181,8 +1181,7 @@ def _main():
|
||||
try:
|
||||
fh = logging.FileHandler(opts.log_file, "a")
|
||||
formatter = logging.Formatter(
|
||||
"%(asctime)s - "
|
||||
"%(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
|
||||
"%(asctime)s - %(levelname)s - [%(filename)s:%(lineno)d] - %(message)s"
|
||||
)
|
||||
fh.setFormatter(formatter)
|
||||
logger.addHandler(fh)
|
||||
@@ -1290,7 +1289,7 @@ def _main():
|
||||
|
||||
if opts.hec:
|
||||
if opts.hec_token is None or opts.hec_index is None:
|
||||
logger.error("HEC token and HEC index are required when " "using HEC URL")
|
||||
logger.error("HEC token and HEC index are required when using HEC URL")
|
||||
exit(1)
|
||||
|
||||
verify = True
|
||||
@@ -1415,7 +1414,17 @@ def _main():
|
||||
logger.error("Failed to parse {0} - {1}".format(result[1], result[0]))
|
||||
else:
|
||||
if result[0]["report_type"] == "aggregate":
|
||||
aggregate_reports.append(result[0]["report"])
|
||||
report_org = result[0]["report"]["report_metadata"]["org_name"]
|
||||
report_id = result[0]["report"]["report_metadata"]["report_id"]
|
||||
report_key = f"{report_org}_{report_id}"
|
||||
if report_key not in SEEN_AGGREGATE_REPORT_IDS:
|
||||
SEEN_AGGREGATE_REPORT_IDS[report_key] = True
|
||||
aggregate_reports.append(result[0]["report"])
|
||||
else:
|
||||
logger.debug(
|
||||
"Skipping duplicate aggregate report "
|
||||
f"from {report_org} with ID: {report_id}"
|
||||
)
|
||||
elif result[0]["report_type"] == "forensic":
|
||||
forensic_reports.append(result[0]["report"])
|
||||
elif result[0]["report_type"] == "smtp_tls":
|
||||
@@ -1443,7 +1452,7 @@ def _main():
|
||||
try:
|
||||
if opts.imap_user is None or opts.imap_password is None:
|
||||
logger.error(
|
||||
"IMAP user and password must be specified if" "host is specified"
|
||||
"IMAP user and password must be specified ifhost is specified"
|
||||
)
|
||||
|
||||
ssl = True
|
||||
@@ -1482,6 +1491,7 @@ def _main():
|
||||
password=opts.graph_password,
|
||||
token_file=opts.graph_token_file,
|
||||
allow_unencrypted_storage=opts.graph_allow_unencrypted_storage,
|
||||
graph_url=opts.graph_url,
|
||||
)
|
||||
|
||||
except Exception:
|
||||
@@ -1540,6 +1550,7 @@ def _main():
|
||||
nameservers=opts.nameservers,
|
||||
test=opts.mailbox_test,
|
||||
strip_attachment_payloads=opts.strip_attachment_payloads,
|
||||
since=opts.mailbox_since,
|
||||
)
|
||||
|
||||
aggregate_reports += reports["aggregate_reports"]
|
||||
|
||||
@@ -552,8 +552,8 @@ def save_forensic_report_to_elasticsearch(
|
||||
for original_header in original_headers:
|
||||
headers[original_header.lower()] = original_headers[original_header]
|
||||
|
||||
arrival_date_human = forensic_report["arrival_date_utc"]
|
||||
arrival_date = human_timestamp_to_datetime(arrival_date_human)
|
||||
arrival_date = human_timestamp_to_datetime(forensic_report["arrival_date_utc"])
|
||||
arrival_date_epoch_milliseconds = int(arrival_date.timestamp() * 1000)
|
||||
|
||||
if index_suffix is not None:
|
||||
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
||||
@@ -562,20 +562,35 @@ def save_forensic_report_to_elasticsearch(
|
||||
if index_prefix is not None:
|
||||
search_index = "{0}{1}".format(index_prefix, search_index)
|
||||
search = Search(index=search_index)
|
||||
arrival_query = {"match": {"arrival_date": arrival_date}}
|
||||
q = Q(arrival_query)
|
||||
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))
|
||||
|
||||
from_ = None
|
||||
to_ = None
|
||||
subject = None
|
||||
if "from" in headers:
|
||||
from_ = headers["from"]
|
||||
from_query = {"match_phrase": {"sample.headers.from": from_}}
|
||||
q = q & Q(from_query)
|
||||
# We convert the FROM header from a string list to a flat string.
|
||||
headers["from"] = headers["from"][0]
|
||||
if headers["from"][0] == "":
|
||||
headers["from"] = headers["from"][1]
|
||||
else:
|
||||
headers["from"] = " <".join(headers["from"]) + ">"
|
||||
|
||||
from_ = dict()
|
||||
from_["sample.headers.from"] = headers["from"]
|
||||
from_query = Q(dict(match_phrase=from_))
|
||||
q = q & from_query
|
||||
if "to" in headers:
|
||||
to_ = headers["to"]
|
||||
to_query = {"match_phrase": {"sample.headers.to": to_}}
|
||||
q = q & Q(to_query)
|
||||
# We convert the TO header from a string list to a flat string.
|
||||
headers["to"] = headers["to"][0]
|
||||
if headers["to"][0] == "":
|
||||
headers["to"] = headers["to"][1]
|
||||
else:
|
||||
headers["to"] = " <".join(headers["to"]) + ">"
|
||||
|
||||
to_ = dict()
|
||||
to_["sample.headers.to"] = headers["to"]
|
||||
to_query = Q(dict(match_phrase=to_))
|
||||
q = q & to_query
|
||||
if "subject" in headers:
|
||||
subject = headers["subject"]
|
||||
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
||||
@@ -589,7 +604,9 @@ def save_forensic_report_to_elasticsearch(
|
||||
"A forensic sample to {0} from {1} "
|
||||
"with a subject of {2} and arrival date of {3} "
|
||||
"already exists in "
|
||||
"Elasticsearch".format(to_, from_, subject, arrival_date_human)
|
||||
"Elasticsearch".format(
|
||||
to_, from_, subject, forensic_report["arrival_date_utc"]
|
||||
)
|
||||
)
|
||||
|
||||
parsed_sample = forensic_report["parsed_sample"]
|
||||
@@ -625,7 +642,7 @@ def save_forensic_report_to_elasticsearch(
|
||||
user_agent=forensic_report["user_agent"],
|
||||
version=forensic_report["version"],
|
||||
original_mail_from=forensic_report["original_mail_from"],
|
||||
arrival_date=arrival_date,
|
||||
arrival_date=arrival_date_epoch_milliseconds,
|
||||
domain=forensic_report["reported_domain"],
|
||||
original_envelope_id=forensic_report["original_envelope_id"],
|
||||
authentication_results=forensic_report["authentication_results"],
|
||||
|
||||
@@ -63,24 +63,36 @@ class GmailConnection(MailboxConnection):
|
||||
).execute()
|
||||
except HttpError as e:
|
||||
if e.status_code == 409:
|
||||
logger.debug(
|
||||
f"Folder {folder_name} already exists, " f"skipping creation"
|
||||
)
|
||||
logger.debug(f"Folder {folder_name} already exists, skipping creation")
|
||||
else:
|
||||
raise e
|
||||
|
||||
def _fetch_all_message_ids(self, reports_label_id, page_token=None):
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
def _fetch_all_message_ids(self, reports_label_id, page_token=None, since=None):
|
||||
if since:
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
q=f"after:{since}",
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
else:
|
||||
results = (
|
||||
self.service.users()
|
||||
.messages()
|
||||
.list(
|
||||
userId="me",
|
||||
includeSpamTrash=self.include_spam_trash,
|
||||
labelIds=[reports_label_id],
|
||||
pageToken=page_token,
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
.execute()
|
||||
)
|
||||
messages = results.get("messages", [])
|
||||
for message in messages:
|
||||
yield message["id"]
|
||||
@@ -92,7 +104,13 @@ class GmailConnection(MailboxConnection):
|
||||
|
||||
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
|
||||
reports_label_id = self._find_label_id_for_label(reports_folder)
|
||||
return [id for id in self._fetch_all_message_ids(reports_label_id)]
|
||||
since = kwargs.get("since")
|
||||
if since:
|
||||
return [
|
||||
id for id in self._fetch_all_message_ids(reports_label_id, since=since)
|
||||
]
|
||||
else:
|
||||
return [id for id in self._fetch_all_message_ids(reports_label_id)]
|
||||
|
||||
def fetch_message(self, message_id):
|
||||
msg = (
|
||||
|
||||
@@ -89,6 +89,7 @@ class MSGraphConnection(MailboxConnection):
|
||||
self,
|
||||
auth_method: str,
|
||||
mailbox: str,
|
||||
graph_url: str,
|
||||
client_id: str,
|
||||
client_secret: str,
|
||||
username: str,
|
||||
@@ -108,7 +109,10 @@ class MSGraphConnection(MailboxConnection):
|
||||
token_path=token_path,
|
||||
allow_unencrypted_storage=allow_unencrypted_storage,
|
||||
)
|
||||
client_params = {"credential": credential}
|
||||
client_params = {
|
||||
"credential": credential,
|
||||
"cloud": graph_url,
|
||||
}
|
||||
if not isinstance(credential, ClientSecretCredential):
|
||||
scopes = ["Mail.ReadWrite"]
|
||||
# Detect if mailbox is shared
|
||||
@@ -137,25 +141,30 @@ class MSGraphConnection(MailboxConnection):
|
||||
request_url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
|
||||
resp = self._client.post(request_url, json=request_body)
|
||||
if resp.status_code == 409:
|
||||
logger.debug(f"Folder {folder_name} already exists, " f"skipping creation")
|
||||
logger.debug(f"Folder {folder_name} already exists, skipping creation")
|
||||
elif resp.status_code == 201:
|
||||
logger.debug(f"Created folder {folder_name}")
|
||||
else:
|
||||
logger.warning(f"Unknown response " f"{resp.status_code} {resp.json()}")
|
||||
logger.warning(f"Unknown response {resp.status_code} {resp.json()}")
|
||||
|
||||
def fetch_messages(self, folder_name: str, **kwargs) -> List[str]:
|
||||
"""Returns a list of message UIDs in the specified folder"""
|
||||
folder_id = self._find_folder_id_from_folder_path(folder_name)
|
||||
url = f"/users/{self.mailbox_name}/mailFolders/" f"{folder_id}/messages"
|
||||
url = f"/users/{self.mailbox_name}/mailFolders/{folder_id}/messages"
|
||||
since = kwargs.get("since")
|
||||
if not since:
|
||||
since = None
|
||||
batch_size = kwargs.get("batch_size")
|
||||
if not batch_size:
|
||||
batch_size = 0
|
||||
emails = self._get_all_messages(url, batch_size)
|
||||
emails = self._get_all_messages(url, batch_size, since)
|
||||
return [email["id"] for email in emails]
|
||||
|
||||
def _get_all_messages(self, url, batch_size):
|
||||
def _get_all_messages(self, url, batch_size, since):
|
||||
messages: list
|
||||
params = {"$select": "id"}
|
||||
if since:
|
||||
params["$filter"] = f"receivedDateTime ge {since}"
|
||||
if batch_size and batch_size > 0:
|
||||
params["$top"] = batch_size
|
||||
else:
|
||||
@@ -166,7 +175,7 @@ class MSGraphConnection(MailboxConnection):
|
||||
messages = result.json()["value"]
|
||||
# Loop if next page is present and not obtained message limit.
|
||||
while "@odata.nextLink" in result.json() and (
|
||||
batch_size == 0 or batch_size - len(messages) > 0
|
||||
since is not None or (batch_size == 0 or batch_size - len(messages) > 0)
|
||||
):
|
||||
result = self._client.get(result.json()["@odata.nextLink"])
|
||||
if result.status_code != 200:
|
||||
@@ -180,17 +189,19 @@ class MSGraphConnection(MailboxConnection):
|
||||
resp = self._client.patch(url, json={"isRead": "true"})
|
||||
if resp.status_code != 200:
|
||||
raise RuntimeWarning(
|
||||
f"Failed to mark message read" f"{resp.status_code}: {resp.json()}"
|
||||
f"Failed to mark message read{resp.status_code}: {resp.json()}"
|
||||
)
|
||||
|
||||
def fetch_message(self, message_id: str):
|
||||
def fetch_message(self, message_id: str, **kwargs):
|
||||
url = f"/users/{self.mailbox_name}/messages/{message_id}/$value"
|
||||
result = self._client.get(url)
|
||||
if result.status_code != 200:
|
||||
raise RuntimeWarning(
|
||||
f"Failed to fetch message" f"{result.status_code}: {result.json()}"
|
||||
f"Failed to fetch message{result.status_code}: {result.json()}"
|
||||
)
|
||||
self.mark_message_read(message_id)
|
||||
mark_read = kwargs.get("mark_read")
|
||||
if mark_read:
|
||||
self.mark_message_read(message_id)
|
||||
return result.text
|
||||
|
||||
def delete_message(self, message_id: str):
|
||||
@@ -198,7 +209,7 @@ class MSGraphConnection(MailboxConnection):
|
||||
resp = self._client.delete(url)
|
||||
if resp.status_code != 204:
|
||||
raise RuntimeWarning(
|
||||
f"Failed to delete message " f"{resp.status_code}: {resp.json()}"
|
||||
f"Failed to delete message {resp.status_code}: {resp.json()}"
|
||||
)
|
||||
|
||||
def move_message(self, message_id: str, folder_name: str):
|
||||
@@ -208,7 +219,7 @@ class MSGraphConnection(MailboxConnection):
|
||||
resp = self._client.post(url, json=request_body)
|
||||
if resp.status_code != 201:
|
||||
raise RuntimeWarning(
|
||||
f"Failed to move message " f"{resp.status_code}: {resp.json()}"
|
||||
f"Failed to move message {resp.status_code}: {resp.json()}"
|
||||
)
|
||||
|
||||
def keepalive(self):
|
||||
@@ -243,7 +254,7 @@ class MSGraphConnection(MailboxConnection):
|
||||
filter = f"?$filter=displayName eq '{folder_name}'"
|
||||
folders_resp = self._client.get(url + filter)
|
||||
if folders_resp.status_code != 200:
|
||||
raise RuntimeWarning(f"Failed to list folders." f"{folders_resp.json()}")
|
||||
raise RuntimeWarning(f"Failed to list folders.{folders_resp.json()}")
|
||||
folders: list = folders_resp.json()["value"]
|
||||
matched_folders = [
|
||||
folder for folder in folders if folder["displayName"] == folder_name
|
||||
|
||||
@@ -39,7 +39,11 @@ class IMAPConnection(MailboxConnection):
|
||||
|
||||
def fetch_messages(self, reports_folder: str, **kwargs):
|
||||
self._client.select_folder(reports_folder)
|
||||
return self._client.search()
|
||||
since = kwargs.get("since")
|
||||
if since:
|
||||
return self._client.search(["SINCE", since])
|
||||
else:
|
||||
return self._client.search()
|
||||
|
||||
def fetch_message(self, message_id):
|
||||
return self._client.fetch_message(message_id, parse=False)
|
||||
@@ -81,7 +85,5 @@ class IMAPConnection(MailboxConnection):
|
||||
logger.warning("IMAP connection timeout. Reconnecting...")
|
||||
sleep(check_timeout)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"IMAP connection error. {0}. " "Reconnecting...".format(e)
|
||||
)
|
||||
logger.warning("IMAP connection error. {0}. Reconnecting...".format(e))
|
||||
sleep(check_timeout)
|
||||
|
||||
@@ -202,13 +202,15 @@ class _SMTPTLSPolicyDoc(InnerDoc):
|
||||
receiving_ip,
|
||||
receiving_mx_helo,
|
||||
failed_session_count,
|
||||
sending_mta_ip=None,
|
||||
receiving_mx_hostname=None,
|
||||
additional_information_uri=None,
|
||||
failure_reason_code=None,
|
||||
):
|
||||
self.failure_details.append(
|
||||
_details = _SMTPTLSFailureDetailsDoc(
|
||||
result_type=result_type,
|
||||
ip_address=ip_address,
|
||||
sending_mta_ip=sending_mta_ip,
|
||||
receiving_mx_hostname=receiving_mx_hostname,
|
||||
receiving_mx_helo=receiving_mx_helo,
|
||||
receiving_ip=receiving_ip,
|
||||
@@ -216,9 +218,10 @@ class _SMTPTLSPolicyDoc(InnerDoc):
|
||||
additional_information=additional_information_uri,
|
||||
failure_reason_code=failure_reason_code,
|
||||
)
|
||||
self.failure_details.append(_details)
|
||||
|
||||
|
||||
class _SMTPTLSFailureReportDoc(Document):
|
||||
class _SMTPTLSReportDoc(Document):
|
||||
class Index:
|
||||
name = "smtp_tls"
|
||||
|
||||
@@ -499,6 +502,7 @@ def save_aggregate_report_to_opensearch(
|
||||
index = "{0}_{1}".format(index, index_suffix)
|
||||
if index_prefix:
|
||||
index = "{0}{1}".format(index_prefix, index)
|
||||
|
||||
index = "{0}-{1}".format(index, index_date)
|
||||
index_settings = dict(
|
||||
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
||||
@@ -548,8 +552,8 @@ def save_forensic_report_to_opensearch(
|
||||
for original_header in original_headers:
|
||||
headers[original_header.lower()] = original_headers[original_header]
|
||||
|
||||
arrival_date_human = forensic_report["arrival_date_utc"]
|
||||
arrival_date = human_timestamp_to_datetime(arrival_date_human)
|
||||
arrival_date = human_timestamp_to_datetime(forensic_report["arrival_date_utc"])
|
||||
arrival_date_epoch_milliseconds = int(arrival_date.timestamp() * 1000)
|
||||
|
||||
if index_suffix is not None:
|
||||
search_index = "dmarc_forensic_{0}*".format(index_suffix)
|
||||
@@ -558,20 +562,35 @@ def save_forensic_report_to_opensearch(
|
||||
if index_prefix is not None:
|
||||
search_index = "{0}{1}".format(index_prefix, search_index)
|
||||
search = Search(index=search_index)
|
||||
arrival_query = {"match": {"arrival_date": arrival_date}}
|
||||
q = Q(arrival_query)
|
||||
q = Q(dict(match=dict(arrival_date=arrival_date_epoch_milliseconds)))
|
||||
|
||||
from_ = None
|
||||
to_ = None
|
||||
subject = None
|
||||
if "from" in headers:
|
||||
from_ = headers["from"]
|
||||
from_query = {"match_phrase": {"sample.headers.from": from_}}
|
||||
q = q & Q(from_query)
|
||||
# We convert the FROM header from a string list to a flat string.
|
||||
headers["from"] = headers["from"][0]
|
||||
if headers["from"][0] == "":
|
||||
headers["from"] = headers["from"][1]
|
||||
else:
|
||||
headers["from"] = " <".join(headers["from"]) + ">"
|
||||
|
||||
from_ = dict()
|
||||
from_["sample.headers.from"] = headers["from"]
|
||||
from_query = Q(dict(match_phrase=from_))
|
||||
q = q & from_query
|
||||
if "to" in headers:
|
||||
to_ = headers["to"]
|
||||
to_query = {"match_phrase": {"sample.headers.to": to_}}
|
||||
q = q & Q(to_query)
|
||||
# We convert the TO header from a string list to a flat string.
|
||||
headers["to"] = headers["to"][0]
|
||||
if headers["to"][0] == "":
|
||||
headers["to"] = headers["to"][1]
|
||||
else:
|
||||
headers["to"] = " <".join(headers["to"]) + ">"
|
||||
|
||||
to_ = dict()
|
||||
to_["sample.headers.to"] = headers["to"]
|
||||
to_query = Q(dict(match_phrase=to_))
|
||||
q = q & to_query
|
||||
if "subject" in headers:
|
||||
subject = headers["subject"]
|
||||
subject_query = {"match_phrase": {"sample.headers.subject": subject}}
|
||||
@@ -585,7 +604,9 @@ def save_forensic_report_to_opensearch(
|
||||
"A forensic sample to {0} from {1} "
|
||||
"with a subject of {2} and arrival date of {3} "
|
||||
"already exists in "
|
||||
"OpenSearch".format(to_, from_, subject, arrival_date_human)
|
||||
"OpenSearch".format(
|
||||
to_, from_, subject, forensic_report["arrival_date_utc"]
|
||||
)
|
||||
)
|
||||
|
||||
parsed_sample = forensic_report["parsed_sample"]
|
||||
@@ -621,7 +642,7 @@ def save_forensic_report_to_opensearch(
|
||||
user_agent=forensic_report["user_agent"],
|
||||
version=forensic_report["version"],
|
||||
original_mail_from=forensic_report["original_mail_from"],
|
||||
arrival_date=arrival_date,
|
||||
arrival_date=arrival_date_epoch_milliseconds,
|
||||
domain=forensic_report["reported_domain"],
|
||||
original_envelope_id=forensic_report["original_envelope_id"],
|
||||
authentication_results=forensic_report["authentication_results"],
|
||||
@@ -685,7 +706,7 @@ def save_smtp_tls_report_to_opensearch(
|
||||
AlreadySaved
|
||||
"""
|
||||
logger.info("Saving aggregate report to OpenSearch")
|
||||
org_name = report["org_name"]
|
||||
org_name = report["organization_name"]
|
||||
report_id = report["report_id"]
|
||||
begin_date = human_timestamp_to_datetime(report["begin_date"], to_utc=True)
|
||||
end_date = human_timestamp_to_datetime(report["end_date"], to_utc=True)
|
||||
@@ -741,11 +762,11 @@ def save_smtp_tls_report_to_opensearch(
|
||||
number_of_shards=number_of_shards, number_of_replicas=number_of_replicas
|
||||
)
|
||||
|
||||
smtp_tls_doc = _SMTPTLSFailureReportDoc(
|
||||
organization_name=report["organization_name"],
|
||||
date_range=[report["date_begin"], report["date_end"]],
|
||||
date_begin=report["date_begin"],
|
||||
date_end=report["date_end"],
|
||||
smtp_tls_doc = _SMTPTLSReportDoc(
|
||||
org_name=report["organization_name"],
|
||||
date_range=[report["begin_date"], report["end_date"]],
|
||||
date_begin=report["begin_date"],
|
||||
date_end=report["end_date"],
|
||||
contact_info=report["contact_info"],
|
||||
report_id=report["report_id"],
|
||||
)
|
||||
@@ -760,32 +781,48 @@ def save_smtp_tls_report_to_opensearch(
|
||||
policy_doc = _SMTPTLSPolicyDoc(
|
||||
policy_domain=policy["policy_domain"],
|
||||
policy_type=policy["policy_type"],
|
||||
succesful_session_count=policy["successful_session_count"],
|
||||
failed_session_count=policy["failed_session_count"],
|
||||
policy_string=policy_strings,
|
||||
mx_host_patterns=mx_host_patterns,
|
||||
)
|
||||
if "failure_details" in policy:
|
||||
failure_details = policy["failure_details"]
|
||||
receiving_mx_hostname = None
|
||||
additional_information_uri = None
|
||||
failure_reason_code = None
|
||||
if "receiving_mx_hostname" in failure_details:
|
||||
receiving_mx_hostname = failure_details["receiving_mx_hostname"]
|
||||
if "additional_information_uri" in failure_details:
|
||||
additional_information_uri = failure_details[
|
||||
"additional_information_uri"
|
||||
]
|
||||
if "failure_reason_code" in failure_details:
|
||||
failure_reason_code = failure_details["failure_reason_code"]
|
||||
policy_doc.add_failure_details(
|
||||
result_type=failure_details["result_type"],
|
||||
ip_address=failure_details["ip_address"],
|
||||
receiving_ip=failure_details["receiving_ip"],
|
||||
receiving_mx_helo=failure_details["receiving_mx_helo"],
|
||||
failed_session_count=failure_details["failed_session_count"],
|
||||
receiving_mx_hostname=receiving_mx_hostname,
|
||||
additional_information_uri=additional_information_uri,
|
||||
failure_reason_code=failure_reason_code,
|
||||
)
|
||||
for failure_detail in policy["failure_details"]:
|
||||
receiving_mx_hostname = None
|
||||
additional_information_uri = None
|
||||
failure_reason_code = None
|
||||
ip_address = None
|
||||
receiving_ip = None
|
||||
receiving_mx_helo = None
|
||||
sending_mta_ip = None
|
||||
|
||||
if "receiving_mx_hostname" in failure_detail:
|
||||
receiving_mx_hostname = failure_detail["receiving_mx_hostname"]
|
||||
if "additional_information_uri" in failure_detail:
|
||||
additional_information_uri = failure_detail[
|
||||
"additional_information_uri"
|
||||
]
|
||||
if "failure_reason_code" in failure_detail:
|
||||
failure_reason_code = failure_detail["failure_reason_code"]
|
||||
if "ip_address" in failure_detail:
|
||||
ip_address = failure_detail["ip_address"]
|
||||
if "receiving_ip" in failure_detail:
|
||||
receiving_ip = failure_detail["receiving_ip"]
|
||||
if "receiving_mx_helo" in failure_detail:
|
||||
receiving_mx_helo = failure_detail["receiving_mx_helo"]
|
||||
if "sending_mta_ip" in failure_detail:
|
||||
sending_mta_ip = failure_detail["sending_mta_ip"]
|
||||
policy_doc.add_failure_details(
|
||||
result_type=failure_detail["result_type"],
|
||||
ip_address=ip_address,
|
||||
receiving_ip=receiving_ip,
|
||||
receiving_mx_helo=receiving_mx_helo,
|
||||
failed_session_count=failure_detail["failed_session_count"],
|
||||
sending_mta_ip=sending_mta_ip,
|
||||
receiving_mx_hostname=receiving_mx_hostname,
|
||||
additional_information_uri=additional_information_uri,
|
||||
failure_reason_code=failure_reason_code,
|
||||
)
|
||||
smtp_tls_doc.policies.append(policy_doc)
|
||||
|
||||
create_indexes([index], index_settings)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# About
|
||||
|
||||
`dbip-country-lite.mmdb` is provided by [dbip][dbip] under a
|
||||
[ Creative Commons Attribution 4.0 International License][cc].
|
||||
[Creative Commons Attribution 4.0 International License][cc].
|
||||
|
||||
[dbip]: https://db-ip.com/db/lite.php
|
||||
[dbip]: https://db-ip.com/db/download/ip-to-country-lite
|
||||
[cc]: http://creativecommons.org/licenses/by/4.0/
|
||||
|
||||
Binary file not shown.
@@ -19,33 +19,65 @@ The `service_type` is based on the following rule precedence:
|
||||
3. All telecommunications providers that offer internet access are identified as `ISP`, even if they also offer other services, such as web hosting or email hosting.
|
||||
4. All web hosting providers are identified as `Web Hosting`, even if the service also offers email hosting.
|
||||
5. All email account providers are identified as `Email Provider`, no matter how or where they are hosted
|
||||
6. All legitimate platforms offering their Software as a Service SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
|
||||
6. All legitimate platforms offering their Software as a Service (SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
|
||||
7. All other senders that use their own domain as a Reverse DNS base domain should be identified based on their industry
|
||||
|
||||
- Agriculture
|
||||
- Automotive
|
||||
- Beauty
|
||||
- Construction
|
||||
- Consulting
|
||||
- Defense
|
||||
- Education
|
||||
- Email Provider
|
||||
- Email Security
|
||||
- Education
|
||||
- Entertainment
|
||||
- Event Planning
|
||||
- Finance
|
||||
- Food
|
||||
- Government
|
||||
- Government Media
|
||||
- Healthcare
|
||||
- IaaS
|
||||
- Industrial
|
||||
- ISP
|
||||
- Logistics
|
||||
- Manufacturing
|
||||
- Marketing
|
||||
- MSP
|
||||
- MSSP
|
||||
- News
|
||||
- Nonprofit
|
||||
- PaaS
|
||||
- Photography
|
||||
- Print
|
||||
- Publishing
|
||||
- Real Estate
|
||||
- Retail
|
||||
- SaaS
|
||||
- Science
|
||||
- Search Engine
|
||||
- Social Media
|
||||
- Sports
|
||||
- Staffing
|
||||
- Technology
|
||||
- Travel
|
||||
- Web Host
|
||||
|
||||
The file currently contains over 600 mappings from a wide variety of email sending services, including large email
|
||||
providers, SaaS platforms, small web hosts, and healthcare companies. Ideally this mapping will continuously grow to
|
||||
include many other services and industries.
|
||||
The file currently contains over 1,400 mappings from a wide variety of email sending sources.
|
||||
|
||||
## known_unknown_base_reverse_dns.txt
|
||||
|
||||
A list of reverse DNS base domains that could not be identified as belonging to a particular organization, service, or industry.
|
||||
|
||||
## base_reverse_dns.csv
|
||||
|
||||
A CSV with the fields `source_name` and optionally `message_countcount`. This CSV can be generated byy exporting the base DNS data from the Kibana on Splunk dashboards provided by parsedmarc. This file is not tracked by Git.
|
||||
|
||||
## unknown_base_reverse_dns.csv
|
||||
|
||||
A CSV file with the fields `source_name` and `message_count`. This file is not tracked by Git.
|
||||
|
||||
## find_unknown_base_reverse_dns.py
|
||||
|
||||
This is a python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`.
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
73
parsedmarc/resources/maps/find_unknown_base_reverse_dns.py
Executable file
73
parsedmarc/resources/maps/find_unknown_base_reverse_dns.py
Executable file
@@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import logging
|
||||
import os
|
||||
import csv
|
||||
|
||||
|
||||
def _main():
|
||||
input_csv_file_path = "base_reverse_dns.csv"
|
||||
base_reverse_dns_map_file_path = "base_reverse_dns_map.csv"
|
||||
known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt"
|
||||
output_csv_file_path = "unknown_base_reverse_dns.csv"
|
||||
|
||||
csv_headers = ["source_name", "message_count"]
|
||||
|
||||
output_rows = []
|
||||
|
||||
logging.basicConfig()
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
for p in [
|
||||
input_csv_file_path,
|
||||
base_reverse_dns_map_file_path,
|
||||
known_unknown_list_file_path,
|
||||
]:
|
||||
if not os.path.exists(p):
|
||||
logger.error(f"{p} does not exist")
|
||||
exit(1)
|
||||
logger.info(f"Loading {known_unknown_list_file_path}")
|
||||
known_unknown_domains = []
|
||||
with open(known_unknown_list_file_path) as f:
|
||||
for line in f.readlines():
|
||||
domain = line.lower().strip()
|
||||
if domain in known_unknown_domains:
|
||||
logger.warning(
|
||||
f"{domain} is in {known_unknown_list_file_path} multiple times"
|
||||
)
|
||||
else:
|
||||
known_unknown_domains.append(domain)
|
||||
logger.info(f"Loading {base_reverse_dns_map_file_path}")
|
||||
known_domains = []
|
||||
with open(base_reverse_dns_map_file_path) as f:
|
||||
for row in csv.DictReader(f):
|
||||
domain = row["base_reverse_dns"].lower().strip()
|
||||
if domain in known_domains:
|
||||
logger.warning(
|
||||
f"{domain} is in {base_reverse_dns_map_file_path} multiple times"
|
||||
)
|
||||
else:
|
||||
known_domains.append(domain)
|
||||
if domain in known_unknown_domains and known_domains:
|
||||
pass
|
||||
logger.warning(
|
||||
f"{domain} is in {known_unknown_list_file_path} and {base_reverse_dns_map_file_path}"
|
||||
)
|
||||
|
||||
logger.info(f"Checking domains against {base_reverse_dns_map_file_path}")
|
||||
with open(input_csv_file_path) as f:
|
||||
for row in csv.DictReader(f):
|
||||
domain = row["source_name"].lower().strip()
|
||||
if domain not in known_domains and domain not in known_unknown_domains:
|
||||
logger.info(f"New unknown domain found: {domain}")
|
||||
output_rows.append(row)
|
||||
logger.info(f"Writing {output_csv_file_path}")
|
||||
with open(output_csv_file_path, "w") as f:
|
||||
writer = csv.DictWriter(f, fieldnames=csv_headers)
|
||||
writer.writeheader()
|
||||
writer.writerows(output_rows)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
_main()
|
||||
125
parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt
Normal file
125
parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt
Normal file
@@ -0,0 +1,125 @@
|
||||
200.in-addr.arpa
|
||||
adlucrumnewsletter.com
|
||||
admin.corpivensa.gob.ve
|
||||
aerospacevitro.us.com
|
||||
albagroup-eg.com
|
||||
anteldata.net.uy
|
||||
antonaoll.com
|
||||
aosau.net
|
||||
arandomserver.com
|
||||
asmecam.it
|
||||
b8sales.com
|
||||
bestinvestingtime.com
|
||||
biocorp.com
|
||||
bisno1.co.jp
|
||||
bluhosting.com
|
||||
bodiax.pp.ua
|
||||
bost-law.com
|
||||
brnonet.cz
|
||||
brushinglegal.de
|
||||
christus.mx
|
||||
cloud-edm.com
|
||||
cloudlogin.co
|
||||
cnode.io
|
||||
commerceinsurance.com
|
||||
coolblaze.com
|
||||
cps.com.ar
|
||||
detrot.xyz
|
||||
digi.net.my
|
||||
dkginternet.com
|
||||
doorsrv.com
|
||||
dreamtechmedia.com
|
||||
ds.network
|
||||
emailperegrine.com
|
||||
epsilon-group.com
|
||||
eyecandyhosting.xyz
|
||||
fetscorp.shop
|
||||
formicidaehunt.net
|
||||
fosterheap.com
|
||||
gendns.com
|
||||
ginous.eu.com
|
||||
gist-th.com
|
||||
gophermedia.com
|
||||
gqlists.us.com
|
||||
gratzl.de
|
||||
hgnbroken.us.com
|
||||
hosting1337.com
|
||||
hostingmichigan.com
|
||||
hostname.localhost
|
||||
hostnetwork.com
|
||||
hostwhitelabel.com
|
||||
idcfcloud.net
|
||||
immenzaces.com
|
||||
ivol.co
|
||||
jalanet.co.id
|
||||
kahlaa.com
|
||||
kbronet.com.tw
|
||||
kdnursing.org
|
||||
kitchenaildbd.com
|
||||
legenditds.com
|
||||
lighthouse-media.com
|
||||
lohkal.com
|
||||
lonestarmm.net
|
||||
magnetmail.net
|
||||
manhattanbulletpoint.com
|
||||
masterclassjournal.com
|
||||
moderntradingnews.com
|
||||
moonjaws.com
|
||||
motion4ever.net
|
||||
mschosting.com
|
||||
mspnet.pro
|
||||
mts-nn.ru
|
||||
mxthunder.net
|
||||
myrewards.net
|
||||
mysagestore.com
|
||||
ncport.ru
|
||||
nebdig.com
|
||||
neovet-base.ru
|
||||
nic.name
|
||||
nidix.net
|
||||
ogicom.net
|
||||
omegabrasil.inf.br
|
||||
onnet21.com
|
||||
ovaltinalization.co
|
||||
overta.ru
|
||||
passionatesmiles.com
|
||||
planethoster.net
|
||||
pmnhost.net
|
||||
popiup.com
|
||||
prima.com.ar
|
||||
prima.net.ar
|
||||
proudserver.com
|
||||
qontenciplc.autos
|
||||
raxa.host
|
||||
sahacker-2020.com
|
||||
samsales.site
|
||||
satirogluet.com
|
||||
securednshost.com
|
||||
securen.net
|
||||
securerelay.in
|
||||
securev.net
|
||||
servershost.biz
|
||||
smallvillages.com
|
||||
solusoftware.com
|
||||
spiritualtechnologies.io
|
||||
sprout.org
|
||||
stableserver.net
|
||||
stockexchangejournal.com
|
||||
suksangroup.com
|
||||
system.eu.com
|
||||
t-jon.com
|
||||
tenkids.net
|
||||
thaicloudsolutions.com
|
||||
thaimonster.com
|
||||
tullostrucking.com
|
||||
unite.services
|
||||
urawasl.com
|
||||
us.servername.us
|
||||
vendimetry.com
|
||||
vibrantwellnesscorp.com
|
||||
wallstreetsgossip.com
|
||||
weblinkinternational.com
|
||||
xsfati.us.com
|
||||
xspmail.jp
|
||||
zerowebhosting.net
|
||||
znlc.jp
|
||||
@@ -19,10 +19,11 @@ import csv
|
||||
import io
|
||||
|
||||
try:
|
||||
import importlib.resources as pkg_resources
|
||||
from importlib.resources import files
|
||||
except ImportError:
|
||||
# Try backported to PY<37 `importlib_resources`
|
||||
import importlib_resources as pkg_resources
|
||||
# Try backported to PY<3 `importlib_resources`
|
||||
from importlib.resources import files
|
||||
|
||||
|
||||
from dateutil.parser import parse as parse_date
|
||||
import dns.reversename
|
||||
@@ -280,14 +281,13 @@ def get_ip_address_country(ip_address, db_path=None):
|
||||
break
|
||||
|
||||
if db_path is None:
|
||||
with pkg_resources.path(
|
||||
parsedmarc.resources.dbip, "dbip-country-lite.mmdb"
|
||||
) as path:
|
||||
db_path = path
|
||||
db_path = str(
|
||||
files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb")
|
||||
)
|
||||
|
||||
db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
|
||||
if db_age > timedelta(days=30):
|
||||
logger.warning("IP database is more than a month old")
|
||||
db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
|
||||
if db_age > timedelta(days=30):
|
||||
logger.warning("IP database is more than a month old")
|
||||
|
||||
db_reader = geoip2.database.Reader(db_path)
|
||||
|
||||
@@ -344,21 +344,28 @@ def get_service_from_reverse_dns_base_domain(
|
||||
|
||||
if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
|
||||
try:
|
||||
logger.debug(f"Trying to fetch " f"reverse DNS map from {url}...")
|
||||
csv_file.write(requests.get(url).text)
|
||||
logger.debug(f"Trying to fetch reverse DNS map from {url}...")
|
||||
response = requests.get(url)
|
||||
response.raise_for_status()
|
||||
csv_file.write(response.text)
|
||||
csv_file.seek(0)
|
||||
load_csv(csv_file)
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.warning(f"Failed to fetch reverse DNS map: {e}")
|
||||
except Exception:
|
||||
logger.warning("Not a valid CSV file")
|
||||
csv_file.seek(0)
|
||||
logger.debug(csv_file.read())
|
||||
|
||||
if len(reverse_dns_map) == 0:
|
||||
logger.info("Loading included reverse DNS map...")
|
||||
with pkg_resources.path(
|
||||
parsedmarc.resources.maps, "base_reverse_dns_map.csv"
|
||||
) as path:
|
||||
if local_file_path is not None:
|
||||
path = local_file_path
|
||||
with open(path) as csv_file:
|
||||
load_csv(csv_file)
|
||||
path = str(
|
||||
files(parsedmarc.resources.maps).joinpath("base_reverse_dns_map.csv")
|
||||
)
|
||||
if local_file_path is not None:
|
||||
path = local_file_path
|
||||
with open(path) as csv_file:
|
||||
load_csv(csv_file)
|
||||
try:
|
||||
service = reverse_dns_map[base_domain]
|
||||
except KeyError:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[build-system]
|
||||
requires = [
|
||||
"hatchling>=1.8.1",
|
||||
"hatchling>=1.27.0",
|
||||
]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
@@ -59,7 +59,7 @@ dependencies = [
|
||||
|
||||
[project.optional-dependencies]
|
||||
build = [
|
||||
"hatch",
|
||||
"hatch>=1.14.0",
|
||||
"myst-parser[linkify]",
|
||||
"nose",
|
||||
"pytest",
|
||||
|
||||
25
sortmaps.py
Executable file
25
sortmaps.py
Executable file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import glob
|
||||
import csv
|
||||
|
||||
|
||||
maps_dir = os.path.join("parsedmarc", "resources", "maps")
|
||||
csv_files = glob.glob(os.path.join(maps_dir, "*.csv"))
|
||||
|
||||
|
||||
def sort_csv(filepath, column=0):
|
||||
with open(filepath, mode="r", newline="") as infile:
|
||||
reader = csv.reader(infile)
|
||||
header = next(reader)
|
||||
sorted_rows = sorted(reader, key=lambda row: row[column])
|
||||
|
||||
with open(filepath, mode="w", newline="\n") as outfile:
|
||||
writer = csv.writer(outfile)
|
||||
writer.writerow(header)
|
||||
writer.writerows(sorted_rows)
|
||||
|
||||
|
||||
for csv_file in csv_files:
|
||||
sort_csv(csv_file)
|
||||
Reference in New Issue
Block a user