Add find_unknown_base_reverse_dns.py

This commit is contained in:
Sean Whalen
2025-04-24 13:48:51 -04:00
parent 8426daa26b
commit 1d8af3ccff
2 changed files with 106 additions and 5 deletions

View File

@@ -19,33 +19,65 @@ The `service_type` is based on the following rule precedence:
3. All telecommunications providers that offer internet access are identified as `ISP`, even if they also offer other services, such as web hosting or email hosting.
4. All web hosting providers are identified as `Web Hosting`, even if the service also offers email hosting.
5. All email account providers are identified as `Email Provider`, no matter how or where they are hosted.
6. All legitimate platforms offering their Software as a Service SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
6. All legitimate platforms offering their Software as a Service (SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
7. All other senders that use their own domain as a Reverse DNS base domain should be identified based on their industry
- Agriculture
- Automotive
- Beauty
- Construction
- Consulting
- Defense
- Education
- Email Provider
- Email Security
- Education
- Entertainment
- Event Planning
- Finance
- Food
- Government
- Government Media
- Healthcare
- IaaS
- Industrial
- ISP
- Logistics
- Manufacturing
- Marketing
- MSP
- MSSP
- News
- Nonprofit
- PaaS
- Photography
- Print
- Publishing
- Real Estate
- Retail
- SaaS
- Science
- Search Engine
- Social Media
- Sports
- Staffing
- Technology
- Travel
- Web Host
The file currently contains over 600 mappings from a wide variety of email sending services, including large email
providers, SaaS platforms, small web hosts, and healthcare companies. Ideally this mapping will continuously grow to
include many other services and industries.
The file currently contains over 1,400 mappings from a wide variety of email sending sources.
## known_unknown_base_reverse_dns.txt
A list of reverse DNS base domains that could not be identified as belonging to a particular organization, service, or industry.
## base_reverse_dns.csv
A CSV with the fields `base_reverse_dns` and optionally `count`. This CSV can be generated by exporting the base DNS data from the Kibana or Splunk dashboards provided by parsedmarc. This file is not tracked by Git.
## unknown_base_reverse_dns.csv
A CSV file with the fields `base_reverse_dns` and `count`. This file is not tracked by Git.
## find_unknown_base_reverse_dns.py
This is a Python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`.

View File

@@ -0,0 +1,69 @@
#!/usr/bin/env python
import logging
import os
import csv
def _main():
    """Write the not-yet-classified domains from ``base_reverse_dns.csv``.

    Reads three files from the current working directory:

    * ``known_unknown_base_reverse_dns.txt`` - one domain per line.
    * ``base_reverse_dns_map.csv`` - CSV with a ``base_reverse_dns`` column.
    * ``base_reverse_dns.csv`` - CSV with a ``base_reverse_dns`` column and,
      optionally, a ``count`` column.

    Every row of ``base_reverse_dns.csv`` whose domain (lower-cased and
    stripped) appears in neither of the other two files is written to
    ``unknown_base_reverse_dns.csv`` with the columns ``base_reverse_dns``
    and ``count``. Duplicates within a reference file, and domains present
    in both reference files, are logged as warnings.

    Raises:
        SystemExit: With status 1 if any of the three input files is missing.
        KeyError: If a CSV file has no ``base_reverse_dns`` column.
    """
    input_csv_file_path = "base_reverse_dns.csv"
    base_reverse_dns_map_file_path = "base_reverse_dns_map.csv"
    known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt"
    output_csv_file_path = "unknown_base_reverse_dns.csv"
    csv_headers = ["base_reverse_dns", "count"]
    output_rows = []

    logging.basicConfig()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    for p in [
        input_csv_file_path,
        base_reverse_dns_map_file_path,
        known_unknown_list_file_path,
    ]:
        if not os.path.exists(p):
            logger.error(f"{p} does not exist")
            # Same effect as exit(1), without depending on the site module.
            raise SystemExit(1)

    logger.info(f"Loading {known_unknown_list_file_path}")
    # Sets give O(1) membership tests; the order of the entries is never used.
    known_unknown_domains = set()
    with open(known_unknown_list_file_path) as f:
        for line in f:
            domain = line.lower().strip()
            if not domain:
                # Blank lines are not domains and must not count as duplicates.
                continue
            if domain in known_unknown_domains:
                logger.warning(
                    f"{domain} is in {known_unknown_list_file_path} multiple times"
                )
            else:
                known_unknown_domains.add(domain)

    logger.info(f"Loading {base_reverse_dns_map_file_path}")
    known_domains = set()
    # newline="" is what the csv module expects for files it reads or writes.
    with open(base_reverse_dns_map_file_path, newline="") as f:
        for row in csv.DictReader(f):
            domain = row["base_reverse_dns"].lower().strip()
            if domain in known_domains:
                logger.warning(
                    f"{domain} is in {base_reverse_dns_map_file_path} multiple times"
                )
            else:
                known_domains.add(domain)
            # The domain is in the map by definition here, so only the
            # known-unknown list needs to be checked for a conflict.
            if domain in known_unknown_domains:
                logger.warning(
                    f"{domain} is in {known_unknown_list_file_path} and {base_reverse_dns_map_file_path}"
                )

    logger.info(f"Checking domains against {base_reverse_dns_map_file_path}")
    with open(input_csv_file_path, newline="") as f:
        for row in csv.DictReader(f):
            domain = row["base_reverse_dns"].lower().strip()
            # Both membership tests must be spelled out: "not in a or b"
            # parses as "(not in a) or b", which is true for any non-empty b.
            if domain not in known_domains and domain not in known_unknown_domains:
                logger.info(f"New unknown domain found: {domain}")
                output_rows.append(row)

    logger.info(f"Writing {output_csv_file_path}")
    with open(output_csv_file_path, "w", newline="") as f:
        # Input exports may carry extra columns; keep only the documented ones
        # instead of failing with ValueError at write time.
        writer = csv.DictWriter(f, fieldnames=csv_headers, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(output_rows)
if __name__ == "__main__":
    # Run the comparison only when this file is executed directly as a script.
    _main()