mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-04-03 20:28:52 +00:00
Add find_unknown_base_reverse_dns.py
This commit is contained in:
@@ -19,33 +19,65 @@ The `service_type` is based on the following rule precedence:
|
||||
3. All telecommunications providers that offer internet access are identified as `ISP`, even if they also offer other services, such as web hosting or email hosting.
|
||||
4. All web hosting providers are identified as `Web Hosting`, even if the service also offers email hosting.
|
||||
5. All email account providers are identified as `Email Provider`, no matter how or where they are hosted
|
||||
6. All legitimate platforms offering their Software as a Service SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
|
||||
6. All legitimate platforms offering their Software as a Service (SaaS) are identified as `SaaS`, regardless of industry. This helps simplify metrics.
|
||||
7. All other senders that use their own domain as a Reverse DNS base domain should be identified based on their industry
|
||||
|
||||
- Agriculture
|
||||
- Automotive
|
||||
- Beauty
|
||||
- Construction
|
||||
- Consulting
|
||||
- Defense
|
||||
- Education
|
||||
- Email Provider
|
||||
- Email Security
|
||||
- Education
|
||||
- Entertainment
|
||||
- Event Planning
|
||||
- Finance
|
||||
- Food
|
||||
- Government
|
||||
- Government Media
|
||||
- Healthcare
|
||||
- IaaS
|
||||
- Industrial
|
||||
- ISP
|
||||
- Logistics
|
||||
- Manufacturing
|
||||
- Marketing
|
||||
- MSP
|
||||
- MSSP
|
||||
- News
|
||||
- Nonprofit
|
||||
- PaaS
|
||||
- Photography
|
||||
- Print
|
||||
- Publishing
|
||||
- Real Estate
|
||||
- Retail
|
||||
- SaaS
|
||||
- Science
|
||||
- Search Engine
|
||||
- Social Media
|
||||
- Sports
|
||||
- Staffing
|
||||
- Technology
|
||||
- Travel
|
||||
- Web Host
|
||||
|
||||
The file currently contains over 600 mappings from a wide variety of email sending services, including large email
|
||||
providers, SaaS platforms, small web hosts, and healthcare companies. Ideally this mapping will continuously grow to
|
||||
include many other services and industries.
|
||||
The file currently contains over 1,400 mappings from a wide variety of email sending sources.
|
||||
|
||||
## known_unknown_base_reverse_dns.txt
|
||||
|
||||
A list of reverse DNS base domains that could not be identified as belonging to a particular organization, service, or industry.
|
||||
|
||||
## base_reverse_dns.csv
|
||||
|
||||
A CSV with the fields `base_reverse_dns` and optionally `count`. This CSV can be generated by exporting the base DNS data from the Kibana or Splunk dashboards provided by parsedmarc. This file is not tracked by Git.
|
||||
|
||||
## unknown_base_reverse_dns.csv
|
||||
|
||||
A CSV file with the fields `base_reverse_dns` and `count`. This file is not tracked by Git.
|
||||
|
||||
## find_unknown_base_reverse_dns.py
|
||||
|
||||
This is a Python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`.
|
||||
|
||||
69
parsedmarc/resources/maps/find_unknown_base_reverse_dns.py
Executable file
69
parsedmarc/resources/maps/find_unknown_base_reverse_dns.py
Executable file
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import logging
|
||||
import os
|
||||
import csv
|
||||
|
||||
|
||||
def _main():
    """Write the not-yet-classified reverse DNS base domains to a CSV file.

    Reads ``base_reverse_dns.csv`` from the current working directory and
    copies every row whose ``base_reverse_dns`` value appears in neither
    ``base_reverse_dns_map.csv`` nor ``known_unknown_base_reverse_dns.txt``
    to ``unknown_base_reverse_dns.csv``. Domains are compared
    case-insensitively with surrounding whitespace removed.

    Duplicate entries inside either reference file, and domains listed in
    both reference files, are reported as warnings.

    Raises:
        SystemExit: With exit status 1 if any of the three input files
            does not exist.
        KeyError: If a CSV file has no ``base_reverse_dns`` column.
    """
    input_csv_file_path = "base_reverse_dns.csv"
    base_reverse_dns_map_file_path = "base_reverse_dns_map.csv"
    known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt"
    output_csv_file_path = "unknown_base_reverse_dns.csv"

    csv_headers = ["base_reverse_dns", "count"]

    output_rows = []

    logging.basicConfig()
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    for p in [
        input_csv_file_path,
        base_reverse_dns_map_file_path,
        known_unknown_list_file_path,
    ]:
        if not os.path.exists(p):
            logger.error(f"{p} does not exist")
            # SystemExit is raised directly: the bare ``exit`` builtin is
            # injected by the ``site`` module and is not always available.
            raise SystemExit(1)

    logger.info(f"Loading {known_unknown_list_file_path}")
    # Sets give constant-time membership tests for the lookups below.
    known_unknown_domains = set()
    with open(known_unknown_list_file_path, encoding="utf-8") as f:
        for line in f:
            domain = line.lower().strip()
            if not domain:
                # Blank lines are not domains.
                continue
            if domain in known_unknown_domains:
                logger.warning(
                    f"{domain} is in {known_unknown_list_file_path} multiple times"
                )
            else:
                known_unknown_domains.add(domain)

    logger.info(f"Loading {base_reverse_dns_map_file_path}")
    known_domains = set()
    with open(base_reverse_dns_map_file_path, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            domain = (row["base_reverse_dns"] or "").lower().strip()
            if not domain:
                continue
            if domain in known_domains:
                logger.warning(
                    f"{domain} is in {base_reverse_dns_map_file_path} multiple times"
                )
            else:
                known_domains.add(domain)
            # A domain must not be both classified and listed as unknown.
            if domain in known_unknown_domains:
                logger.warning(
                    f"{domain} is in {known_unknown_list_file_path} and {base_reverse_dns_map_file_path}"
                )

    logger.info(f"Checking domains against {base_reverse_dns_map_file_path}")
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte order mark,
    # which would otherwise become part of the first header name.
    with open(input_csv_file_path, newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            domain = (row["base_reverse_dns"] or "").lower().strip()
            if not domain:
                continue
            # Both tests must be spelled out: ``x not in a or b`` parses as
            # ``(x not in a) or b`` and is true whenever ``b`` is non-empty.
            if domain not in known_domains and domain not in known_unknown_domains:
                logger.info(f"New unknown domain found: {domain}")
                output_rows.append(row)

    logger.info(f"Writing {output_csv_file_path}")
    with open(output_csv_file_path, "w", newline="", encoding="utf-8") as f:
        # Columns other than the two headers are dropped instead of raising
        # ValueError; a missing ``count`` is written as an empty field.
        writer = csv.DictWriter(f, fieldnames=csv_headers, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(output_rows)


if __name__ == "__main__":
    _main()
|
||||
Reference in New Issue
Block a user