From 87ae6175f2b8cb697cfd26ead265170c5ea35362 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Sun, 8 Jun 2025 19:51:13 -0400 Subject: [PATCH] Update lists --- .vscode/settings.json | 1 + build.sh | 4 +- .../resources/maps/base_reverse_dns_map.csv | 32 +++++++++++++++- .../maps/find_unknown_base_reverse_dns.py | 23 +++++++++++- .../maps/known_unknown_base_reverse_dns.txt | 19 ++++++++++ parsedmarc/resources/maps/psl_overrides.txt | 6 +++ sort.sh | 5 +++ sortmaps.py | 37 ++++++++++++++++--- 8 files changed, 116 insertions(+), 11 deletions(-) create mode 100644 parsedmarc/resources/maps/psl_overrides.txt create mode 100755 sort.sh diff --git a/.vscode/settings.json b/.vscode/settings.json index 5153c88..9561d56 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -45,6 +45,7 @@ "htpasswd", "httpasswd", "IMAP", + "infile", "Interaktive", "IPDB", "journalctl", diff --git a/build.sh b/build.sh index 626529f..4a1e5ff 100755 --- a/build.sh +++ b/build.sh @@ -17,9 +17,7 @@ touch build/html/.nojekyll if [ -d "./../parsedmarc-docs" ]; then cp -rf build/html/* ../../parsedmarc-docs/ fi -cd .. 
-sort -o "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt" "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt" -./sortmaps.py +sort.sh python3 tests.py rm -rf dist/ build/ hatch build \ No newline at end of file diff --git a/parsedmarc/resources/maps/base_reverse_dns_map.csv b/parsedmarc/resources/maps/base_reverse_dns_map.csv index 9e95b2f..252246b 100644 --- a/parsedmarc/resources/maps/base_reverse_dns_map.csv +++ b/parsedmarc/resources/maps/base_reverse_dns_map.csv @@ -84,6 +84,8 @@ asahikawa-med.ac.jp,Asahikawa Med,Healthcare ashtelstudios.com,Ashtel Studios,Healthcare asiaitserver.com, Asia IT Solution Co.,MSP askbis.com,BIS,Web Host +asl2.liguria.it,ASL2,Healthcare +asmecal.it,ASMENET CALABRIA,Government assaytechnology.com,Assay Technology,Industrial assentportal.com,Assent,SaaS atmailcloud.com,atmail,Email Provider @@ -92,6 +94,7 @@ atw.ne.jp,ATW,Web Host au.com,au,ISP auone-net.jp,KDDI,ISP aussiebb.com.au,Aussie Broadband,ISP +autotask.net,Kaseya,SaaS averypartners.net,Avery Partners,Healthcare avis.ne.jp,Densan Avis,ISP aviso.ci,Orange,ISP @@ -161,8 +164,8 @@ calpoly.edu,Cal Poly,Education calvin.edu,Calvin University,Education canonet.ne.jp,Canonet,MSP cardhealth.com,Cardinal Health,Healthcare -cardinalhealth.com,Cardinal Health,Healthcare cardinal.com,Cardinal Health,Healthcare +cardinalhealth.com,Cardinal Health,Healthcare carecentrix.com,CareCentrix,Healthcare carrierzone.com,carrierzone,Email Security carsforkids.org,Cars For Kids,Nonprofit @@ -176,6 +179,7 @@ centerasecurity.com,Centera Email Defence,Email Security centerasecurity.dk,Centera Email Defence,Email Security centralinteractiva.com.mx,Central Interactiva,Marketing centurylink.com.pe,Cirion,MSP +chaiyohosting.com,Chaiyo Hosting,Web Host charter.net,Charter,ISP chiba-u.jp,Chiba University,Education chikamori.com,Chikamori Health Care Group,Healthcare @@ -274,6 +278,7 @@ default-host.net,INHOSTED LP,Web Host delhitel.net,Delhi Telephone Company,ISP 
deskwing.net,DESKWING,Email Provider dexanet.co.id,Dexanet,ISP +dfn.nl,DELTA Fiber,ISP dgsys.es,DGsys,Web Host dhl.com,DHL,logistics diakovere.de,DIAKOVERE,Healthcare @@ -328,6 +333,7 @@ ecm8.com,Campaign Master (UK),Marketing edgepark.com,Edgepark,Healthcare edu.com,InstructionalAssistant,SaaS eduneering.com,UL EHS training,SaaS +egress.cloud,Egress Software,Email Security egs-seg.gc.ca,Canada Government Electronic Directory Services (GEDS),Government ehafconsulting.org,EHAF Consulting Engineers,Construction ehime-u.ac.jp,Ehime University,Education @@ -340,9 +346,10 @@ elcom.ru,Rostelcom,ISP electric.net,VIPRE,Email Security elevatedstudios.tech,Elevated Studios,Web Host eleven.mx,E;rven Marketing Labs,Marketing -elink.net,Earthlink,ISP elive.net,Elive,Web Host ellipse.net,Ellipse,MSP +elmecnet.net,Elmec Informatica,MSP +elnk.net,Earthlink,ISP email-od.com,SocketLabs,SaaS emailarray.com,Emailarray,Email Provider emailowl.com,NameSilo,Web Host @@ -458,6 +465,7 @@ gsbridge.com,Golden State Bridge,Industrial gtnexus.com,Infor Nexus (Formerly GT Nexus),SaaS gts.sk,GTS Slovakia,ISP guardedhost.com,Omnis Network,Web Host +gva.es,Generalitat Valenciana,Government gwu.edu,The George Washington University,Education h-isac.org,H-ISAC,Healthcare hammacher.com,Hammacher Schlemmer,Retail @@ -654,6 +662,7 @@ lghealth.org,Penn Medicine Lancaster General Health,Healthcare lindsaymunicipalhospital.com,Lindsay Municipal Hospital,Healthcare link3.net,Link3 Technologies,ISP linkedin.com,LinkedIn,Social Media +linode.com,Linode,Web Host live-servers.net,Fasthosts Internet Ltd,Web Host livedo.jp,Livedo Corporation,Healthcare llumc.edu,Loma Linda University Medical Center,Healthcare @@ -731,6 +740,7 @@ menlosecurity.com,Menlo Security,Email Security mercranet.com,Mercranet,Web Host mercurygate.net,MercuryGate,SaaS meric.net.tr,Meriç Hosting,Web Host +merula.net,Merula,ISP messagelabs.com,Symantec Email Security,Email Security messagingengine.com,Fastmail,Email Provider 
mesvr.com,ReadNotify,Email Provider @@ -740,6 +750,7 @@ mho.de,Marienhospital Osnabrück,Healthcare mhos.de,Marienhospital Osnabrück,Healthcare mia.net,HostDrive,Web Host miamioh.edu,Miami University,Education +microfocus-japan.com,Micro Focus,MSP microsoft.com,Microsoft,Technology middlesex.ca,"Middlesex County, Canada",Government midi-loisirs.com,Midi Loisirs,Entertainment @@ -803,6 +814,7 @@ nagoya-cu.ac.jp,Nagoya City University,Education nagoya-u.ac.jp,Nagoya University,Education nahealth.com,Northern Arizona Healthcare,Healthcare name.com,Name.com,Web Host +nameserver.sk,Webglobe,Web Host namespro.ca,Namespro,Web Host nano.uz,Nano Telecom,ISP nascoeducation.com,Nasco EDucation,Education @@ -826,9 +838,11 @@ netrevolution.com,NetRevolution,ISP netroad.ru,Mobile TeleSystems,ISP netsolus.com,netsolus KC,MSP netsville.com,Netsville,Marketing +nettlinx.com,Nettlinx,ISP network80.com,Network80,Web Host neuca.pl,NECUA Group,Healthcare newpages.com.my,NEWPAGES,Retail +newsmanapp.com,NewsMAN,Marketing newsunseo.com,NewSunSEO,Marketing newtekwebhosting.com,Newtek Technology Solutions,Web Host nexcess.net,Nexcess,Web Host @@ -976,6 +990,7 @@ prw.net,Puerto Rico Webmasters,Web Host pserver.space,Profitserver,Web Host psychz.net,Psychz Networks,Web Host ptd.net,PTD,ISP +ptrcloud.net,GMO GlobalSign,IaaS pubnix.net,PubNIX,Web Host puc-rio.br,PUC Rio,Education pucminas.br,PUC Minas,Education @@ -1100,6 +1115,7 @@ signium.co.jp,Signium,Consulting siho.org,Siho Insurance Services,Finance simpro.com.br,Simpro,Healthcare simus.uz,Simus,ISP +sinergit.com.do,Sinergit,MSP siriustelecom.uz,Sirius Telecom,ISP siteprotect.com,SiteMail,Email Provider sixinternet.com.br,Six Internet,ISP @@ -1220,6 +1236,7 @@ tktelekom.pl,TK Telecom,Healthcare tm.net,Mercury Telecom,ISP tmc.edu,Truett McConnell University,Education tmcz.cz,T-Mobile,ISP +tmd.ac.jp,Science Tokyo,Education tmddedicated.com,TMDHosting,Web Host tmkultra.net.br,Tmk Net,ISP tnc-neuro.com,Tallahassee Neurological Clinic,Healthcare 
@@ -1340,6 +1357,7 @@ web-dns1.com,Web Hosting Canada,Web Host web-hosting.com,Namecheap,Web Host web.africa,Webafrica,ISP webetic.net,Webetic,Web Host +webglobe.com,Webglobe,Web Host webhostingireland.ie,Hosting Ireland,Web Host webmasters.com,Webmasters.com,Web Host webnames.ca,Webnames.ca,Web Host @@ -1380,6 +1398,7 @@ xinet.com.mx,Xinet,MSP xipline.com,Ritter Communications,ISP xmission.com,XMission,SaaS xmr3.com,OpenText,SaaS +xnet.mx,Xnet,MSP xrea.com,XREA,Web Host xserver.jp,Xserver,Web Host yadtel.net,Zirrus,ISP @@ -1398,6 +1417,8 @@ yuuai.or.jp,Social Medical Corporation Yuuaikai,Healthcare z.com,Z.com,Web Host zaansmc.nl,Zaans Medical Center,Healthcare zare.com,Zare,Web Host +zcmail.net,Zoho Campaigns,Marketing +zcsend.net,Zoho Campaigns,Marketing zdsys.com,Zendesk,SaaS zedality.com,Zedality,Web Host zirrus.com,Xirrus.com,ISP @@ -1414,3 +1435,10 @@ zsttk.ru,TTK,ISP zyner.net,Zyner,Email Provider zyner.one,Zyner,Email Provider zyner.org,Zyner,Email Provider +akura.ne.jp,Akura,Logistics +assp.org,American Society of Safety Professionals,Healthcare +complemar.com,Complemar,Logistics +victorkaiser.com,Global Transport,Logistics +bnpparibas.fr,Banque BNP Paribas,Finance +clarix.com,Clarix,MSP +collectivhosting.com,Collectiv,Web Host diff --git a/parsedmarc/resources/maps/find_unknown_base_reverse_dns.py b/parsedmarc/resources/maps/find_unknown_base_reverse_dns.py index 0a66245..f4eebd4 100755 --- a/parsedmarc/resources/maps/find_unknown_base_reverse_dns.py +++ b/parsedmarc/resources/maps/find_unknown_base_reverse_dns.py @@ -9,6 +9,7 @@ def _main(): input_csv_file_path = "base_reverse_dns.csv" base_reverse_dns_map_file_path = "base_reverse_dns_map.csv" known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt" + psl_overrides_file_path = "psl_overrides.txt" output_csv_file_path = "unknown_base_reverse_dns.csv" csv_headers = ["source_name", "message_count"] @@ -23,6 +24,7 @@ def _main(): input_csv_file_path, base_reverse_dns_map_file_path, 
known_unknown_list_file_path, + psl_overrides_file_path, ]: if not os.path.exists(p): logger.error(f"{p} does not exist") @@ -38,6 +40,18 @@ def _main(): ) else: known_unknown_domains.append(domain) + logger.info(f"Loading {psl_overrides_file_path}") + psl_overrides = [] + with open(psl_overrides_file_path) as f: + for line in f.readlines(): + domain = line.lower().strip() + if domain in psl_overrides: + logger.warning( + f"{domain} is in {psl_overrides_file_path} \ + multiple times" + ) + else: + psl_overrides.append(domain) logger.info(f"Loading {base_reverse_dns_map_file_path}") known_domains = [] with open(base_reverse_dns_map_file_path) as f: @@ -52,13 +66,20 @@ def _main(): if domain in known_unknown_domains and known_domains: pass logger.warning( - f"{domain} is in {known_unknown_list_file_path} and {base_reverse_dns_map_file_path}" + f"{domain} is in {known_unknown_list_file_path} and \ + {base_reverse_dns_map_file_path}" ) logger.info(f"Checking domains against {base_reverse_dns_map_file_path}") with open(input_csv_file_path) as f: for row in csv.DictReader(f): domain = row["source_name"].lower().strip() + if domain == "": + continue + for psl_domain in psl_overrides: + if domain.endswith(psl_domain): + domain = psl_domain + break if domain not in known_domains and domain not in known_unknown_domains: logger.info(f"New unknown domain found: {domain}") output_rows.append(row) diff --git a/parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt b/parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt index 8e9a6f9..5dc0180 100644 --- a/parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt +++ b/parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt @@ -123,3 +123,22 @@ xsfati.us.com xspmail.jp zerowebhosting.net znlc.jp +cavabeen.com +llsend.com +blguss.com +itsidc.com +anviklass.org +a7e.ru +antis.edu +ctla.co.kr +ip-147-135-108.us +cloudaccess.net +netkl.org +bluenet.ch +i-mecca.net +emailgids.net +jimishare.com +anglishment.com 
+ports.net
+rapidns.com
+a94434500-blog.com
diff --git a/parsedmarc/resources/maps/psl_overrides.txt b/parsedmarc/resources/maps/psl_overrides.txt
new file mode 100644
index 0000000..45789fa
--- /dev/null
+++ b/parsedmarc/resources/maps/psl_overrides.txt
@@ -0,0 +1,6 @@
+amazonaws.com
+cloudaccess.net
+linode.com
+h-serv.co.uk
+plesk.page
+akura.ne.jp
diff --git a/sort.sh b/sort.sh
new file mode 100755
index 0000000..b8b92d4
--- /dev/null
+++ b/sort.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+
+sort -o "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt" "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt"
+sort -o "parsedmarc/resources/maps/psl_overrides.txt" "parsedmarc/resources/maps/psl_overrides.txt"
+./sortmaps.py
diff --git a/sortmaps.py b/sortmaps.py
index 54a3e3d..4653f64 100755
--- a/sortmaps.py
+++ b/sortmaps.py
@@ -1,12 +1,11 @@
 #!/usr/bin/env python3
 
 import os
-import glob
 import csv
 
-
 maps_dir = os.path.join("parsedmarc", "resources", "maps")
-csv_files = glob.glob(os.path.join(maps_dir, "*.csv"))
+map_files = ["base_reverse_dns_map.csv"]
+list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
 
 
 def sort_csv(filepath, column=0):
@@ -14,12 +13,40 @@ def sort_csv(filepath, column=0):
         reader = csv.reader(infile)
         header = next(reader)
         sorted_rows = sorted(reader, key=lambda row: row[column])
+        keys = [row[column] for row in sorted_rows]
+        for previous, current in zip(keys, keys[1:]):
+            if previous == current:  # sorted, so duplicates are adjacent
+                print(f"Warning: {current} is in {filepath} multiple times")
 
     with open(filepath, mode="w", newline="\n") as outfile:
         writer = csv.writer(outfile)
         writer.writerow(header)
         writer.writerows(sorted_rows)
 
 
+def sort_list_file(filepath, lowercase=True, strip=True, deduplicate=True,
+                   remove_blank_lines=True, ending_newline=True, newline="\n"):
+    """Normalize, deduplicate, and sort a one-entry-per-line file in place.
+
+    Line terminators are dropped on read so every entry compares equally.
+    """
+    with open(filepath, mode="r", newline=newline) as infile:
+        lines = [line.rstrip("\r\n") for line in infile]
+    if lowercase:
+        lines = [line.lower() for line in lines]
+    if strip:
+        lines = [line.strip() for line in lines]
+    if deduplicate:
+        lines = set(lines)  # ordering is restored by sorted() below
+    if remove_blank_lines:
+        lines = [line for line in lines if line]
+    text = newline.join(sorted(lines))
+    if ending_newline and text:  # never emit a lone newline for an empty list
+        text += newline
+    with open(filepath, mode="w", newline=newline) as outfile:
+        outfile.write(text)
-for csv_file in csv_files:
-    sort_csv(csv_file)
+
+for csv_file in map_files:
+    sort_csv(os.path.join(maps_dir, csv_file))
+for list_file in list_files:
+    sort_list_file(os.path.join(maps_dir, list_file))