mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-03-19 04:55:58 +00:00
Update lists
This commit is contained in:
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@@ -45,6 +45,7 @@
|
||||
"htpasswd",
|
||||
"httpasswd",
|
||||
"IMAP",
|
||||
"infile",
|
||||
"Interaktive",
|
||||
"IPDB",
|
||||
"journalctl",
|
||||
|
||||
4
build.sh
4
build.sh
@@ -17,9 +17,7 @@ touch build/html/.nojekyll
|
||||
if [ -d "./../parsedmarc-docs" ]; then
|
||||
cp -rf build/html/* ../../parsedmarc-docs/
|
||||
fi
|
||||
cd ..
|
||||
sort -o "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt" "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt"
|
||||
./sortmaps.py
|
||||
sort.sh
|
||||
python3 tests.py
|
||||
rm -rf dist/ build/
|
||||
hatch build
|
||||
@@ -84,6 +84,8 @@ asahikawa-med.ac.jp,Asahikawa Med,Healthcare
|
||||
ashtelstudios.com,Ashtel Studios,Healthcare
|
||||
asiaitserver.com,Asia IT Solution Co.,MSP
|
||||
askbis.com,BIS,Web Host
|
||||
asl2.liguria.it,ASL2,Healthcare
|
||||
asmecal.it,ASMENET CALABRIA,Government
|
||||
assaytechnology.com,Assay Technology,Industrial
|
||||
assentportal.com,Assent,SaaS
|
||||
atmailcloud.com,atmail,Email Provider
|
||||
@@ -92,6 +94,7 @@ atw.ne.jp,ATW,Web Host
|
||||
au.com,au,ISP
|
||||
auone-net.jp,KDDI,ISP
|
||||
aussiebb.com.au,Aussie Broadband,ISP
|
||||
autotask.net,Kaseya,SaaS
|
||||
averypartners.net,Avery Partners,Healthcare
|
||||
avis.ne.jp,Densan Avis,ISP
|
||||
aviso.ci,Orange,ISP
|
||||
@@ -161,8 +164,8 @@ calpoly.edu,Cal Poly,Education
|
||||
calvin.edu,Calvin University,Education
|
||||
canonet.ne.jp,Canonet,MSP
|
||||
cardhealth.com,Cardinal Health,Healthcare
|
||||
cardinalhealth.com,Cardinal Health,Healthcare
|
||||
cardinal.com,Cardinal Health,Healthcare
|
||||
cardinalhealth.com,Cardinal Health,Healthcare
|
||||
carecentrix.com,CareCentrix,Healthcare
|
||||
carrierzone.com,carrierzone,Email Security
|
||||
carsforkids.org,Cars For Kids,Nonprofit
|
||||
@@ -176,6 +179,7 @@ centerasecurity.com,Centera Email Defence,Email Security
|
||||
centerasecurity.dk,Centera Email Defence,Email Security
|
||||
centralinteractiva.com.mx,Central Interactiva,Marketing
|
||||
centurylink.com.pe,Cirion,MSP
|
||||
chaiyohosting.com,Chaiyo Hosting,Web Host
|
||||
charter.net,Charter,ISP
|
||||
chiba-u.jp,Chiba University,Education
|
||||
chikamori.com,Chikamori Health Care Group,Healthcare
|
||||
@@ -274,6 +278,7 @@ default-host.net,INHOSTED LP,Web Host
|
||||
delhitel.net,Delhi Telephone Company,ISP
|
||||
deskwing.net,DESKWING,Email Provider
|
||||
dexanet.co.id,Dexanet,ISP
|
||||
dfn.nl,DELTA Fiber,ISP
|
||||
dgsys.es,DGsys,Web Host
|
||||
dhl.com,DHL,Logistics
|
||||
diakovere.de,DIAKOVERE,Healthcare
|
||||
@@ -328,6 +333,7 @@ ecm8.com,Campaign Master (UK),Marketing
|
||||
edgepark.com,Edgepark,Healthcare
|
||||
edu.com,InstructionalAssistant,SaaS
|
||||
eduneering.com,UL EHS training,SaaS
|
||||
egress.cloud,Egress Software,Email Security
|
||||
egs-seg.gc.ca,Canada Government Electronic Directory Services (GEDS),Government
|
||||
ehafconsulting.org,EHAF Consulting Engineers,Construction
|
||||
ehime-u.ac.jp,Ehime University,Education
|
||||
@@ -340,9 +346,10 @@ elcom.ru,Rostelcom,ISP
|
||||
electric.net,VIPRE,Email Security
|
||||
elevatedstudios.tech,Elevated Studios,Web Host
|
||||
eleven.mx,Eleven Marketing Labs,Marketing
|
||||
elink.net,Earthlink,ISP
|
||||
elive.net,Elive,Web Host
|
||||
ellipse.net,Ellipse,MSP
|
||||
elmecnet.net,Elmec Informatica,MSP
|
||||
elnk.net,Earthlink,ISP
|
||||
email-od.com,SocketLabs,SaaS
|
||||
emailarray.com,Emailarray,Email Provider
|
||||
emailowl.com,NameSilo,Web Host
|
||||
@@ -458,6 +465,7 @@ gsbridge.com,Golden State Bridge,Industrial
|
||||
gtnexus.com,Infor Nexus (Formerly GT Nexus),SaaS
|
||||
gts.sk,GTS Slovakia,ISP
|
||||
guardedhost.com,Omnis Network,Web Host
|
||||
gva.es,Generalitat Valenciana,Government
|
||||
gwu.edu,The George Washington University,Education
|
||||
h-isac.org,H-ISAC,Healthcare
|
||||
hammacher.com,Hammacher Schlemmer,Retail
|
||||
@@ -654,6 +662,7 @@ lghealth.org,Penn Medicine Lancaster General Health,Healthcare
|
||||
lindsaymunicipalhospital.com,Lindsay Municipal Hospital,Healthcare
|
||||
link3.net,Link3 Technologies,ISP
|
||||
linkedin.com,LinkedIn,Social Media
|
||||
linode.com,Linode,Web Host
|
||||
live-servers.net,Fasthosts Internet Ltd,Web Host
|
||||
livedo.jp,Livedo Corporation,Healthcare
|
||||
llumc.edu,Loma Linda University Medical Center,Healthcare
|
||||
@@ -731,6 +740,7 @@ menlosecurity.com,Menlo Security,Email Security
|
||||
mercranet.com,Mercranet,Web Host
|
||||
mercurygate.net,MercuryGate,SaaS
|
||||
meric.net.tr,Meriç Hosting,Web Host
|
||||
merula.net,Merula,ISP
|
||||
messagelabs.com,Symantec Email Security,Email Security
|
||||
messagingengine.com,Fastmail,Email Provider
|
||||
mesvr.com,ReadNotify,Email Provider
|
||||
@@ -740,6 +750,7 @@ mho.de,Marienhospital Osnabrück,Healthcare
|
||||
mhos.de,Marienhospital Osnabrück,Healthcare
|
||||
mia.net,HostDrive,Web Host
|
||||
miamioh.edu,Miami University,Education
|
||||
microfocus-japan.com,Micro Focus,MSP
|
||||
microsoft.com,Microsoft,Technology
|
||||
middlesex.ca,"Middlesex County, Canada",Government
|
||||
midi-loisirs.com,Midi Loisirs,Entertainment
|
||||
@@ -803,6 +814,7 @@ nagoya-cu.ac.jp,Nagoya City University,Education
|
||||
nagoya-u.ac.jp,Nagoya University,Education
|
||||
nahealth.com,Northern Arizona Healthcare,Healthcare
|
||||
name.com,Name.com,Web Host
|
||||
nameserver.sk,Webglobe,Web Host
|
||||
namespro.ca,Namespro,Web Host
|
||||
nano.uz,Nano Telecom,ISP
|
||||
nascoeducation.com,Nasco Education,Education
|
||||
@@ -826,9 +838,11 @@ netrevolution.com,NetRevolution,ISP
|
||||
netroad.ru,Mobile TeleSystems,ISP
|
||||
netsolus.com,netsolus KC,MSP
|
||||
netsville.com,Netsville,Marketing
|
||||
nettlinx.com,Nettlinx,ISP
|
||||
network80.com,Network80,Web Host
|
||||
neuca.pl,NEUCA Group,Healthcare
|
||||
newpages.com.my,NEWPAGES,Retail
|
||||
newsmanapp.com,NewsMAN,Marketing
|
||||
newsunseo.com,NewSunSEO,Marketing
|
||||
newtekwebhosting.com,Newtek Technology Solutions,Web Host
|
||||
nexcess.net,Nexcess,Web Host
|
||||
@@ -976,6 +990,7 @@ prw.net,Puerto Rico Webmasters,Web Host
|
||||
pserver.space,Profitserver,Web Host
|
||||
psychz.net,Psychz Networks,Web Host
|
||||
ptd.net,PTD,ISP
|
||||
ptrcloud.net,GMO GlobalSign,IaaS
|
||||
pubnix.net,PubNIX,Web Host
|
||||
puc-rio.br,PUC Rio,Education
|
||||
pucminas.br,PUC Minas,Education
|
||||
@@ -1100,6 +1115,7 @@ signium.co.jp,Signium,Consulting
|
||||
siho.org,Siho Insurance Services,Finance
|
||||
simpro.com.br,Simpro,Healthcare
|
||||
simus.uz,Simus,ISP
|
||||
sinergit.com.do,Sinergit,MSP
|
||||
siriustelecom.uz,Sirius Telecom,ISP
|
||||
siteprotect.com,SiteMail,Email Provider
|
||||
sixinternet.com.br,Six Internet,ISP
|
||||
@@ -1220,6 +1236,7 @@ tktelekom.pl,TK Telecom,Healthcare
|
||||
tm.net,Mercury Telecom,ISP
|
||||
tmc.edu,Truett McConnell University,Education
|
||||
tmcz.cz,T-Mobile,ISP
|
||||
tmd.ac.jp,Science Tokyo,Education
|
||||
tmddedicated.com,TMDHosting,Web Host
|
||||
tmkultra.net.br,Tmk Net,ISP
|
||||
tnc-neuro.com,Tallahassee Neurological Clinic,Healthcare
|
||||
@@ -1340,6 +1357,7 @@ web-dns1.com,Web Hosting Canada,Web Host
|
||||
web-hosting.com,Namecheap,Web Host
|
||||
web.africa,Webafrica,ISP
|
||||
webetic.net,Webetic,Web Host
|
||||
webglobe.com,Webglobe,Web Host
|
||||
webhostingireland.ie,Hosting Ireland,Web Host
|
||||
webmasters.com,Webmasters.com,Web Host
|
||||
webnames.ca,Webnames.ca,Web Host
|
||||
@@ -1380,6 +1398,7 @@ xinet.com.mx,Xinet,MSP
|
||||
xipline.com,Ritter Communications,ISP
|
||||
xmission.com,XMission,SaaS
|
||||
xmr3.com,OpenText,SaaS
|
||||
xnet.mx,Xnet,MSP
|
||||
xrea.com,XREA,Web Host
|
||||
xserver.jp,Xserver,Web Host
|
||||
yadtel.net,Zirrus,ISP
|
||||
@@ -1398,6 +1417,8 @@ yuuai.or.jp,Social Medical Corporation Yuuaikai,Healthcare
|
||||
z.com,Z.com,Web Host
|
||||
zaansmc.nl,Zaans Medical Center,Healthcare
|
||||
zare.com,Zare,Web Host
|
||||
zcmail.net,Zoho Campaigns,Marketing
|
||||
zcsend.net,Zoho Campaigns,Marketing
|
||||
zdsys.com,Zendesk,SaaS
|
||||
zedality.com,Zedality,Web Host
|
||||
zirrus.com,Zirrus,ISP
|
||||
@@ -1414,3 +1435,10 @@ zsttk.ru,TTK,ISP
|
||||
zyner.net,Zyner,Email Provider
|
||||
zyner.one,Zyner,Email Provider
|
||||
zyner.org,Zyner,Email Provider
|
||||
akura.ne.jp,Akura,Logistics
|
||||
assp.org,American Society of Safety Professionals,Healthcare
|
||||
complemar.com,Complemar,Logistics
|
||||
victorkaiser.com,Global Transport,Logistics
|
||||
bnpparibas.fr,Banque BNP Paribas,Finance
|
||||
clarix.com,Clarix,MSP
|
||||
collectivhosting.com,Collectiv,Web Host
|
||||
|
||||
|
@@ -9,6 +9,7 @@ def _main():
|
||||
input_csv_file_path = "base_reverse_dns.csv"
|
||||
base_reverse_dns_map_file_path = "base_reverse_dns_map.csv"
|
||||
known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt"
|
||||
psl_overrides_file_path = "psl_overrides.txt"
|
||||
output_csv_file_path = "unknown_base_reverse_dns.csv"
|
||||
|
||||
csv_headers = ["source_name", "message_count"]
|
||||
@@ -23,6 +24,7 @@ def _main():
|
||||
input_csv_file_path,
|
||||
base_reverse_dns_map_file_path,
|
||||
known_unknown_list_file_path,
|
||||
psl_overrides_file_path,
|
||||
]:
|
||||
if not os.path.exists(p):
|
||||
logger.error(f"{p} does not exist")
|
||||
@@ -38,6 +40,18 @@ def _main():
|
||||
)
|
||||
else:
|
||||
known_unknown_domains.append(domain)
|
||||
logger.info(f"Loading {psl_overrides_file_path}")
|
||||
psl_overrides = []
|
||||
with open(psl_overrides_file_path) as f:
|
||||
for line in f.readlines():
|
||||
domain = line.lower().strip()
|
||||
if domain in psl_overrides:
|
||||
logger.warning(
|
||||
f"{domain} is in {psl_overrides_file_path} \
|
||||
multiple times"
|
||||
)
|
||||
else:
|
||||
psl_overrides.append(domain)
|
||||
logger.info(f"Loading {base_reverse_dns_map_file_path}")
|
||||
known_domains = []
|
||||
with open(base_reverse_dns_map_file_path) as f:
|
||||
@@ -52,13 +66,20 @@ def _main():
|
||||
if domain in known_unknown_domains and known_domains:
|
||||
pass
|
||||
logger.warning(
|
||||
f"{domain} is in {known_unknown_list_file_path} and {base_reverse_dns_map_file_path}"
|
||||
f"{domain} is in {known_unknown_list_file_path} and \
|
||||
{base_reverse_dns_map_file_path}"
|
||||
)
|
||||
|
||||
logger.info(f"Checking domains against {base_reverse_dns_map_file_path}")
|
||||
with open(input_csv_file_path) as f:
|
||||
for row in csv.DictReader(f):
|
||||
domain = row["source_name"].lower().strip()
|
||||
if domain == "":
|
||||
continue
|
||||
for psl_domain in psl_overrides:
|
||||
if domain.endswith(psl_domain):
|
||||
domain = psl_domain
|
||||
break
|
||||
if domain not in known_domains and domain not in known_unknown_domains:
|
||||
logger.info(f"New unknown domain found: {domain}")
|
||||
output_rows.append(row)
|
||||
|
||||
@@ -123,3 +123,22 @@ xsfati.us.com
|
||||
xspmail.jp
|
||||
zerowebhosting.net
|
||||
znlc.jp
|
||||
cavabeen.com
|
||||
llsend.com
|
||||
blguss.com
|
||||
itsidc.com
|
||||
anviklass.org
|
||||
a7e.ru
|
||||
antis.edu
|
||||
ctla.co.kr
|
||||
ip-147-135-108.us
|
||||
cloudaccess.net
|
||||
netkl.org
|
||||
bluenet.ch
|
||||
i-mecca.net
|
||||
emailgids.net
|
||||
jimishare.com
|
||||
anglishment.com
|
||||
ports.net
|
||||
rapidns.com
|
||||
a94434500-blog.com
|
||||
|
||||
6
parsedmarc/resources/maps/psl_overrides.txt
Normal file
6
parsedmarc/resources/maps/psl_overrides.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
amazonaws.com
|
||||
cloudaccess.net
|
||||
linode.com
|
||||
h-serv.co.uk
|
||||
plesk.pages
|
||||
akura.ne.jp
|
||||
5
sort.sh
Executable file
5
sort.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash

# Sort the plain-text domain lists in place (sort -o may safely name the
# input file as its output), then run sortmaps.py.
sort -o "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt" "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt"
# The overrides list is named psl_overrides.txt; the previous
# public_suffix_overrides.txt path does not exist, so sort failed on it.
sort -o "parsedmarc/resources/maps/psl_overrides.txt" "parsedmarc/resources/maps/psl_overrides.txt"
./sortmaps.py
|
||||
37
sortmaps.py
37
sortmaps.py
@@ -1,12 +1,11 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import glob
|
||||
import csv
|
||||
|
||||
|
||||
maps_dir = os.path.join("parsedmarc", "resources", "maps")
|
||||
csv_files = glob.glob(os.path.join(maps_dir, "*.csv"))
|
||||
map_files = ["base_reverse_dns_map.csv"]
|
||||
list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
|
||||
|
||||
|
||||
def sort_csv(filepath, column=0):
|
||||
@@ -14,12 +13,40 @@ def sort_csv(filepath, column=0):
|
||||
reader = csv.reader(infile)
|
||||
header = next(reader)
|
||||
sorted_rows = sorted(reader, key=lambda row: row[column])
|
||||
existing_values = []
|
||||
for row in sorted_rows:
|
||||
if row[column] in existing_values:
|
||||
print(f"Warning: {row[column]} is in {filepath} multiple times")
|
||||
|
||||
with open(filepath, mode="w", newline="\n") as outfile:
|
||||
writer = csv.writer(outfile)
|
||||
writer.writerow(header)
|
||||
writer.writerows(sorted_rows)
|
||||
|
||||
def sort_list_file(filepath, lowercase=True, strip=True, deduplicate=True,
                   remove_blank_lines=True, ending_newline=True, newline="\n"):
    """Normalize and sort a one-entry-per-line text file in place.

    Args:
        filepath: Path of the list file to rewrite.
        lowercase: Lowercase every line before sorting.
        strip: Strip leading and trailing whitespace from every line.
        deduplicate: Keep only one copy of each (normalized) line.
        remove_blank_lines: Drop lines that are empty after normalization.
        ending_newline: Terminate the last line with a newline.
        newline: Passed to ``open()`` as its ``newline`` argument for both
            reading and writing.
    """
    with open(filepath, mode="r", newline=newline) as infile:
        # Drop line terminators up front so they never take part in
        # comparison, de-duplication, or the final join.
        lines = [line.rstrip("\r\n") for line in infile]

    if lowercase:
        lines = [line.lower() for line in lines]
    if strip:
        lines = [line.strip() for line in lines]
    if deduplicate:
        # Order does not matter here; the list is sorted below.
        lines = set(lines)
    if remove_blank_lines:
        lines = [line for line in lines if line != ""]
    lines = sorted(lines)

    # Always join with "\n": in write mode, open() translates "\n" to the
    # requested ``newline`` itself, so joining with ``newline`` directly
    # would double up sequences such as "\r\n".
    content = "\n".join(lines)
    if ending_newline and lines:
        content += "\n"

    with open(filepath, mode="w", newline=newline) as outfile:
        outfile.write(content)
|
||||
|
||||
for csv_file in csv_files:
|
||||
sort_csv(csv_file)
|
||||
|
||||
for csv_file in map_files:
|
||||
sort_csv(os.path.join(maps_dir, csv_file))
|
||||
for list_file in list_files:
|
||||
sort_list_file( os.path.join(maps_dir, list_file))
|
||||
|
||||
Reference in New Issue
Block a user