mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-04-03 12:18:54 +00:00
Improve list verification
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -106,7 +106,7 @@ ENV/
|
||||
.idea/
|
||||
|
||||
# VS Code launch config
|
||||
.vscode/launch.json
|
||||
#.vscode/launch.json
|
||||
|
||||
# Visual Studio Code settings
|
||||
#.vscode/
|
||||
|
||||
31
.vscode/launch.json
vendored
Normal file
31
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python Debugger: Current File",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal"
|
||||
},
|
||||
{
|
||||
"name": "sortlists.py",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "sortlists.py",
|
||||
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
|
||||
"console": "integratedTerminal"
|
||||
},
|
||||
{
|
||||
"name": "find_unknown_base_reverse_dns.py",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "find_unknown_base_reverse_dns.py",
|
||||
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
1
.vscode/settings.json
vendored
1
.vscode/settings.json
vendored
@@ -109,6 +109,7 @@
|
||||
"setuptools",
|
||||
"smartquotes",
|
||||
"SMTPTLS",
|
||||
"sortlists",
|
||||
"sortmaps",
|
||||
"sourcetype",
|
||||
"STARTTLS",
|
||||
|
||||
2
build.sh
2
build.sh
@@ -19,7 +19,7 @@ if [ -d "./../parsedmarc-docs" ]; then
|
||||
fi
|
||||
cd ..
|
||||
cd parsedmarc/resources/maps
|
||||
python3 sortmaps.py
|
||||
python3 sortlists.py
|
||||
echo "Checking for invalid UTF-8 bytes in base_reverse_dns_map.csv"
|
||||
python3 find_bad_utf8.py base_reverse_dns_map.csv
|
||||
cd ../../..
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
44
parsedmarc/resources/maps/base_reverse_dns_types.txt
Normal file
44
parsedmarc/resources/maps/base_reverse_dns_types.txt
Normal file
@@ -0,0 +1,44 @@
|
||||
Agriculture
|
||||
Automotive
|
||||
Beauty
|
||||
Conglomerate
|
||||
Construction
|
||||
Consulting
|
||||
Defense
|
||||
Education
|
||||
Email Provider
|
||||
Email Security
|
||||
Entertainment
|
||||
Event Planning
|
||||
Finance
|
||||
Food
|
||||
Government
|
||||
Government Media
|
||||
Healthcare
|
||||
ISP
|
||||
IaaS
|
||||
Industrial
|
||||
Legal
|
||||
Logistics
|
||||
MSP
|
||||
MSSP
|
||||
Manufacturing
|
||||
Marketing
|
||||
News
|
||||
Nonprofit
|
||||
PaaS
|
||||
Photography
|
||||
Physical Security
|
||||
Print
|
||||
Publishing
|
||||
Real Estate
|
||||
Retail
|
||||
SaaS
|
||||
Science
|
||||
Search Engine
|
||||
Social Media
|
||||
Sports
|
||||
Staffing
|
||||
Technology
|
||||
Travel
|
||||
Web Host
|
||||
@@ -15,22 +15,14 @@ def _main():
|
||||
|
||||
output_rows = []
|
||||
|
||||
for p in [
|
||||
input_csv_file_path,
|
||||
base_reverse_dns_map_file_path,
|
||||
known_unknown_list_file_path,
|
||||
psl_overrides_file_path,
|
||||
]:
|
||||
if not os.path.exists(p):
|
||||
print(f"Error: {p} does not exist")
|
||||
exit(1)
|
||||
|
||||
known_unknown_domains = []
|
||||
psl_overrides = []
|
||||
known_domains = []
|
||||
output_rows = []
|
||||
|
||||
def load_list(file_path, list_var):
|
||||
if not os.path.exists(file_path):
|
||||
print(f"Error: {file_path} does not exist")
|
||||
print(f"Loading {file_path}")
|
||||
list_var = []
|
||||
with open(file_path) as f:
|
||||
@@ -44,9 +36,11 @@ def _main():
|
||||
|
||||
load_list(known_unknown_list_file_path, known_unknown_domains)
|
||||
load_list(psl_overrides_file_path, psl_overrides)
|
||||
|
||||
print(f"Checking domains against {base_reverse_dns_map_file_path}")
|
||||
if not os.path.exists(input_csv_file_path):
|
||||
print(f"Error: {input_csv_file_path} does not exist")
|
||||
exit(1)
|
||||
with open(input_csv_file_path) as f:
|
||||
print(f"Checking domains against {base_reverse_dns_map_file_path}")
|
||||
for row in csv.DictReader(f):
|
||||
domain = row["source_name"].lower().strip()
|
||||
if domain == "":
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
185.in-addr.arpa
|
||||
190.in-addr.arpa
|
||||
200.in-addr.arpa
|
||||
444qcuhilla.com
|
||||
9services.com
|
||||
a7e.ru
|
||||
a94434500-blog.com
|
||||
abv-10.top
|
||||
adcritic.net
|
||||
adlucrumnewsletter.com
|
||||
admin.corpivensa.gob.ve
|
||||
advantageiq.com
|
||||
@@ -15,6 +17,9 @@ aghories.com
|
||||
ai270.net
|
||||
albagroup-eg.com
|
||||
alchemy.net
|
||||
alohabeachcamp.net
|
||||
aluminumpipetubing.com
|
||||
americanstorageca.com
|
||||
anchorfundhub.com
|
||||
anglishment.com
|
||||
anteldata.net.uy
|
||||
@@ -32,6 +37,7 @@ aurelienvos.com
|
||||
automatech.lat
|
||||
avistaadvantage.com
|
||||
b8sales.com
|
||||
banaras.co
|
||||
bearandbullmarketnews.com
|
||||
bestinvestingtime.com
|
||||
biocorp.com
|
||||
@@ -42,14 +48,17 @@ bluhosting.com
|
||||
bodiax.pp.ua
|
||||
bost-law.com
|
||||
brainity.com
|
||||
brazalnde.net
|
||||
brnonet.cz
|
||||
brushinglegal.de
|
||||
brw.net
|
||||
budgeteasehub.com
|
||||
buoytoys.com
|
||||
c53dw7m24rj.com
|
||||
cashflowmasterypro.com
|
||||
cavabeen.com
|
||||
cbti.net
|
||||
chauffeurplan.co.uk
|
||||
checkpox.fun
|
||||
chegouseuvlache.org
|
||||
christus.mx
|
||||
@@ -63,12 +72,16 @@ cnode.io
|
||||
code-it.net
|
||||
colombiaceropapel.org
|
||||
commerceinsurance.com
|
||||
comsharempc.com
|
||||
coolblaze.com
|
||||
coowo.com
|
||||
corpemail.net
|
||||
cp2-myorderbox.com
|
||||
cps.com.ar
|
||||
ctla.co.kr
|
||||
cumbalikonakhotel.com
|
||||
currencyexconverter.com
|
||||
daakbabu.com
|
||||
dastans.ru
|
||||
datahost36.de
|
||||
descarca-counter-strike.net
|
||||
@@ -78,6 +91,7 @@ dinofelis.cn
|
||||
diwkyncbi.top
|
||||
dkginternet.com
|
||||
dns-oid.com
|
||||
domainserver.ne.jp
|
||||
domconfig.com
|
||||
doorsrv.com
|
||||
dreampox.fun
|
||||
@@ -86,6 +100,7 @@ ds.network
|
||||
dvj.theworkpc.com
|
||||
dwlcka.com
|
||||
dyntcorp.com
|
||||
easternkingspei.com
|
||||
economiceagles.com
|
||||
egosimail.com
|
||||
emailgids.net
|
||||
@@ -96,7 +111,9 @@ erestaff.com
|
||||
example.com
|
||||
exposervers.com-new
|
||||
eyecandyhosting.xyz
|
||||
feipnghardware.com
|
||||
fetscorp.shop
|
||||
fewo-usedom.net
|
||||
fin-crime.com
|
||||
financeaimpoint.com
|
||||
financeupward.com
|
||||
@@ -104,19 +121,27 @@ flex-video.bnr.la
|
||||
formicidaehunt.net
|
||||
fosterheap.com
|
||||
frontiernet.net
|
||||
ftifb7tk3c.com
|
||||
gendns.com
|
||||
getgreencardsfast.com
|
||||
getthatroi.com
|
||||
gigidea.net
|
||||
giize.com
|
||||
ginous.eu.com
|
||||
gist-th.com
|
||||
goldsboroughplace.com
|
||||
gophermedia.com
|
||||
gqlists.us.com
|
||||
gratzl.de
|
||||
greatestworldnews.com
|
||||
greennutritioncare.com
|
||||
h-serv.co.uk
|
||||
haedefpartners.com
|
||||
halcyon-aboveboard.com
|
||||
hanzubon.org
|
||||
hgnbroken.us.com
|
||||
hopsinthehanger.com
|
||||
hostelsucre.com
|
||||
hosting1337.com
|
||||
hostinglotus.cloud
|
||||
hostingmichigan.com
|
||||
@@ -137,20 +162,25 @@ idealconcept.live
|
||||
igppevents.org.uk
|
||||
imjtmn.cn
|
||||
immenzaces.com
|
||||
indulgent-holistic.com
|
||||
inshaaegypt.com
|
||||
ip-147-135-108.us
|
||||
ip-178-33-109.eu
|
||||
ip-ptr.tech
|
||||
iswhatpercent.com
|
||||
itsidc.com
|
||||
itwebs.com
|
||||
ivol.co
|
||||
jalanet.co.id
|
||||
jimishare.com
|
||||
jlenterprises.co.uk
|
||||
joyomokei.com
|
||||
jumanra.org
|
||||
kahlaa.com
|
||||
kbronet.com.tw
|
||||
kdnursing.org
|
||||
kihy.theworkpc.com
|
||||
kingschurchwirral.org
|
||||
kitchenaildbd.com
|
||||
layerdns.cloud
|
||||
legenditds.com
|
||||
@@ -159,6 +189,7 @@ listertermoformadoa.com
|
||||
llsend.com
|
||||
lohkal.com
|
||||
lonestarmm.net
|
||||
longmarquis.com
|
||||
longwoodmgmt.com
|
||||
lwl-puehringer.at
|
||||
lynx.net.lb
|
||||
@@ -173,7 +204,10 @@ matroguel.cam
|
||||
maximpactipo.com
|
||||
mechanicalwalk.store
|
||||
mediavobis.com
|
||||
mindworksunlimited.com
|
||||
mirth-gale.com
|
||||
misorpresa.com
|
||||
mjinn.com
|
||||
moderntradingnews.com
|
||||
moonjaws.com
|
||||
morningnewscatcher.com
|
||||
@@ -182,8 +216,10 @@ mschosting.com
|
||||
msdp1.com
|
||||
mspnet.pro
|
||||
mts-nn.ru
|
||||
multifamilydesign.com
|
||||
mxserver.ro
|
||||
mxthunder.net
|
||||
my-ihor.ru
|
||||
myrewards.net
|
||||
mysagestore.com
|
||||
mysecurewebserver.com
|
||||
@@ -202,67 +238,84 @@ newwallstreetcode.com
|
||||
ngvcv.cn
|
||||
nic.name
|
||||
nidix.net
|
||||
nieuwedagnetwerk.net
|
||||
nlscanme.com
|
||||
nmeuh.cn
|
||||
noisndametal.com
|
||||
nwo.giize.com
|
||||
nwwhalewatchers.org
|
||||
offerslatedeals.com
|
||||
office365.us
|
||||
ogicom.net
|
||||
olivettilexikon.co.uk
|
||||
omegabrasil.inf.br
|
||||
onnet21.com
|
||||
oppt-ac.fit
|
||||
orbitel.net.co
|
||||
outsidences.com
|
||||
ovaltinalization.co
|
||||
overta.ru
|
||||
ox28vgrurc.com
|
||||
panaltyspot.space
|
||||
passionatesmiles.com
|
||||
paulinelam.com
|
||||
perimetercenter.net
|
||||
permanentscreen.com
|
||||
phdns3.es
|
||||
pigelixval1.com
|
||||
planethoster.net
|
||||
plesk.page
|
||||
pmnhost.net
|
||||
pokiloandhu.net
|
||||
pokupki5.ru
|
||||
popiup.com
|
||||
ports.net
|
||||
prima.com.ar
|
||||
prima.net.ar
|
||||
profsol.co.uk
|
||||
prohealthmotion.com
|
||||
proudserver.com
|
||||
psnm.ru
|
||||
pvcwindowsprices.live
|
||||
qontenciplc.autos
|
||||
quatthonggiotico.com
|
||||
qxyxab44njd.com
|
||||
rapidns.com
|
||||
raxa.host
|
||||
reliablepanel.com
|
||||
rgb365.eu
|
||||
riddlecamera.net
|
||||
riddletrends.com
|
||||
runnin-rebels.com
|
||||
rwdhosting.ca
|
||||
s500host.com
|
||||
sahacker-2020.com
|
||||
samsales.site
|
||||
saransk.ru
|
||||
satirogluet.com
|
||||
scioncontacts.com
|
||||
seaspraymta3.net
|
||||
secorp.mx
|
||||
securen.net
|
||||
securerelay.in
|
||||
securev.net
|
||||
servershost.biz
|
||||
shopfox.ca
|
||||
silvestrejaguar.sbs
|
||||
silvestreonca.sbs
|
||||
simplediagnostics.org
|
||||
siriuscloud.jp
|
||||
sisglobalresearch.com
|
||||
smallvillages.com
|
||||
smartape-vps.com
|
||||
solusoftware.com
|
||||
southcoastwebhosting12.com
|
||||
spiritualtechnologies.io
|
||||
sprout.org
|
||||
stableserver.net
|
||||
stockepictigers.com
|
||||
stockexchangejournal.com
|
||||
subterranean-concave.com
|
||||
suksangroup.com
|
||||
sysop4.com
|
||||
system.eu.com
|
||||
@@ -276,8 +329,10 @@ thaicloudsolutions.com
|
||||
thaimonster.com
|
||||
thepushcase.com
|
||||
totaal.net
|
||||
tqh.ro
|
||||
traderlearningcenter.com
|
||||
tullostrucking.com
|
||||
turbinetrends.com
|
||||
ultragate.com
|
||||
unite.services
|
||||
urawasl.com
|
||||
@@ -294,8 +349,10 @@ web-login.eu
|
||||
weblinkinternational.com
|
||||
webnox.io
|
||||
welllivinghive.com
|
||||
whoflew.com
|
||||
wisdomhard.com
|
||||
wisewealthcircle.com
|
||||
wodeniowa.com
|
||||
wsiph2.com
|
||||
xnt.mx
|
||||
xpnuf.cn
|
||||
|
||||
184
parsedmarc/resources/maps/sortlists.py
Executable file
184
parsedmarc/resources/maps/sortlists.py
Executable file
@@ -0,0 +1,184 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from typing import Mapping, Iterable, Optional, Collection, Union, List, Dict
|
||||
|
||||
|
||||
class CSVValidationError(Exception):
    """Raised when a CSV file fails header or row validation.

    The exception message is every error joined with newlines; the
    individual messages stay available on the ``errors`` attribute.
    """

    def __init__(self, errors: Iterable[str]):
        # Materialize once: a one-shot iterable would otherwise be exhausted
        # by the join below, and keeping our own copy means later changes to
        # the caller's list cannot alter this exception.
        self.errors = list(errors)
        super().__init__("\n".join(self.errors))
|
||||
|
||||
|
||||
def sort_csv(
    filepath: Union[str, Path],
    field: str,
    *,
    sort_field_value_must_be_unique: bool = True,
    strip_whitespace: bool = True,
    fields_to_lowercase: Optional[Iterable[str]] = None,
    case_insensitive_sort: bool = False,
    required_fields: Optional[Iterable[str]] = None,
    allowed_values: Optional[Mapping[str, Collection[str]]] = None,
) -> List[Dict[str, str]]:
    """
    Read a CSV, optionally normalize rows (strip whitespace, lowercase certain fields),
    validate field values, and write the sorted CSV back to the same path.

    - filepath: Path to the CSV to sort.
    - field: The field name to sort by.
    - sort_field_value_must_be_unique: Report every row whose sort field value
      repeats the value of an earlier row.
    - strip_whitespace: Remove all whitespace at the beginning and end of field values.
    - fields_to_lowercase: Permanently lowercases these field(s) in the data.
    - case_insensitive_sort: Ignore case when sorting without changing values.
    - required_fields: A list of fields that must have data in all rows.
    - allowed_values: A mapping of allowed values for fields. A field that is
      not present in a row is not checked.

    Returns the normalized rows in sorted order.

    Raises CSVValidationError if the sort column or a required header is
    missing, or if any row fails validation. All row errors are collected
    first, and the file is not rewritten when validation fails.
    """
    path = Path(filepath)
    required = set(required_fields or [])
    lower_set = set(fields_to_lowercase or [])
    allowed_sets = {k: set(v) for k, v in (allowed_values or {}).items()}
    # A set keeps the duplicate check O(1) per row.
    seen_sort_field_values: set = set()

    # newline="" lets the csv module handle line endings itself.
    with path.open("r", newline="") as infile:
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames or []
        if field not in fieldnames:
            raise CSVValidationError([f"Missing sort column: {field!r}"])
        missing_headers = required - set(fieldnames)
        if missing_headers:
            raise CSVValidationError(
                [f"Missing required header(s): {sorted(missing_headers)}"]
            )
        rows = list(reader)

    def normalize_row(row: Dict[str, str]) -> None:
        """Strip and lowercase the row's values in place."""
        if strip_whitespace:
            for key, value in row.items():
                # Short rows hold None and over-long rows hold a list of the
                # surplus cells, so only touch real strings.
                if isinstance(value, str):
                    row[key] = value.strip()
        for name in lower_set:
            if isinstance(row.get(name), str):
                row[name] = row[name].lower()

    def validate_row(row: Dict[str, str], line_no: int, errors: List[str]) -> None:
        """Append a message to ``errors`` for every problem found in the row."""
        if None in row:
            # DictReader stores surplus cells under the key None; DictWriter
            # would reject such a row only after the file had been truncated.
            errors.append(f"Line {line_no}: More values than header columns")
        if sort_field_value_must_be_unique:
            sort_value = row[field]
            if sort_value in seen_sort_field_values:
                errors.append(f"Line {line_no}: Duplicate row for '{field}'")
            else:
                # Remember the row's value, not the column name.
                seen_sort_field_values.add(sort_value)
        for rf in required:
            val = row.get(rf)
            if val is None or val == "":
                errors.append(
                    f"Line {line_no}: Missing value for required field '{rf}'"
                )
        for name, permitted in allowed_sets.items():
            if name in row:
                val = row[name]
                if val not in permitted:
                    errors.append(
                        f"Line {line_no}: '{val}' is not an allowed value for '{name}' "
                        f"(allowed: {sorted(permitted)})"
                    )

    errors: List[str] = []
    for line_no, row in enumerate(rows, start=2):  # header is line 1
        normalize_row(row)
        validate_row(row, line_no, errors)

    if errors:
        raise CSVValidationError(errors)

    def sort_key(row: Dict[str, str]) -> str:
        # A short row yields None for the sort field; treat it as empty so it
        # can still be compared with the other (string) values.
        value = row.get(field) or ""
        return value.casefold() if case_insensitive_sort else value

    rows.sort(key=sort_key)

    # Path.open() takes the mode as its first argument, not the path.
    with path.open("w", newline="") as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

    return rows
|
||||
|
||||
|
||||
def sort_list_file(
    filepath: Union[str, Path],
    *,
    lowercase: bool = True,
    strip: bool = True,
    deduplicate: bool = True,
    remove_blank_lines: bool = True,
    ending_newline: bool = True,
    newline: Optional[str] = "\n",
):
    """Read a list from a file, sort it, optionally strip and deduplicate the values,
    then write that list back to the file.

    - filepath: The path to the file.
    - lowercase: Lowercase all values prior to sorting.
    - strip: Remove leading and trailing whitespace from every value. When
      false, only the line terminator is removed.
    - deduplicate: Keep a single copy of each value.
    - remove_blank_lines: Remove any blank lines.
    - ending_newline: End the file with a newline, even if remove_blank_lines is true.
    - newline: The newline argument passed to open() for reading and writing.

    A file with no remaining values is written back empty.
    """
    with open(filepath, mode="r", newline=newline) as infile:
        raw_lines = infile.readlines()

    lines = []
    for line in raw_lines:
        if lowercase:
            line = line.lower()
        # The terminator must always go: the values are re-joined with "\n"
        # below, so a surviving terminator would be doubled on every run.
        lines.append(line.strip() if strip else line.rstrip("\r\n"))

    if deduplicate:
        lines = list(set(lines))
    if remove_blank_lines:
        lines = [line for line in lines if line != ""]
    lines.sort()

    # A trailing empty item makes the join below end with a newline. The
    # emptiness guard avoids indexing into a list with no values left.
    if ending_newline and lines and lines[-1] != "":
        lines.append("")

    with open(filepath, mode="w", newline=newline) as outfile:
        outfile.write("\n".join(lines))
|
||||
|
||||
|
||||
def _main():
    """Sort the list files and the reverse DNS map in the working directory.

    The two domain lists and the types list are sorted in place, then the
    map CSV is sorted by its ``base_reverse_dns`` column and validated
    against the values read from the types list. A missing file prints an
    error and exits with status 1; a map that fails validation prints the
    collected errors.
    """
    map_file = "base_reverse_dns_map.csv"
    map_key = "base_reverse_dns"
    list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
    types_file = "base_reverse_dns_types.txt"

    # Check before opening so a missing file gives the same friendly error
    # as the other inputs instead of a traceback.
    if not os.path.exists(types_file):
        print(f"Error: {types_file} does not exist")
        raise SystemExit(1)
    with open(types_file) as f:
        # Strip each line: values kept with their trailing newline could
        # never equal the whitespace-stripped values read from the CSV.
        types = [line.strip() for line in f]
    types = [t for t in types if t != ""]

    # NOTE(review): sort_csv only checks a column whose header matches this
    # key exactly; confirm the map's header is spelled "Type".
    map_allowed_values = {"Type": types}

    for list_file in list_files:
        if not os.path.exists(list_file):
            print(f"Error: {list_file} does not exist")
            raise SystemExit(1)
        sort_list_file(list_file)

    # The allowed type names are case-sensitive, so keep their case.
    sort_list_file(types_file, lowercase=False)

    if not os.path.exists(map_file):
        print(f"Error: {map_file} does not exist")
        raise SystemExit(1)
    try:
        sort_csv(map_file, map_key, allowed_values=map_allowed_values)
    except CSVValidationError as e:
        print(f"{map_file} did not validate: {e}")


if __name__ == "__main__":
    _main()
|
||||
@@ -1,69 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import csv
|
||||
|
||||
# All files are looked up relative to the current working directory.
maps_dir = os.curdir
# CSV maps to sort.
map_files = [
    "base_reverse_dns_map.csv",
]
# Plain-text, one-value-per-line lists to sort.
list_files = [
    "known_unknown_base_reverse_dns.txt",
    "psl_overrides.txt",
]
|
||||
|
||||
|
||||
def sort_csv(filepath, column=0, strip_whitespace=True):
    """Sort the data rows of a CSV file in place by one column.

    - filepath: Path to the CSV file; it is rewritten with "\\n" line endings.
    - column: Index of the column to sort by.
    - strip_whitespace: Strip leading and trailing whitespace from every
      data cell. The header row is written back unchanged.

    A warning is printed for each row whose sort value repeats an earlier
    one. A file with no header row is left untouched.
    """
    with open(filepath, mode="r", newline="\n") as infile:
        reader = csv.reader(infile)
        header = next(reader, None)
        if header is None:
            # Empty file: nothing to sort, and nothing to write back.
            return
        rows = [
            [
                cell.strip() if strip_whitespace and isinstance(cell, str) else cell
                for cell in row
            ]
            for row in reader
        ]

    rows.sort(key=lambda row: row[column])

    # A set keeps the duplicate check O(1) per row.
    seen = set()
    for row in rows:
        value = row[column]
        if value in seen:
            print(f"Warning: {value} is in {filepath} multiple times")
        else:
            seen.add(value)

    with open(filepath, mode="w", newline="\n") as outfile:
        writer = csv.writer(outfile, lineterminator="\n")
        writer.writerow(header)
        writer.writerows(rows)
|
||||
|
||||
|
||||
def sort_list_file(
    filepath,
    lowercase=True,
    strip=True,
    deduplicate=True,
    remove_blank_lines=True,
    ending_newline=True,
    newline="\n",
):
    """Sort the lines of a text file in place.

    - filepath: The path to the file.
    - lowercase: Lowercase all values prior to sorting.
    - strip: Remove leading and trailing whitespace from every value. When
      false, only the line terminator is removed.
    - deduplicate: Keep a single copy of each value.
    - remove_blank_lines: Remove any blank lines.
    - ending_newline: End the file with a newline.
    - newline: The newline argument passed to open() for reading and writing.

    A file with no remaining values is written back empty.
    """
    with open(filepath, mode="r", newline=newline) as infile:
        raw_lines = infile.readlines()

    lines = []
    for line in raw_lines:
        if lowercase:
            line = line.lower()
        # The terminator must always go: the values are re-joined with "\n"
        # below, so a surviving terminator would be doubled on every run.
        lines.append(line.strip() if strip else line.rstrip("\r\n"))

    if deduplicate:
        lines = list(set(lines))
    if remove_blank_lines:
        lines = [line for line in lines if line != ""]
    lines.sort()

    # A trailing empty item makes the join below end with a newline. The
    # emptiness guard avoids indexing into a list with no values left.
    if ending_newline and lines and lines[-1] != "":
        lines.append("")

    with open(filepath, mode="w", newline=newline) as outfile:
        outfile.write("\n".join(lines))
|
||||
|
||||
|
||||
# Sort every CSV map first, then every plain-text list, in listed order.
for csv_file in map_files:
    csv_path = os.path.join(maps_dir, csv_file)
    sort_csv(csv_path)

for list_file in list_files:
    list_path = os.path.join(maps_dir, list_file)
    sort_list_file(list_path)
|
||||
Reference in New Issue
Block a user