- Fix since option to correctly work with weeks (PR #604)
- Add 183 entries to `base_reverse_dns_map.csv`
- Add 57 entries to `known_unknown_base_reverse_dns.txt`
- Check for invalid UTF-8 bytes in `base_reverse_dns_map.csv` at build
- Remove unneeded items from the `parsedmarc.resources` module at build
This commit is contained in:
Sean Whalen
2025-08-17 17:00:11 -04:00
parent 01630bb61c
commit 3feb478793
7 changed files with 40 additions and 4 deletions

10
.vscode/settings.json vendored
View File

@@ -13,6 +13,7 @@
"automodule",
"backported",
"bellsouth",
"boto",
"brakhane",
"Brightmail",
"CEST",
@@ -36,6 +37,7 @@
"expiringdict",
"fieldlist",
"genindex",
"geoip",
"geoipupdate",
"Geolite",
"geolocation",
@@ -44,7 +46,9 @@
"hostnames",
"htpasswd",
"httpasswd",
"httplib",
"IMAP",
"imapclient",
"infile",
"Interaktive",
"IPDB",
@@ -81,14 +85,18 @@
"nosecureimap",
"nosniff",
"nwettbewerb",
"opensearch",
"parsedmarc",
"passsword",
"Postorius",
"premade",
"procs",
"publicsuffix",
"publicsuffixlist",
"publixsuffix",
"pygelf",
"pypy",
"pytest",
"quickstart",
"Reindex",
"replyto",
@@ -96,10 +104,12 @@
"Rollup",
"Rpdm",
"SAMEORIGIN",
"sdist",
"Servernameone",
"setuptools",
"smartquotes",
"SMTPTLS",
"sortmaps",
"sourcetype",
"STARTTLS",
"tasklist",

View File

@@ -1,13 +1,20 @@
Changelog
=========
8.18.6
------
- Fix since option to correctly work with weeks (PR #604)
- Add 183 entries to `base_reverse_dns_map.csv`
- Add 57 entries to `known_unknown_base_reverse_dns.txt`
- Check for invalid UTF-8 bytes in `base_reverse_dns_map.csv` at build
- Remove unneeded items from the `parsedmarc.resources` module at build
8.18.5
------
- Fix CSV download
- Fix since option to correctly work with weeks
8.18.4
------

View File

@@ -18,7 +18,11 @@ if [ -d "./../parsedmarc-docs" ]; then
cp -rf build/html/* ../../parsedmarc-docs/
fi
cd ..
cd parsedmarc/resources/maps
python3 sortmaps.py
echo "Checking for invalid UTF-8 bytes in base_reverse_dns_map.csv"
python3 find_bad_utf8.py base_reverse_dns_map.csv
cd ../../..
python3 tests.py
rm -rf dist/ build/
hatch build

View File

@@ -1,2 +1,2 @@
__version__ = "8.18.5"
__version__ = "8.18.6"
USER_AGENT = f"parsedmarc/{__version__}"

View File

@@ -83,6 +83,10 @@ A CSV with the fields `source_name` and optionally `message_count`. This CSV can
A CSV file with the fields `source_name` and `message_count`. This file is not tracked by Git.
## find_bad_utf8.py
Locates invalid UTF-8 bytes in files and optionally tries to correct them. Generated by GPT5. Helped me find where I had introduced invalid bytes in `base_reverse_dns_map.csv`.
## find_unknown_base_reverse_dns.py
This is a Python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`.

View File

@@ -3,7 +3,7 @@
import os
import csv
maps_dir = os.path.join("parsedmarc", "resources", "maps")
maps_dir = os.path.join(".")
map_files = ["base_reverse_dns_map.csv"]
list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]

View File

@@ -82,3 +82,14 @@ path = "parsedmarc/constants.py"
include = [
"/parsedmarc",
]
[tool.hatch.build]
exclude = [
"base_reverse_dns.csv",
"find_bad_utf8.py",
"find_unknown_base_reverse_dns.py",
"unknown_base_reverse_dns.csv",
"sortmaps.py",
"README.md",
"*.bak"
]