From 3feb4787935f433a2e36928748fff159922fd3cc Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Sun, 17 Aug 2025 17:00:11 -0400 Subject: [PATCH] 8.18.6 - Fix since option to correctly work with weeks (PR #604) - Add 183 entries to `base_reverse_dns_map.csv` - Add 57 entries to `known_unknown_base_reverse_dns.txt` - Check for invalid UTF-8 bytes in `base_reverse_dns_map.csv` at build - Remove unneeded items from the `parsedmarc.resources` module at build --- .vscode/settings.json | 10 ++++++++++ CHANGELOG.md | 11 +++++++++-- build.sh | 4 ++++ parsedmarc/constants.py | 2 +- parsedmarc/resources/maps/README.md | 4 ++++ sortmaps.py => parsedmarc/resources/maps/sortmaps.py | 2 +- pyproject.toml | 11 +++++++++++ 7 files changed, 40 insertions(+), 4 deletions(-) rename sortmaps.py => parsedmarc/resources/maps/sortmaps.py (96%) diff --git a/.vscode/settings.json b/.vscode/settings.json index 9561d56..a5618e4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -13,6 +13,7 @@ "automodule", "backported", "bellsouth", + "boto", "brakhane", "Brightmail", "CEST", @@ -36,6 +37,7 @@ "expiringdict", "fieldlist", "genindex", + "geoip", "geoipupdate", "Geolite", "geolocation", @@ -44,7 +46,9 @@ "hostnames", "htpasswd", "httpasswd", + "httplib", "IMAP", + "imapclient", "infile", "Interaktive", "IPDB", @@ -81,14 +85,18 @@ "nosecureimap", "nosniff", "nwettbewerb", + "opensearch", "parsedmarc", "passsword", "Postorius", "premade", "procs", "publicsuffix", + "publicsuffixlist", "publixsuffix", + "pygelf", "pypy", + "pytest", "quickstart", "Reindex", "replyto", @@ -96,10 +104,12 @@ "Rollup", "Rpdm", "SAMEORIGIN", + "sdist", "Servernameone", "setuptools", "smartquotes", "SMTPTLS", + "sortmaps", "sourcetype", "STARTTLS", "tasklist", diff --git a/CHANGELOG.md b/CHANGELOG.md index dc4ce9f..1c1b544 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,20 @@ Changelog ========= +8.18.6 +------ + +- Fix since option to correctly work with weeks (PR #604) +- Add 183 entries to 
`base_reverse_dns_map.csv` - Add 57 entries to `known_unknown_base_reverse_dns.txt` - Check for invalid UTF-8 bytes in `base_reverse_dns_map.csv` at build - Remove unneeded items from the `parsedmarc.resources` module at build + 8.18.5 ------ - Fix CSV download -- Fix since option to correctly work with weeks - 8.18.4 ------ diff --git a/build.sh b/build.sh index 836dd04..db58333 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,11 @@ if [ -d "./../parsedmarc-docs" ]; then cp -rf build/html/* ../../parsedmarc-docs/ fi cd .. +cd parsedmarc/resources/maps python3 sortmaps.py +echo "Checking for invalid UTF-8 bytes in base_reverse_dns_map.csv" +python3 find_bad_utf8.py base_reverse_dns_map.csv +cd ../../.. python3 tests.py rm -rf dist/ build/ hatch build diff --git a/parsedmarc/constants.py b/parsedmarc/constants.py index bf39084..7102a17 100644 --- a/parsedmarc/constants.py +++ b/parsedmarc/constants.py @@ -1,2 +1,2 @@ -__version__ = "8.18.5" +__version__ = "8.18.6" USER_AGENT = f"parsedmarc/{__version__}" diff --git a/parsedmarc/resources/maps/README.md b/parsedmarc/resources/maps/README.md index 13b7595..15c8ca6 100644 --- a/parsedmarc/resources/maps/README.md +++ b/parsedmarc/resources/maps/README.md @@ -83,6 +83,10 @@ A CSV with the fields `source_name` and optionally `message_count`. This CSV can A CSV file with the fields `source_name` and `message_count`. This file is not tracked by Git. +## find_bad_utf8.py + +Locates invalid UTF-8 bytes in files and optionally tries to correct them. Generated by GPT5. Helped me find where I had introduced invalid bytes in `base_reverse_dns_map.csv`. + ## find_unknown_base_reverse_dns.py This is a python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. 
This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`. diff --git a/sortmaps.py b/parsedmarc/resources/maps/sortmaps.py similarity index 96% rename from sortmaps.py rename to parsedmarc/resources/maps/sortmaps.py index 35b599b..7da0ee5 100755 --- a/sortmaps.py +++ b/parsedmarc/resources/maps/sortmaps.py @@ -3,7 +3,7 @@ import os import csv -maps_dir = os.path.join("parsedmarc", "resources", "maps") +maps_dir = os.path.join(".") map_files = ["base_reverse_dns_map.csv"] list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"] diff --git a/pyproject.toml b/pyproject.toml index 6f84e9a..43b2f58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,3 +82,14 @@ path = "parsedmarc/constants.py" include = [ "/parsedmarc", ] + +[tool.hatch.build] +exclude = [ +"base_reverse_dns.csv", +"find_bad_utf8.py", +"find_unknown_base_reverse_dns.py", +"unknown_base_reverse_dns.csv", +"sortmaps.py", +"README.md", +"*.bak" +]