mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-05-20 10:55:24 +00:00
ff6f75d740
* Drop base_reverse_dns_types.txt; sortlists.py now reads types from README.md The .txt file duplicated the README's industry list and introduced drift risk — twice in the project's history we had to add types to the .txt only because the README had been updated independently. Make the README the single source of truth. - Add `<!-- types-list:start -->` / `<!-- types-list:end -->` HTML comment markers around the bullet list in parsedmarc/resources/maps/README.md. Markers don't render in GitHub's preview. - New `load_types_from_readme()` in sortlists.py parses the bullet items between the markers and returns them. Errors clearly if the README is missing or the markers are absent. - Delete base_reverse_dns_types.txt. - Fix a pre-existing typo in README precedence rule 4: `Web Hosting` → `Web Host` (matches the canonical type used in 4,176 map rows). Smoke test: feeding a row with a bogus type still triggers the validator (`'NotARealType' is not an allowed value for 'type'`), confirming the README-derived list flows through identically. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * sortlists.py: normalize README types-list block in place Before validating the map, the validator now sorts the <!-- types-list:start --> / <!-- types-list:end --> block in README.md alphabetically (case-insensitively), trims leading and trailing whitespace from each item, and deduplicates case- insensitively, rewriting the README in place if any of those need fixing. Errors clearly when two entries differ only by casing (which would otherwise silently lose one). Adding a new category is now just inserting a `- New Type` line anywhere inside the markers — `sortlists.py` will tidy it on the next run. Same shape as how the validator already normalizes known_unknown_base_reverse_dns.txt and psl_overrides.txt. The pure read path is preserved as `load_types_from_readme()` for callers that don't want a side-effecting rewrite (tests, downstream tooling). 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * Stop shipping maintainer scripts; bump to 9.11.2 The exclude list in [tool.hatch.build] was originally meant to keep maintainer-only batch tooling under parsedmarc/resources/maps/ out of the wheel and sdist (it lists `find_bad_utf8.py`, `find_unknown_base_reverse_dns.py`, the renamed-and-removed `sortmaps.py`). The list never grew when new tools were added, so `collect_domain_info.py`, `classify_unknown_domains.py`, `detect_psl_overrides.py`, `detect_rebrands.py`, and `sortlists.py` all started shipping in distributions despite contributing nothing to runtime functionality. Replace the per-file basename list with a single glob pattern: parsedmarc/resources/maps/[!_]*.py The leading-`_` exception keeps `__init__.py` shipping (required so that `importlib.resources.files(parsedmarc.resources.maps)` can locate the bundled CSV/TXT data files), while excluding any other .py file under that directory — including future maintainer scripts that haven't been written yet. Drop the now-redundant per-file entries from the exclude list: `find_bad_utf8.py`, `find_unknown_base_reverse_dns.py`, and the already-removed `sortmaps.py`. The non-.py exclusions stay (`base_reverse_dns.csv`, `unknown_base_reverse_dns.csv`, `README.md`, `*.bak`). Verified with `hatch build`: - Wheel under parsedmarc/resources/maps/: __init__.py + 3 data files (CSV/TXTs), no maintainer .py - sdist matches - Clean-venv install of the built wheel loads 298 PSL overrides and `get_base_domain('host01.netlify.app')` returns `netlify.app` Bump to 9.11.2 since this changes shipped artifacts. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Sean Whalen <seanthegeek@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
99 lines
2.5 KiB
TOML
99 lines
2.5 KiB
TOML
[build-system]
# Build backend and the packages needed to run it. Only `requires`,
# `build-backend` and `backend-path` are defined for this table; the
# supported Python range is declared by `requires-python` under [project].
# NOTE(review): a `requires_python = ">=3.10,<3.15"` key used to sit here.
# It is not part of the build-system schema, so it had no effect. If the
# <3.15 cap is wanted, add it to [project].requires-python.
requires = ["hatchling>=1.27.0"]
build-backend = "hatchling.build"
|
|
|
|
[project]
name = "parsedmarc"
# The version is not hard-coded here; Hatch reads it from the file named
# under [tool.hatch.version].
dynamic = ["version"]
description = "A Python package and CLI for parsing aggregate and forensic DMARC reports"
readme = "README.md"
# SPDX license expression. No `License ::` trove classifier is listed
# below: those classifiers are deprecated once a license expression is set.
license = "Apache-2.0"
authors = [{ name = "Sean Whalen", email = "whalenster@gmail.com" }]
keywords = ["DMARC", "parser", "reporting"]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
]
requires-python = ">=3.10"
|
|
# Runtime requirements (PEP 508 strings), one per line, kept in
# alphabetical order.
# NOTE(review): elasticsearch-dsl is pinned exactly and elasticsearch is
# capped below 7.14 — presumably for client/server compatibility; confirm
# before loosening either.
dependencies = [
    "azure-identity>=1.8.0",
    "azure-monitor-ingestion>=1.0.0",
    "boto3>=1.16.63",
    "dateparser>=1.1.1",
    "dnspython>=2.0.0",
    "elasticsearch-dsl==7.4.0",
    "elasticsearch<7.14.0",
    "expiringdict>=1.1.4",
    "kafka-python-ng>=2.2.2",
    "lxml>=4.4.0",
    "mailsuite[gmail,msgraph]>=2.0.2",
    "maxminddb>=2.0.0",
    "opensearch-py>=2.4.2,<=4.0.0",
    "publicsuffixlist>=0.10.0",
    "pygelf>=0.4.2",
    "PyYAML>=6.0.3",
    "requests>=2.22.0",
    "tqdm>=4.31.1",
    "urllib3>=1.25.7",
    "xmltodict>=0.12.0",
]
|
|
|
|
[project.optional-dependencies]
# Extra for building, testing, linting and documenting the project:
# install with `parsedmarc[build]`.
build = [
    # Used only by maintainer tooling under parsedmarc/resources/maps/ —
    # `collect_domain_info.py --use-search-fallback` falls back to a
    # DuckDuckGo search when the homepage fetch returns a bot-block / parked
    # / empty page. Optional import; the script runs without it as long as
    # the fallback flag isn't passed.
    "ddgs>=9.0.0",
    "hatch>=1.14.0",
    "myst-parser[linkify]",
    "pytest",
    "pytest-cov",
    "ruff",
    "sphinx",
    "sphinx_rtd_theme",
]
|
|
|
|
[project.scripts]
# Console entry point: the `parsedmarc` command calls `_main` in the
# `parsedmarc.cli` module.
parsedmarc = "parsedmarc.cli:_main"
|
|
|
|
[project.urls]
# Project link published in the package metadata.
Homepage = "https://domainaware.github.io/parsedmarc"
|
|
|
|
[tool.hatch.version]
# File Hatch reads the package version from; `version` is declared
# dynamic under [project] for this reason.
path = "parsedmarc/constants.py"
|
|
|
|
[tool.hatch.build]
# Patterns left out of built distributions (wheel and sdist).
exclude = [
    "base_reverse_dns.csv",
    "unknown_base_reverse_dns.csv",
    "README.md",
    "*.bak",
    # Maintenance tooling: any Python file under parsedmarc/resources/maps/
    # whose name doesn't start with `_` (i.e. everything except __init__.py,
    # which must keep shipping for `importlib.resources.files()` lookups).
    "parsedmarc/resources/maps/[!_]*.py",
]

[tool.hatch.build.targets.sdist]
# The sdist is built from the package directory only.
include = ["/parsedmarc"]
|