mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-05-21 11:25:23 +00:00
5b08627eaa
* Split tests.py into per-module tests/test_<module>.py

  The 5174-line tests.py monolith is split into per-module files under
  tests/, mirroring the checkdmarc layout:

      tests/test_init.py          parsedmarc/__init__.py parsing surface
      tests/test_cli.py           parsedmarc/cli.py + config / env-vars / SIGHUP
      tests/test_utils.py         parsedmarc/utils.py (DNS, IP info, PSL, etc.)
      tests/test_webhook.py       parsedmarc/webhook.py
      tests/test_kafkaclient.py   parsedmarc/kafkaclient.py
      tests/test_splunk.py        parsedmarc/splunk.py
      tests/test_syslog.py        parsedmarc/syslog.py
      tests/test_loganalytics.py  parsedmarc/loganalytics.py
      tests/test_gelf.py          parsedmarc/gelf.py
      tests/test_s3.py            parsedmarc/s3.py
      tests/test_maps.py          parsedmarc/resources/maps/ maintainer scripts

  The split is purely a redistribution — no test bodies changed, no tests
  added or removed. All 276 existing tests pass under the new layout.

  The current tests.py contains two kitchen-sink classes (`Test` at line 54
  and `TestEnvVarConfig` at line 2360) holding tests that span many modules.
  Their methods are routed to the correct per-module file by name prefix; the
  wholly-thematic classes (TestExtractReport, TestUtilsXxx, TestSighupReload,
  etc.) move whole. Each target file gets its own
  `class Test(unittest.TestCase)` for the redistributed kitchen-sink methods,
  plus the thematic classes verbatim.

  Wiring updates:

  - `.github/workflows/python-tests.yml`: `pytest ... tests.py` →
    `python -m pytest ... tests/` (also switches to `python -m pytest` per
    the checkdmarc convention so cwd lands on the project root).
  - `pyproject.toml`: adds `[tool.pytest.ini_options] testpaths = ["tests"]`
    and `[tool.coverage.run] source = ["parsedmarc"]` with an `omit` for
    `parsedmarc/resources/maps/*.py`. The maps scripts are maintainer-only
    batch tooling that does not ship in the wheel; excluding them from
    coverage makes the headline number reflect only installed library code.
    Runtime coverage on the new layout is 59% (was 45% with maps counted),
    and PR-B will push it to 90%+.
  - `AGENTS.md`: documents the new layout and how to run individual files /
    tests; tells future contributors not to reintroduce a monolithic
    tests.py.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* Restore 66.9% coverage baseline (count tests/ + parsedmarc)

  Master's headline 66.9% number on Codecov includes the tests.py file itself
  (99.35% covered) being measured alongside parsedmarc/*. The original
  tests.py had no `[tool.coverage.run]` block, so coverage's default —
  "measure every file imported during the run" — counted the test code as if
  it were product code.

  The split commit added `source = ["parsedmarc"]` which suppressed
  measurement of the test files (correct in principle, since test files
  aren't shipped code), and that alone made the headline number drop by ~8
  percentage points without any actual loss of testing.

  This commit swaps `source` for an explicit
  `include = ["parsedmarc/*", "tests/*"]` so both halves are measured the way
  they were on master. Verified: 276 tests, 66.96% line coverage (effectively
  unchanged from master's 66.90%).

  If you want the shipped-code-only number (the headline that this commit
  overrides), run `pytest --cov=parsedmarc tests/`. That number is currently
  59% and is the focus of the upcoming coverage-expansion PR.

  Also adds junit.xml to .gitignore so the CI artefact doesn't get
  accidentally committed.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* Restrict coverage to shipped code (`source = ["parsedmarc"]`)

  Reverts the prior commit's `include = ["tests/*"]`. Counting the test files
  toward coverage was wrong — it conflates "shipped code exercised by tests"
  with "test code that pytest auto-runs", inflates the headline number, and
  rewards writing more tests rather than tests that verify more code.
  Master's apparent 66.9% was an artefact of the old monolithic tests.py
  having no [tool.coverage.run] block at all; coverage's default behaviour
  measured every imported file, including the test file itself at ~99%
  "covered", which added ~8 percentage points to the displayed number without
  any real testing signal.

  Restricting to `source = ["parsedmarc"]` plus the existing maps omit gives
  a meaningful baseline: 59% of shipped code is exercised by the test suite
  today. That's the number the next PR is targeting to lift to 90%+ before
  the 10.0.0 release; the Codecov "drop" here is a measurement correction,
  not a regression.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
143 lines
5.7 KiB
Python
143 lines
5.7 KiB
Python
"""Tests for the map-maintenance scripts under parsedmarc/resources/maps/.
|
|
|
|
These scripts are maintainer-only batch tooling — they do not ship in the
|
|
wheel — but they still need regression coverage because they enforce the
|
|
privacy and integrity rules for the reverse-DNS map data files."""
|
|
|
|
import unittest
|
|
|
|
|
|
class TestMapScriptsIPDetection(unittest.TestCase):
|
|
"""Full-IP detection and PSL folding in the map-maintenance scripts."""
|
|
|
|
def test_collect_domain_info_detects_full_ips(self):
|
|
import parsedmarc.resources.maps.collect_domain_info as cdi
|
|
|
|
# Dotted and dashed four-octet patterns with valid octets: detected.
|
|
self.assertTrue(cdi._has_full_ip("74-208-244-234.cprapid.com"))
|
|
self.assertTrue(cdi._has_full_ip("host.192.168.1.1.example.com"))
|
|
self.assertTrue(cdi._has_full_ip("a-10-20-30-40-brand.com"))
|
|
# Three octets is NOT a full IP — OVH's reverse-DNS pattern stays safe.
|
|
self.assertFalse(cdi._has_full_ip("ip-147-135-108.us"))
|
|
# Out-of-range octet fails the 0-255 sanity check.
|
|
self.assertFalse(cdi._has_full_ip("999-1-2-3-foo.com"))
|
|
# Pure domain, no IP.
|
|
self.assertFalse(cdi._has_full_ip("example.com"))
|
|
|
|
def test_find_unknown_detects_full_ips(self):
|
|
import parsedmarc.resources.maps.find_unknown_base_reverse_dns as fu
|
|
|
|
self.assertTrue(fu._has_full_ip("170-254-144-204-nobreinternet.com.br"))
|
|
self.assertFalse(fu._has_full_ip("ip-147-135-108.us"))
|
|
self.assertFalse(fu._has_full_ip("cprapid.com"))
|
|
|
|
def test_apply_psl_override_dot_prefix(self):
|
|
import parsedmarc.resources.maps.collect_domain_info as cdi
|
|
|
|
ov = [".cprapid.com", ".linode.com"]
|
|
self.assertEqual(cdi._apply_psl_override("foo.cprapid.com", ov), "cprapid.com")
|
|
self.assertEqual(cdi._apply_psl_override("a.b.linode.com", ov), "linode.com")
|
|
|
|
def test_apply_psl_override_dash_prefix(self):
|
|
import parsedmarc.resources.maps.collect_domain_info as cdi
|
|
|
|
ov = ["-nobre.com.br"]
|
|
self.assertEqual(
|
|
cdi._apply_psl_override("1-2-3-4-nobre.com.br", ov), "nobre.com.br"
|
|
)
|
|
|
|
def test_apply_psl_override_no_match(self):
|
|
import parsedmarc.resources.maps.collect_domain_info as cdi
|
|
|
|
ov = [".cprapid.com"]
|
|
self.assertEqual(cdi._apply_psl_override("example.com", ov), "example.com")
|
|
|
|
|
|
class TestDetectPSLOverrides(unittest.TestCase):
|
|
"""Cluster detection, brand-tail extraction, and full-pipeline behaviour
|
|
for `detect_psl_overrides.py`."""
|
|
|
|
def setUp(self):
|
|
import parsedmarc.resources.maps.detect_psl_overrides as dpo
|
|
|
|
self.dpo = dpo
|
|
|
|
def test_extract_brand_tail_dot_separator(self):
|
|
self.assertEqual(
|
|
self.dpo.extract_brand_tail("74-208-244-234.cprapid.com"),
|
|
".cprapid.com",
|
|
)
|
|
|
|
def test_extract_brand_tail_dash_separator(self):
|
|
self.assertEqual(
|
|
self.dpo.extract_brand_tail("170-254-144-204-nobre.com.br"),
|
|
"-nobre.com.br",
|
|
)
|
|
|
|
def test_extract_brand_tail_no_separator(self):
|
|
self.assertEqual(
|
|
self.dpo.extract_brand_tail("host134-254-143-190tigobusiness.com.ni"),
|
|
"tigobusiness.com.ni",
|
|
)
|
|
|
|
def test_extract_brand_tail_no_ip_returns_none(self):
|
|
self.assertIsNone(self.dpo.extract_brand_tail("plain.example.com"))
|
|
|
|
def test_extract_brand_tail_rejects_short_tail(self):
|
|
"""A tail shorter than MIN_TAIL_LEN is rejected to avoid folding to `.com`."""
|
|
# Four-octet IP followed by only `.br` (2 chars after the dot) — too short.
|
|
self.assertIsNone(self.dpo.extract_brand_tail("1-2-3-4.br"))
|
|
|
|
def test_detect_clusters_meets_threshold(self):
|
|
domains = [
|
|
"1-2-3-4.cprapid.com",
|
|
"5-6-7-8.cprapid.com",
|
|
"9-10-11-12.cprapid.com",
|
|
"1-2-3-4-other.com.br", # not enough of these
|
|
]
|
|
clusters = self.dpo.detect_clusters(domains, threshold=3, known_overrides=set())
|
|
self.assertIn(".cprapid.com", clusters)
|
|
self.assertEqual(len(clusters[".cprapid.com"]), 3)
|
|
self.assertNotIn("-other.com.br", clusters)
|
|
|
|
def test_detect_clusters_honours_threshold(self):
|
|
domains = [
|
|
"1-2-3-4.cprapid.com",
|
|
"5-6-7-8.cprapid.com",
|
|
]
|
|
clusters = self.dpo.detect_clusters(domains, threshold=3, known_overrides=set())
|
|
self.assertEqual(clusters, {})
|
|
|
|
def test_detect_clusters_skips_known_overrides(self):
|
|
"""Tails already in psl_overrides.txt must not be re-proposed."""
|
|
domains = [
|
|
"1-2-3-4.cprapid.com",
|
|
"5-6-7-8.cprapid.com",
|
|
"9-10-11-12.cprapid.com",
|
|
]
|
|
clusters = self.dpo.detect_clusters(
|
|
domains, threshold=3, known_overrides={".cprapid.com"}
|
|
)
|
|
self.assertNotIn(".cprapid.com", clusters)
|
|
|
|
def test_apply_override_matches_first(self):
|
|
"""apply_override iterates in list order and returns on the first match."""
|
|
ov = [".cprapid.com", "-nobre.com.br"]
|
|
self.assertEqual(
|
|
self.dpo.apply_override("1-2-3-4.cprapid.com", ov), "cprapid.com"
|
|
)
|
|
self.assertEqual(
|
|
self.dpo.apply_override("1-2-3-4-nobre.com.br", ov), "nobre.com.br"
|
|
)
|
|
self.assertEqual(self.dpo.apply_override("unrelated.com", ov), "unrelated.com")
|
|
|
|
def test_has_full_ip_shared_with_other_scripts(self):
|
|
"""The detect script's IP check must agree with the other map scripts."""
|
|
self.assertTrue(self.dpo.has_full_ip("74-208-244-234.cprapid.com"))
|
|
self.assertFalse(self.dpo.has_full_ip("ip-147-135-108.us"))
|
|
self.assertFalse(self.dpo.has_full_ip("example.com"))
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this module's tests with verbose output when executed as a script.
    unittest.main(verbosity=2)
|