mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-05-01 09:49:27 +00:00
Skip caching weak-fallback IP attributions
get_reverse_dns() swallows every DNSException as None, so a transient PTR lookup failure (timeout, SERVFAIL, socket error) is indistinguishable from a genuine no-PTR case. When that lands on the raw-as_name fallback branch (no map match for the ASN domain either), the weak result was getting cached in the 4-hour IP-info cache — locking in the misattribution even after the PTR became resolvable. Observed in the wild: 91.244.70.212 has PTR customer.evolus-ix.com (which the map correctly classifies as Evolus IX, ISP), but the user's dataset showed it with source_name = raw as_name and source_type = null — the signature of a transient PTR lookup failure that then got cached. Fix: skip the cache write when the row is in that specific weak-fallback state (reverse_dns=None AND type=None AND name=as_name). PTR-backed matches and ASN-domain matches are stable attributions and continue to be cached as before. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,11 @@
|
||||
# Changelog
|
||||
|
||||
## 9.10.2
|
||||
|
||||
### Fixed
|
||||
|
||||
- `get_ip_address_info()` no longer caches weak-fallback attributions (no PTR + no ASN-domain map match → raw `as_name` used as `source_name`, `source_type` left null). `get_reverse_dns()` swallows every `DNSException` as `None`, so a transient PTR lookup failure (timeout, SERVFAIL, socket error) is indistinguishable from a genuine no-PTR case at that layer — caching the weak result would poison the 4-hour cache with a misattribution that persisted even after the PTR became resolvable again. PTR-backed matches and ASN-domain matches (both stable attributions) are still cached as before; only the specific `reverse_dns=None AND type=None AND name=as_name` state skips the cache write so the next lookup retries.
|
||||
|
||||
## 9.10.1
|
||||
|
||||
### Fixed
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__version__ = "9.10.1"
|
||||
__version__ = "9.10.2"
|
||||
|
||||
USER_AGENT = f"parsedmarc/{__version__}"
|
||||
|
||||
|
||||
+16
-1
@@ -952,7 +952,22 @@ def get_ip_address_info(
|
||||
# classification. Better than leaving the row unattributed.
|
||||
info["name"] = info["as_name"]
|
||||
|
||||
if cache is not None:
|
||||
# Don't cache weak-fallback attributions — rows where we had no PTR AND
|
||||
# the ASN domain wasn't in the map, so ``name`` is just the raw ``as_name``
|
||||
# from the MMDB. ``get_reverse_dns()`` swallows every ``DNSException`` as
|
||||
# ``None``, so a transient PTR lookup failure (timeout, SERVFAIL, OSError)
|
||||
# is indistinguishable from a real no-PTR case at this point. Caching the
|
||||
# weak result would poison the 4-hour cache with a misattribution even
|
||||
# after the PTR becomes resolvable again. Re-running on the next lookup
|
||||
# is cheap and either produces a proper PTR-backed match or the same
|
||||
# (still-best-effort) ASN attribution.
|
||||
weak_fallback = (
|
||||
info["reverse_dns"] is None
|
||||
and info["type"] is None
|
||||
and info["name"] is not None
|
||||
and info["name"] == info["as_name"]
|
||||
)
|
||||
if cache is not None and not weak_fallback:
|
||||
cache[ip_address] = info
|
||||
logger.debug(f"IP address {ip_address} added to cache")
|
||||
|
||||
|
||||
@@ -274,6 +274,49 @@ class Test(unittest.TestCase):
|
||||
self.assertEqual(info["name"], "Some Unmapped Org, Inc.")
|
||||
self.assertEqual(info["as_domain"], "unmapped-for-this-test.example")
|
||||
|
||||
def testWeakFallbackAttributionIsNotCached(self):
    """Weak raw-``as_name`` fallbacks must bypass the IP-info cache.

    ``get_reverse_dns()`` reports every DNSException as ``None``, so a
    PTR lookup that merely timed out looks exactly like an address that
    genuinely has no PTR. Caching the resulting weak attribution would
    let the 4-hour TTL pin a misattribution in place even after the PTR
    becomes resolvable again.

    Only the ``reverse_dns=None AND type=None AND name=as_name`` state is
    excluded from the cache; PTR-backed and ASN-domain matches are stable
    attributions and must still be stored.
    """
    from unittest.mock import patch

    from expiringdict import ExpiringDict

    db_lookup_target = "parsedmarc.utils.get_ip_address_db_record"
    # 14400 seconds == the 4-hour TTL described above.
    ip_cache = ExpiringDict(max_len=100, max_age_seconds=14400)

    # Scenario 1: no PTR (offline), as_domain absent from the map, so the
    # raw as_name becomes the name. This weak row must NOT be cached.
    unmapped_record = {
        "country": "US",
        "asn": 64496,
        "as_name": "Some Unmapped Org, Inc.",
        "as_domain": "unmapped-for-this-test.example",
    }
    with patch(db_lookup_target, return_value=unmapped_record):
        parsedmarc.utils.get_ip_address_info(
            "192.0.2.1", offline=True, cache=ip_cache
        )
    self.assertNotIn("192.0.2.1", ip_cache)

    # Scenario 2: still no PTR, but as_domain IS in the map. That is a
    # stable attribution, so the row must still be cached.
    mapped_record = {
        "country": "US",
        "asn": 15169,
        "as_name": "Google LLC",
        "as_domain": "google.com",
    }
    with patch(db_lookup_target, return_value=mapped_record):
        parsedmarc.utils.get_ip_address_info(
            "192.0.2.2", offline=True, cache=ip_cache
        )
    self.assertIn("192.0.2.2", ip_cache)
|
||||
|
||||
def testIPinfoAPIPrimarySourceAndInvalidKeyIsFatal(self):
|
||||
"""With an API token configured, lookups hit the API first via the
|
||||
documented ?token= query param. A 401/403 response propagates as
|
||||
|
||||
Reference in New Issue
Block a user