From a3d8651b74ee91190dbbebd37a401ef834bebc75 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Thu, 23 Apr 2026 17:00:55 -0400 Subject: [PATCH] Skip caching weak-fallback IP attributions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_reverse_dns() swallows every DNSException as None, so a transient PTR lookup failure (timeout, SERVFAIL, socket error) is indistinguishable from a genuine no-PTR case. When that lands on the raw-as_name fallback branch (no map match for the ASN domain either), the weak result was getting cached in the 4-hour IP-info cache — locking in the misattribution even after the PTR became resolvable. Observed in the wild: 91.244.70.212 has PTR customer.evolus-ix.com (which the map correctly classifies as Evolus IX, ISP), but the user's dataset showed it with source_name = raw as_name and source_type = null — the signature of a transient PTR lookup failure that then got cached. Fix: skip the cache write when the row is in that specific weak-fallback state (reverse_dns=None AND type=None AND name=as_name). PTR-backed matches and ASN-domain matches are stable attributions and continue to be cached as before. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 6 ++++++ parsedmarc/constants.py | 2 +- parsedmarc/utils.py | 17 +++++++++++++++- tests.py | 43 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a9d982..2d03717 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 9.10.2 + +### Fixed + +- `get_ip_address_info()` no longer caches weak-fallback attributions (no PTR + no ASN-domain map match → raw `as_name` used as `source_name`, `source_type` left null). `get_reverse_dns()` swallows every `DNSException` as `None`, so a transient PTR lookup failure (timeout, SERVFAIL, socket error) is indistinguishable from a genuine no-PTR case at that layer — caching the weak result would poison the 4-hour cache with a misattribution that persisted even after the PTR became resolvable again. PTR-backed matches and ASN-domain matches (both stable attributions) are still cached as before; only the specific `reverse_dns=None AND type=None AND name=as_name` state skips the cache write so the next lookup retries. + ## 9.10.1 ### Fixed diff --git a/parsedmarc/constants.py b/parsedmarc/constants.py index d5bbebf..064c5c7 100644 --- a/parsedmarc/constants.py +++ b/parsedmarc/constants.py @@ -1,4 +1,4 @@ -__version__ = "9.10.1" +__version__ = "9.10.2" USER_AGENT = f"parsedmarc/{__version__}" diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py index 4e880ff..deb6a24 100644 --- a/parsedmarc/utils.py +++ b/parsedmarc/utils.py @@ -952,7 +952,22 @@ def get_ip_address_info( # classification. Better than leaving the row unattributed. info["name"] = info["as_name"] - if cache is not None: + # Don't cache weak-fallback attributions — rows where we had no PTR AND + # the ASN domain wasn't in the map, so ``name`` is just the raw ``as_name`` + # from the MMDB. ``get_reverse_dns()`` swallows every ``DNSException`` as + # ``None``, so a transient PTR lookup failure (timeout, SERVFAIL, OSError) + # is indistinguishable from a real no-PTR case at this point. Caching the + # weak result would poison the 4-hour cache with a misattribution even + # after the PTR becomes resolvable again. Re-running on the next lookup + # is cheap and either produces a proper PTR-backed match or the same + # (still-best-effort) ASN attribution. + weak_fallback = ( + info["reverse_dns"] is None + and info["type"] is None + and info["name"] is not None + and info["name"] == info["as_name"] + ) + if cache is not None and not weak_fallback: cache[ip_address] = info logger.debug(f"IP address {ip_address} added to cache") diff --git a/tests.py b/tests.py index 54f34cb..dfd2933 100755 --- a/tests.py +++ b/tests.py @@ -274,6 +274,49 @@ class Test(unittest.TestCase): self.assertEqual(info["name"], "Some Unmapped Org, Inc.") self.assertEqual(info["as_domain"], "unmapped-for-this-test.example") + def testWeakFallbackAttributionIsNotCached(self): + """A transient PTR lookup failure that lands on the raw-as_name + fallback must not poison the cache. ``get_reverse_dns()`` swallows + every DNSException as ``None``, so a timeout looks identical to a + real no-PTR case — if we cached the weak attribution, the 4-hour + TTL would lock in a misattribution even after the PTR returns. + + PTR-backed matches and ASN-domain matches are stable attributions + and must still be cached, so we only skip the specific + ``reverse_dns=None AND type=None AND name=as_name`` state.""" + from unittest.mock import patch + from expiringdict import ExpiringDict + + cache = ExpiringDict(max_len=100, max_age_seconds=14400) + + # Scenario 1: weak fallback (no PTR, unmapped as_domain, raw as_name + # used). Must NOT be cached. + with patch( + "parsedmarc.utils.get_ip_address_db_record", + return_value={ + "country": "US", + "asn": 64496, + "as_name": "Some Unmapped Org, Inc.", + "as_domain": "unmapped-for-this-test.example", + }, + ): + parsedmarc.utils.get_ip_address_info("192.0.2.1", offline=True, cache=cache) + self.assertNotIn("192.0.2.1", cache) + + # Scenario 2: ASN-domain match (no PTR, as_domain IS in the map). + # Stable attribution — must still be cached. + with patch( + "parsedmarc.utils.get_ip_address_db_record", + return_value={ + "country": "US", + "asn": 15169, + "as_name": "Google LLC", + "as_domain": "google.com", + }, + ): + parsedmarc.utils.get_ip_address_info("192.0.2.2", offline=True, cache=cache) + self.assertIn("192.0.2.2", cache) + def testIPinfoAPIPrimarySourceAndInvalidKeyIsFatal(self): """With an API token configured, lookups hit the API first via the documented ?token= query param. A 401/403 response propagates as