diff --git a/CHANGELOG.md b/CHANGELOG.md index eae978d..149ed7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## 9.8.0 + +### Changes + +- Replaced the bundled DB-IP Country Lite database with the [IPinfo Lite] database (`parsedmarc/resources/ipinfo/ipinfo_lite.mmdb`, under the [Creative Commons Attribution-ShareAlike 4.0 License][cc-by-sa-4]) for greater IP-to-country lookup accuracy. The download URL / cached filename / packaged module path have all moved from `dbip/dbip-country-lite.mmdb` to `ipinfo/ipinfo_lite.mmdb`. +- `get_ip_address_country()` now reads MMDBs with `maxminddb` directly and handles both schemas — the IPinfo flat top-level `country_code` field and the MaxMind/DBIP nested `country.iso_code` field — so users who drop in their own MMDB from any of these providers continue to work. The on-disk search list for user-supplied files now includes `ipinfo_lite.mmdb` in addition to `GeoLite2-Country.mmdb` and `dbip-country-lite*.mmdb`. +- Dropped the `geoip2` dependency (its only use was the `.country()` helper, which is incompatible with the IPinfo schema). Added `maxminddb` as a direct dependency — it was already installed transitively through `geoip2`, so this is a no-op for most environments. + +### Upgrade notes + +- Callers that imported `parsedmarc.resources.dbip` directly need to switch to `parsedmarc.resources.ipinfo`. The `parsedmarc.resources.dbip` module has been removed. +- Callers that imported `geoip2` only because `parsedmarc` depended on it will need to add it to their own requirements. `parsedmarc` itself no longer depends on `geoip2`. +- The auto-update download URL used by previous parsedmarc versions (`.../dbip/dbip-country-lite.mmdb`) is no longer hosted on `master`; those versions will fail to download and fall back to their bundled copy, which is the documented behavior of `load_ip_db()`. 
+ +[IPinfo Lite]: https://ipinfo.io/lite +[cc-by-sa-4]: https://creativecommons.org/licenses/by-sa/4.0/deed.en + ## 9.7.1 ### Changes diff --git a/docs/source/installation.md b/docs/source/installation.md index b1f1a61..530906d 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -44,19 +44,20 @@ least: ### geoipupdate setup :::{note} -Starting in `parsedmarc` 7.1.0, a static copy of the -[IP to Country Lite database] from IPDB is distributed with -`parsedmarc`, under the terms of the -[Creative Commons Attribution 4.0 International License]. +Starting in `parsedmarc` 9.8.0, a static copy of the +[IPinfo Lite] database is distributed with `parsedmarc`, under the +terms of the [Creative Commons Attribution-ShareAlike 4.0 License], as a fallback if the [MaxMind GeoLite2 Country database] is not -installed. +installed. Prior versions bundled the DB-IP Country Lite database +instead; both share the same MMDB format, so users who have installed +either (or a MaxMind GeoLite2) database locally will continue to work +without changes. -Starting in `parsedmarc` 9.6.0, the bundled DB-IP database is -automatically updated at startup by downloading the latest copy from -GitHub, unless the `offline` flag is set. The database is cached -locally and refreshed on each run (or on `SIGHUP` in watch mode). -If the download fails, a previously cached copy or the bundled -database is used as a fallback. +The bundled database is automatically updated at startup by downloading +the latest copy from GitHub, unless the `offline` flag is set. The +database is cached locally and refreshed on each run (or on `SIGHUP` +in watch mode). If the download fails, a previously cached copy or the +bundled database is used as a fallback. 
The download URL can be overridden with the `ip_db_url` setting, and the location of a local database file can be overridden with the @@ -203,7 +204,8 @@ sudo apt-get install libemail-outlook-message-perl [Component "contrib"]: https://wiki.debian.org/SourcesList#Component [geoipupdate]: https://github.com/maxmind/geoipupdate [geoipupdate releases page on github]: https://github.com/maxmind/geoipupdate/releases -[ip to country lite database]: https://db-ip.com/db/download/ip-to-country-lite +[ipinfo lite]: https://ipinfo.io/lite +[creative commons attribution-sharealike 4.0 license]: https://creativecommons.org/licenses/by-sa/4.0/deed.en [license keys]: https://www.maxmind.com/en/accounts/current/license-key [maxmind geoipupdate page]: https://dev.maxmind.com/geoip/updating-databases/ [maxmind geolite2 country database]: https://dev.maxmind.com/geoip/geolite2-free-geolocation-data diff --git a/docs/source/usage.md b/docs/source/usage.md index c3b1584..27d682e 100644 --- a/docs/source/usage.md +++ b/docs/source/usage.md @@ -133,7 +133,7 @@ The full set of configuration options are: - `forensic_json_filename` - str: filename for the forensic JSON output file - `ip_db_path` - str: An optional custom path to a MMDB file - from MaxMind or DBIP + from IPinfo, MaxMind, or DBIP - `ip_db_url` - str: Overrides the default download URL for the IP-to-country database (env var: `PARSEDMARC_GENERAL_IP_DB_URL`) - `offline` - bool: Do not use online queries for geolocation diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 12b6b70..f15293d 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -317,7 +317,7 @@ def _parse_report_record( always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map file reverse_dns_map_url (str): URL to a reverse DNS map file - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP offline (bool): Do not 
query online for geolocation or DNS nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) @@ -685,7 +685,7 @@ def parse_aggregate_report_xml( Args: xml (str): A string of DMARC aggregate report XML - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map file reverse_dns_map_url (str): URL to a reverse DNS map file @@ -1009,7 +1009,7 @@ def parse_aggregate_report_file( always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map file reverse_dns_map_url (str): URL to a reverse DNS map file - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Sets the DNS timeout in seconds @@ -1257,7 +1257,7 @@ def parse_forensic_report( Args: feedback_report (str): A message's feedback report as a string sample (str): The RFC 822 headers or RFC 822 message sample - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map file reverse_dns_map_url (str): URL to a reverse DNS map file @@ -1493,7 +1493,7 @@ def parse_report_email( Args: input_: An emailed DMARC report in RFC 822 format, as bytes or a string - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map reverse_dns_map_url (str): URL to a reverse DNS map @@ -1715,7 +1715,7 @@ def 
parse_report_file( or other transient errors strip_attachment_payloads (bool): Remove attachment payloads from forensic report results - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map reverse_dns_map_url (str): URL to a reverse DNS map @@ -1814,7 +1814,7 @@ def get_dmarc_reports_from_mbox( always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map file reverse_dns_map_url (str): URL to a reverse DNS map file - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP offline (bool): Do not make online queries for geolocation or DNS normalize_timespan_threshold_hours (float): Normalize timespans beyond this @@ -1907,7 +1907,7 @@ def get_dmarc_reports_from_mailbox( archive_folder (str): The folder to move processed mail to delete (bool): Delete messages after processing them test (bool): Do not move or delete messages after processing them - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map file reverse_dns_map_url (str): URL to a reverse DNS map file @@ -2249,7 +2249,7 @@ def watch_inbox( test (bool): Do not move or delete messages after processing them check_timeout (int): Number of seconds to wait for a IMAP IDLE response or the number of seconds until the next mail check - ip_db_path (str): Path to a MMDB file from MaxMind or DBIP + ip_db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP always_use_local_files (bool): Do not download files reverse_dns_map_path (str): Path to a reverse DNS map file reverse_dns_map_url (str): URL to a reverse DNS map file diff --git 
a/parsedmarc/constants.py b/parsedmarc/constants.py index 2058710..6039f1b 100644 --- a/parsedmarc/constants.py +++ b/parsedmarc/constants.py @@ -1,4 +1,4 @@ -__version__ = "9.7.1" +__version__ = "9.8.0" USER_AGENT = f"parsedmarc/{__version__}" diff --git a/parsedmarc/resources/dbip/README.md b/parsedmarc/resources/dbip/README.md deleted file mode 100644 index 85f222f..0000000 --- a/parsedmarc/resources/dbip/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# About - -`dbip-country-lite.mmdb` is provided by [dbip][dbip] under a -[Creative Commons Attribution 4.0 International License][cc]. - -[dbip]: https://db-ip.com/db/download/ip-to-country-lite -[cc]: http://creativecommons.org/licenses/by/4.0/ diff --git a/parsedmarc/resources/dbip/dbip-country-lite.mmdb b/parsedmarc/resources/dbip/dbip-country-lite.mmdb deleted file mode 100755 index b7f4cfd..0000000 Binary files a/parsedmarc/resources/dbip/dbip-country-lite.mmdb and /dev/null differ diff --git a/parsedmarc/resources/ipinfo/README.md b/parsedmarc/resources/ipinfo/README.md new file mode 100644 index 0000000..d4b5304 --- /dev/null +++ b/parsedmarc/resources/ipinfo/README.md @@ -0,0 +1,7 @@ +# About + +`ipinfo_lite.mmdb` is provided by [IPinfo][ipinfo] under the +[Creative Commons Attribution-ShareAlike 4.0 License][cc]. 
+ +[ipinfo]: https://ipinfo.io/lite +[cc]: https://creativecommons.org/licenses/by-sa/4.0/deed.en diff --git a/parsedmarc/resources/dbip/__init__.py b/parsedmarc/resources/ipinfo/__init__.py similarity index 100% rename from parsedmarc/resources/dbip/__init__.py rename to parsedmarc/resources/ipinfo/__init__.py diff --git a/parsedmarc/resources/ipinfo/ipinfo_lite.mmdb b/parsedmarc/resources/ipinfo/ipinfo_lite.mmdb new file mode 100644 index 0000000..58ef036 Binary files /dev/null and b/parsedmarc/resources/ipinfo/ipinfo_lite.mmdb differ diff --git a/parsedmarc/utils.py b/parsedmarc/utils.py index 1df6ea2..9f85728 100644 --- a/parsedmarc/utils.py +++ b/parsedmarc/utils.py @@ -32,13 +32,12 @@ except ImportError: import dns.exception import dns.resolver import dns.reversename -import geoip2.database -import geoip2.errors +import maxminddb import publicsuffixlist import requests from dateutil.parser import parse as parse_date -import parsedmarc.resources.dbip +import parsedmarc.resources.ipinfo import parsedmarc.resources.maps from parsedmarc.constants import ( DEFAULT_DNS_MAX_RETRIES, @@ -416,8 +415,8 @@ def load_ip_db( if url is None: url = ( "https://github.com/domainaware/parsedmarc/raw/" - "refs/heads/master/parsedmarc/resources/dbip/" - "dbip-country-lite.mmdb" + "refs/heads/master/parsedmarc/resources/ipinfo/" + "ipinfo_lite.mmdb" ) if local_file_path is not None and os.path.isfile(local_file_path): @@ -426,7 +425,7 @@ def load_ip_db( return cache_dir = os.path.join(tempfile.gettempdir(), "parsedmarc") - cached_path = os.path.join(cache_dir, "dbip-country-lite.mmdb") + cached_path = os.path.join(cache_dir, "ipinfo_lite.mmdb") if not (offline or always_use_local_file): try: @@ -454,9 +453,7 @@ def load_ip_db( return # Final fallback: bundled copy - _IP_DB_PATH = str( - files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb") - ) + _IP_DB_PATH = str(files(parsedmarc.resources.ipinfo).joinpath("ipinfo_lite.mmdb")) logger.info("Using bundled IP database") 
@@ -469,12 +466,13 @@ def get_ip_address_country( Args: ip_address (str): The IP address to query for - db_path (str): Path to a MMDB file from MaxMind or DBIP + db_path (str): Path to a MMDB file from IPinfo, MaxMind, or DBIP Returns: str: And ISO country code associated with the given IP address """ db_paths = [ + "ipinfo_lite.mmdb", "GeoLite2-Country.mmdb", "/usr/local/share/GeoIP/GeoLite2-Country.mmdb", "/usr/share/GeoIP/GeoLite2-Country.mmdb", @@ -490,12 +488,12 @@ def get_ip_address_country( if db_path is not None: if not os.path.isfile(db_path): - db_path = None logger.warning( f"No file exists at {db_path}. Falling back to an " - "included copy of the IPDB IP to Country " + "included copy of the IPinfo IP to Country " "Lite database." ) + db_path = None if db_path is None: for system_path in db_paths: @@ -508,21 +506,28 @@ def get_ip_address_country( db_path = _IP_DB_PATH else: db_path = str( - files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb") + files(parsedmarc.resources.ipinfo).joinpath("ipinfo_lite.mmdb") ) db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime) if db_age > timedelta(days=30): logger.warning("IP database is more than a month old") - db_reader = geoip2.database.Reader(db_path) + db_reader = maxminddb.open_database(db_path) + record = db_reader.get(ip_address) - country = None - - try: - country = db_reader.country(ip_address).country.iso_code - except geoip2.errors.AddressNotFoundError: - pass + # Support both the IPinfo schema (flat top-level ``country_code``) and the + # MaxMind/DBIP schema (nested ``country.iso_code``) so users dropping in + # their own MMDB from any of these providers keep working. 
+ country: Optional[str] = None + if isinstance(record, dict): + code = record.get("country_code") + if code is None: + nested = record.get("country") + if isinstance(nested, dict): + code = nested.get("iso_code") + if isinstance(code, str): + country = code return country diff --git a/pyproject.toml b/pyproject.toml index a33018c..458ee06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,6 @@ dependencies = [ "elasticsearch-dsl==7.4.0", "elasticsearch<7.14.0", "expiringdict>=1.1.4", - "geoip2>=3.0.0", "google-api-core>=2.4.0", "google-api-python-client>=2.35.0", "google-auth-httplib2>=0.1.0", @@ -49,6 +48,7 @@ dependencies = [ "kafka-python-ng>=2.2.2", "lxml>=4.4.0", "mailsuite>=1.11.2", + "maxminddb>=2.0.0", "msgraph-core==0.2.2", "opensearch-py>=2.4.2,<=4.0.0", "publicsuffixlist>=0.10.0",