diff --git a/CHANGELOG.md b/CHANGELOG.md index a3cae01..c674c0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 9.6.0 + +### Changes + +- The included DB-IP Country Lite database is now automatically updated at startup (and on SIGHUP in watch mode) by downloading the latest copy from GitHub, unless the `offline` flag is set. Falls back to a previously cached copy or the bundled database on failure. This allows the IP-to-country database to stay current without requiring a new package release. +- Updated the included DB-IP Country Lite database to the 2026-04 release. +- Added the `ip_db_url` configuration option (`PARSEDMARC_GENERAL_IP_DB_URL` env var) to override the default download URL for the IP-to-country database. + ## 9.5.5 ### Fixed diff --git a/docs/source/installation.md b/docs/source/installation.md index 5a8a519..b1f1a61 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -49,11 +49,17 @@ Starting in `parsedmarc` 7.1.0, a static copy of the `parsedmarc`, under the terms of the [Creative Commons Attribution 4.0 International License]. as a fallback if the [MaxMind GeoLite2 Country database] is not -installed. However, `parsedmarc` cannot install updated versions of -these databases as they are released, so MaxMind's databases and the -[geoipupdate] tool is still the preferable solution. +installed. -The location of the database file can be overridden by using the +Starting in `parsedmarc` 9.6.0, the bundled DB-IP database is +automatically updated at startup by downloading the latest copy from +GitHub, unless the `offline` flag is set. The database is cached +locally and refreshed on each run (or on `SIGHUP` in watch mode). +If the download fails, a previously cached copy or the bundled +database is used as a fallback. + +The download URL can be overridden with the `ip_db_url` setting, and +the location of a local database file can be overridden with the `ip_db_path` setting. 
::: diff --git a/docs/source/usage.md b/docs/source/usage.md index 72116d8..96db44b 100644 --- a/docs/source/usage.md +++ b/docs/source/usage.md @@ -134,9 +134,13 @@ The full set of configuration options are: JSON output file - `ip_db_path` - str: An optional custom path to a MMDB file from MaxMind or DBIP + - `ip_db_url` - str: Overrides the default download URL for the + IP-to-country database (env var: `PARSEDMARC_GENERAL_IP_DB_URL`) - `offline` - bool: Do not use online queries for geolocation - or DNS - - `always_use_local_files` - Disables the download of the reverse DNS map + or DNS. Also disables automatic downloading of the IP-to-country + database and reverse DNS map. + - `always_use_local_files` - Disables the download of the + IP-to-country database and reverse DNS map - `local_reverse_dns_map_path` - Overrides the default local file path to use for the reverse DNS map - `reverse_dns_map_url` - Overrides the default download URL for the reverse DNS map - `nameservers` - str: A comma separated list of @@ -751,7 +755,7 @@ for that batch have completed. The following settings are reloaded: - Multi-tenant index prefix domain map (`index_prefix_domain_map` — the referenced YAML file is re-read on reload) - DNS and GeoIP settings (`nameservers`, `dns_timeout`, `ip_db_path`, - `offline`, etc.) + `ip_db_url`, `offline`, etc.) - Processing flags (`strip_attachment_payloads`, `batch_size`, `check_timeout`, etc.) 
- Log level (`debug`, `verbose`, `warnings`, `silent`) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index f770f0f..92e2cdd 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -54,6 +54,7 @@ from parsedmarc.utils import ( get_base_domain, get_reverse_dns, is_mbox, + load_ip_db, load_reverse_dns_map, ) @@ -388,6 +389,8 @@ def _parse_config(config: ConfigParser, opts): opts.ip_db_path = _expand_path(general_config["ip_db_path"]) else: opts.ip_db_path = None + if "ip_db_url" in general_config: + opts.ip_db_url = general_config["ip_db_url"] if "always_use_local_files" in general_config: opts.always_use_local_files = bool( general_config.getboolean("always_use_local_files") @@ -1806,6 +1809,7 @@ def _main(): log_file=args.log_file, n_procs=1, ip_db_path=None, + ip_db_url=None, always_use_local_files=False, reverse_dns_map_path=None, reverse_dns_map_url=None, @@ -1882,6 +1886,13 @@ def _main(): logger.info("Starting parsedmarc") + load_ip_db( + always_use_local_file=opts.always_use_local_files, + local_file_path=opts.ip_db_path, + url=opts.ip_db_url, + offline=opts.offline, + ) + # Initialize output clients (with retry for transient connection errors) clients = {} max_retries = 4 @@ -2296,6 +2307,15 @@ def _main(): offline=new_opts.offline, ) + # Reload the IP database so changes to the + # db path/URL in the config take effect. 
# Filesystem path of the IP-to-country MMDB file selected by the most recent
# load_ip_db() call. Stays None until load_ip_db() has been called.
_IP_DB_PATH: Optional[str] = None

# Byte sequence that introduces the metadata section of a MaxMind DB (MMDB)
# file. A payload without it cannot be opened by an MMDB reader.
_MMDB_METADATA_MARKER = b"\xab\xcd\xefMaxMind.com"


def _write_file_atomically(path: str, data: bytes) -> None:
    """Write *data* to *path* so readers never observe a partial file."""
    directory = os.path.dirname(path)
    os.makedirs(directory, exist_ok=True)
    # A uniquely named temporary file in the destination directory keeps
    # concurrent processes from clobbering each other's partial writes and
    # guarantees that os.replace() stays on a single filesystem.
    fd, tmp_path = tempfile.mkstemp(dir=directory, suffix=".tmp")
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(data)
        # mkstemp() creates the file readable by its owner only; restore
        # ordinary read permissions before publishing it.
        os.chmod(tmp_path, 0o644)
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave a stray temporary file behind on failure.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise


def load_ip_db(
    *,
    always_use_local_file: bool = False,
    local_file_path: Optional[str] = None,
    url: Optional[str] = None,
    offline: bool = False,
) -> None:
    """
    Selects the IP-to-country MMDB database and stores its path in the
    module-level ``_IP_DB_PATH``.

    The database is chosen in this order:

    1. ``local_file_path``, when it points to an existing file
    2. A fresh copy downloaded from ``url`` and cached in the system
       temporary directory (skipped when ``offline`` or
       ``always_use_local_file`` is set)
    3. A previously cached download
    4. The copy bundled with the package

    A download only replaces the cached copy when it looks like an MMDB
    file, so an error page or truncated response cannot destroy a working
    cache. Download and save failures are logged and never raised.

    Args:
        always_use_local_file: Always use a local/bundled database file
        local_file_path: Path to a local MMDB file
        url: URL to the MMDB database file
        offline: Do not make online requests
    """
    global _IP_DB_PATH

    if url is None:
        url = (
            "https://github.com/domainaware/parsedmarc/raw/"
            "refs/heads/master/parsedmarc/resources/dbip/"
            "dbip-country-lite.mmdb"
        )

    if local_file_path is not None:
        if os.path.isfile(local_file_path):
            _IP_DB_PATH = local_file_path
            logger.info(f"Using local IP database at {local_file_path}")
            return
        # Surface the misconfiguration instead of silently ignoring it.
        logger.warning(
            f"Local IP database {local_file_path} does not exist; "
            "falling back to the default IP database"
        )

    cache_dir = os.path.join(tempfile.gettempdir(), "parsedmarc")
    cached_path = os.path.join(cache_dir, "dbip-country-lite.mmdb")

    if not (offline or always_use_local_file):
        try:
            logger.debug(f"Trying to fetch IP database from {url}...")
            headers = {"User-Agent": USER_AGENT}
            response = requests.get(url, headers=headers, timeout=60)
            response.raise_for_status()
            content = response.content
            # The metadata marker sits near the end of a valid file, so
            # search backwards from the end.
            if content.rfind(_MMDB_METADATA_MARKER) == -1:
                logger.warning(
                    "Failed to fetch IP database: "
                    "the downloaded file is not a valid MMDB database"
                )
            else:
                _write_file_atomically(cached_path, content)
                _IP_DB_PATH = cached_path
                logger.info("IP database updated successfully")
                return
        except requests.exceptions.RequestException as e:
            logger.warning(f"Failed to fetch IP database: {e}")
        except Exception as e:
            logger.warning(f"Failed to save IP database: {e}")

    # Fall back to a previously cached copy if available
    if os.path.isfile(cached_path):
        _IP_DB_PATH = cached_path
        logger.info("Using cached IP database")
        return

    # Final fallback: bundled copy
    _IP_DB_PATH = str(
        files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb")
    )
    logger.info("Using bundled IP database")
_IP_DB_PATH is not None: + db_path = _IP_DB_PATH + else: + db_path = str( + files(parsedmarc.resources.dbip).joinpath( + "dbip-country-lite.mmdb" + ) + ) db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime) if db_age > timedelta(days=30):