Auto-update DB-IP Country Lite database at startup

Download the latest DB-IP Country Lite mmdb from GitHub on startup and
SIGHUP, caching it locally, with fallback to a previously cached or
bundled copy. Skipped when the offline flag is set. Adds ip_db_url
config option (PARSEDMARC_GENERAL_IP_DB_URL) to override the download
URL. Bumps version to 9.6.0.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Sean Whalen
2026-04-06 11:50:06 -04:00
parent 648fb93d6d
commit d1e8d3b3d0
6 changed files with 123 additions and 11 deletions

View File

@@ -1,5 +1,13 @@
# Changelog
## 9.6.0
### Changes
- The included DB-IP Country Lite database is now automatically updated at startup (and on SIGHUP in watch mode) by downloading the latest copy from GitHub, unless the `offline` flag is set. If the download fails, `parsedmarc` falls back to a previously cached copy or, if none exists, to the bundled database. This allows the IP-to-country database to stay current without requiring a new package release.
- Updated the included DB-IP Country Lite database to the 2026-04 release.
- Added the `ip_db_url` configuration option (`PARSEDMARC_GENERAL_IP_DB_URL` env var) to override the default download URL for the IP-to-country database.
## 9.5.5
### Fixed

View File

@@ -49,11 +49,17 @@ Starting in `parsedmarc` 7.1.0, a static copy of the
`parsedmarc`, under the terms of the
[Creative Commons Attribution 4.0 International License].
as a fallback if the [MaxMind GeoLite2 Country database] is not
installed. However, `parsedmarc` cannot install updated versions of
these databases as they are released, so MaxMind's databases and the
[geoipupdate] tool is still the preferable solution.
installed.
The location of the database file can be overridden by using the
Starting in `parsedmarc` 9.6.0, the bundled DB-IP database is
automatically updated at startup by downloading the latest copy from
GitHub, unless the `offline` flag is set. The database is cached
locally and refreshed on each run (or on `SIGHUP` in watch mode).
If the download fails, a previously cached copy or the bundled
database is used as a fallback.
The download URL can be overridden with the `ip_db_url` setting, and
the location of a local database file can be overridden with the
`ip_db_path` setting.
:::

View File

@@ -134,9 +134,13 @@ The full set of configuration options are:
JSON output file
- `ip_db_path` - str: An optional custom path to a MMDB file
from MaxMind or DBIP
- `ip_db_url` - str: Overrides the default download URL for the
IP-to-country database (env var: `PARSEDMARC_GENERAL_IP_DB_URL`)
- `offline` - bool: Do not use online queries for geolocation
or DNS
- `always_use_local_files` - Disables the download of the reverse DNS map
or DNS. Also disables automatic downloading of the IP-to-country
database and reverse DNS map.
- `always_use_local_files` - bool: Disables the download of the
IP-to-country database and reverse DNS map
- `local_reverse_dns_map_path` - Overrides the default local file path to use for the reverse DNS map
- `reverse_dns_map_url` - Overrides the default download URL for the reverse DNS map
- `nameservers` - str: A comma separated list of
@@ -751,7 +755,7 @@ for that batch have completed. The following settings are reloaded:
- Multi-tenant index prefix domain map (`index_prefix_domain_map` —
the referenced YAML file is re-read on reload)
- DNS and GeoIP settings (`nameservers`, `dns_timeout`, `ip_db_path`,
`offline`, etc.)
`ip_db_url`, `offline`, etc.)
- Processing flags (`strip_attachment_payloads`, `batch_size`,
`check_timeout`, etc.)
- Log level (`debug`, `verbose`, `warnings`, `silent`)

View File

@@ -54,6 +54,7 @@ from parsedmarc.utils import (
get_base_domain,
get_reverse_dns,
is_mbox,
load_ip_db,
load_reverse_dns_map,
)
@@ -388,6 +389,8 @@ def _parse_config(config: ConfigParser, opts):
opts.ip_db_path = _expand_path(general_config["ip_db_path"])
else:
opts.ip_db_path = None
if "ip_db_url" in general_config:
opts.ip_db_url = general_config["ip_db_url"]
if "always_use_local_files" in general_config:
opts.always_use_local_files = bool(
general_config.getboolean("always_use_local_files")
@@ -1806,6 +1809,7 @@ def _main():
log_file=args.log_file,
n_procs=1,
ip_db_path=None,
ip_db_url=None,
always_use_local_files=False,
reverse_dns_map_path=None,
reverse_dns_map_url=None,
@@ -1882,6 +1886,13 @@ def _main():
logger.info("Starting parsedmarc")
load_ip_db(
always_use_local_file=opts.always_use_local_files,
local_file_path=opts.ip_db_path,
url=opts.ip_db_url,
offline=opts.offline,
)
# Initialize output clients (with retry for transient connection errors)
clients = {}
max_retries = 4
@@ -2296,6 +2307,15 @@ def _main():
offline=new_opts.offline,
)
# Reload the IP database so changes to the
# db path/URL in the config take effect.
load_ip_db(
always_use_local_file=new_opts.always_use_local_files,
local_file_path=new_opts.ip_db_path,
url=new_opts.ip_db_url,
offline=new_opts.offline,
)
for k, v in vars(new_opts).items():
setattr(opts, k, v)

View File

@@ -1,3 +1,3 @@
__version__ = "9.5.5"
__version__ = "9.6.0"
USER_AGENT = f"parsedmarc/{__version__}"

View File

@@ -271,6 +271,75 @@ def human_timestamp_to_unix_timestamp(human_timestamp: str) -> int:
return int(human_timestamp_to_datetime(human_timestamp).timestamp())
# Path of the IP-to-country database selected by the most recent call to
# load_ip_db(); None until load_ip_db() has been called.
_IP_DB_PATH: Optional[str] = None

# Byte sequence that starts the metadata section of every MaxMind DB (MMDB)
# file. Used to reject downloads that are not MMDB files (for example an HTML
# error page served with a 200 status) before they overwrite the cache.
_MMDB_METADATA_MARKER = b"\xab\xcd\xefMaxMind.com"


def load_ip_db(
    *,
    always_use_local_file: bool = False,
    local_file_path: Optional[str] = None,
    url: Optional[str] = None,
    offline: bool = False,
) -> None:
    """
    Selects the IP-to-country MMDB database and stores its path in the
    module-level ``_IP_DB_PATH``.

    The first usable source in this order wins:

    1. ``local_file_path``, if it is set and points to an existing file
    2. A fresh download from ``url``, cached in the system temp directory
       (skipped when ``offline`` or ``always_use_local_file`` is set)
    3. A previously cached download
    4. The copy bundled with the package

    Download and save failures are logged as warnings and never raised.

    Args:
        always_use_local_file: Always use a local/bundled database file
        local_file_path: Path to a local MMDB file
        url: URL to the MMDB database file
        offline: Do not make online requests
    """
    global _IP_DB_PATH

    if url is None:
        url = (
            "https://github.com/domainaware/parsedmarc/raw/"
            "refs/heads/master/parsedmarc/resources/dbip/"
            "dbip-country-lite.mmdb"
        )

    if local_file_path is not None:
        if os.path.isfile(local_file_path):
            _IP_DB_PATH = local_file_path
            logger.info(f"Using local IP database at {local_file_path}")
            return
        # A configured but missing path is most likely a typo; say so
        # instead of silently using a different database.
        logger.warning(
            f"Local IP database {local_file_path} was not found; "
            "falling back to the default IP database"
        )

    cache_dir = os.path.join(tempfile.gettempdir(), "parsedmarc")
    cached_path = os.path.join(cache_dir, "dbip-country-lite.mmdb")

    if not (offline or always_use_local_file):
        tmp_path = cached_path + ".tmp"
        try:
            logger.debug(f"Trying to fetch IP database from {url}...")
            headers = {"User-Agent": USER_AGENT}
            response = requests.get(url, headers=headers, timeout=60)
            response.raise_for_status()
            payload = response.content
            if _MMDB_METADATA_MARKER not in payload:
                # Keep whatever is already cached rather than replacing it
                # with something that cannot be opened as a database.
                logger.warning(
                    "Failed to fetch IP database: "
                    f"the response from {url} is not a valid MMDB file"
                )
            else:
                os.makedirs(cache_dir, exist_ok=True)
                # Write to a sibling temp file, then swap it in atomically
                # so a reader never sees a half-written database.
                with open(tmp_path, "wb") as f:
                    f.write(payload)
                os.replace(tmp_path, cached_path)
                _IP_DB_PATH = cached_path
                logger.info("IP database updated successfully")
                return
        except requests.exceptions.RequestException as e:
            logger.warning(f"Failed to fetch IP database: {e}")
        except Exception as e:
            logger.warning(f"Failed to save IP database: {e}")
            # Best-effort removal of a partially written temp file.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

    # Fall back to a previously cached copy if available
    if os.path.isfile(cached_path):
        _IP_DB_PATH = cached_path
        logger.info("Using cached IP database")
        return

    # Final fallback: bundled copy
    _IP_DB_PATH = str(
        files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb")
    )
    logger.info("Using bundled IP database")
def get_ip_address_country(
ip_address: str, *, db_path: Optional[str] = None
) -> Optional[str]:
@@ -315,9 +384,14 @@ def get_ip_address_country(
break
if db_path is None:
db_path = str(
files(parsedmarc.resources.dbip).joinpath("dbip-country-lite.mmdb")
)
if _IP_DB_PATH is not None:
db_path = _IP_DB_PATH
else:
db_path = str(
files(parsedmarc.resources.dbip).joinpath(
"dbip-country-lite.mmdb"
)
)
db_age = datetime.now() - datetime.fromtimestamp(os.stat(db_path).st_mtime)
if db_age > timedelta(days=30):