Files
parsedmarc/_modules/parsedmarc/utils.html
T
Sean Whalen b0da277f0a Update docs
2026-06-13 20:45:07 -04:00

1395 lines
153 KiB
HTML

<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
  <!-- Character encoding and viewport are declared before any other head content. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>parsedmarc.utils — parsedmarc 10.1.1 documentation</title>
  <!-- Stylesheets: syntax-highlighting rules first, then the theme; order is kept for the cascade. -->
  <link rel="stylesheet" href="../../_static/pygments.css?v=b86133f3">
  <link rel="stylesheet" href="../../_static/css/theme.css?v=e59714d7">
  <!-- Classic scripts with no defer/async run in the order listed; keep this order. -->
  <script src="../../_static/jquery.js?v=5d32c60e"></script>
  <script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
  <script src="../../_static/documentation_options.js?v=e6f446a4"></script>
  <script src="../../_static/doctools.js?v=9bcbadda"></script>
  <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
  <script src="../../_static/js/theme.js"></script>
  <!-- Document relations pointing at the general index and the search page. -->
  <link rel="index" href="../../genindex.html" title="Index">
  <link rel="search" href="../../search.html" title="Search">
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav class="wy-nav-side" data-toggle="wy-nav-shift">
  <div class="wy-side-scroll">
    <!-- Sidebar header: link back to the docs home plus the search form. -->
    <div class="wy-side-nav-search">
      <a class="icon icon-home" href="../../index.html">
        parsedmarc
      </a>
      <div role="search">
        <!-- Submits a GET request to search.html; the two hidden fields are sent along with the query "q". -->
        <form class="wy-form" id="rtd-search-form" action="../../search.html" method="get">
          <input type="text" name="q" placeholder="Search docs" aria-label="Search docs">
          <input type="hidden" name="check_keywords" value="yes">
          <input type="hidden" name="area" value="default">
        </form>
      </div>
    </div>
    <!-- Top-level table of contents for the documentation set. -->
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
      <!-- role="heading" requires an explicit aria-level; level 2 matches the fallback applied when it is omitted. -->
      <p class="caption" role="heading" aria-level="2"><span class="caption-text">Contents</span></p>
      <ul>
        <li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../usage.html">Using parsedmarc</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../output.html">Sample outputs</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../elasticsearch.html">Elasticsearch and Kibana</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../opensearch.html">OpenSearch and Grafana</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../kibana.html">Using the Kibana dashboards</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../splunk.html">Splunk</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../davmail.html">Accessing an inbox using OWA/EWS</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../dmarc.html">Understanding DMARC</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../contributing.html">Contributing to parsedmarc</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../api.html">API reference</a></li>
      </ul>
    </div>
  </div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">parsedmarc</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
  <!-- Breadcrumb trail: docs home icon, "Module code" index, the parsedmarc package page, then the current page as plain text. -->
  <ul class="wy-breadcrumbs">
    <li><a class="icon icon-home" href="../../index.html" aria-label="Home"></a></li>
    <li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
    <li class="breadcrumb-item"><a href="../parsedmarc.html">parsedmarc</a></li>
    <li class="breadcrumb-item active">parsedmarc.utils</li>
    <!-- Aside slot of the breadcrumb bar; it carries no content on this page. -->
    <li class="wy-breadcrumbs-aside">
    </li>
  </ul>
  <hr>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for parsedmarc.utils</h1><div class="highlight"><pre>
<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
<span class="sd">&quot;&quot;&quot;Utility functions that might be useful for other projects&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">base64</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">csv</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">hashlib</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">io</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">json</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">logging</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">mailbox</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">os</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">shutil</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">subprocess</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">tempfile</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">datetime</span><span class="w"> </span><span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span><span class="p">,</span> <span class="n">timezone</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">TypedDict</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">cast</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">mailparser</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">expiringdict</span><span class="w"> </span><span class="kn">import</span> <span class="n">ExpiringDict</span>
<span class="k">try</span><span class="p">:</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">importlib.resources</span><span class="w"> </span><span class="kn">import</span> <span class="n">files</span>
<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
<span class="c1"># Try backported to PY&lt;3 `importlib_resources`</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">importlib.resources</span><span class="w"> </span><span class="kn">import</span> <span class="n">files</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.exception</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.resolver</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.reversename</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">maxminddb</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">publicsuffixlist</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">requests</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">dateutil.parser</span><span class="w"> </span><span class="kn">import</span> <span class="n">parse</span> <span class="k">as</span> <span class="n">parse_date</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">parsedmarc.resources.ipinfo</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">parsedmarc.resources.maps</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">parsedmarc.constants</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span>
<span class="n">DEFAULT_DNS_MAX_RETRIES</span><span class="p">,</span>
<span class="n">DEFAULT_DNS_TIMEOUT</span><span class="p">,</span>
<span class="n">USER_AGENT</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">parsedmarc.log</span><span class="w"> </span><span class="kn">import</span> <span class="n">logger</span>
<span class="c1"># Errors considered transient and retryable by query_dns. LifetimeTimeout is</span>
<span class="c1"># dnspython&#39;s deadline expiry; NoNameservers typically wraps a SERVFAIL from</span>
<span class="c1"># upstream; OSError covers socket-level failures during TCP fallback.</span>
<span class="n">_RETRYABLE_DNS_ERRORS</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">dns</span><span class="o">.</span><span class="n">resolver</span><span class="o">.</span><span class="n">LifetimeTimeout</span><span class="p">,</span>
<span class="n">dns</span><span class="o">.</span><span class="n">resolver</span><span class="o">.</span><span class="n">NoNameservers</span><span class="p">,</span>
<span class="ne">OSError</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">parenthesis_regex</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">&quot;\s*\(.*\)\s*&quot;</span><span class="p">)</span>
<span class="n">null_file</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">DEVNULL</span>
<span class="n">mailparser_logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">&quot;mailparser&quot;</span><span class="p">)</span>
<span class="n">mailparser_logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">CRITICAL</span><span class="p">)</span>
<span class="n">psl</span> <span class="o">=</span> <span class="n">publicsuffixlist</span><span class="o">.</span><span class="n">PublicSuffixList</span><span class="p">()</span>
<span class="n">psl_overrides</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<div class="viewcode-block" id="load_psl_overrides">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.load_psl_overrides">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">load_psl_overrides</span><span class="p">(</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads the PSL overrides list from a URL or local file.</span>
<span class="sd"> Clears and repopulates the module-level ``psl_overrides`` list in place,</span>
<span class="sd"> then returns it. The URL is tried first; on failure (or when</span>
<span class="sd"> ``offline``/``always_use_local_file`` is set) the local path is used,</span>
<span class="sd"> defaulting to the bundled ``psl_overrides.txt``.</span>
<span class="sd"> Args:</span>
<span class="sd"> always_use_local_file (bool): Always use a local overrides file</span>
<span class="sd"> local_file_path (str): Path to a local overrides file</span>
<span class="sd"> url (str): URL to a PSL overrides file</span>
<span class="sd"> offline (bool): Use the built-in copy of the overrides</span>
<span class="sd"> Returns:</span>
<span class="sd"> list[str]: the module-level ``psl_overrides`` list</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">url</span> <span class="o">=</span> <span class="p">(</span>
<span class="s2">&quot;https://raw.githubusercontent.com/domainaware&quot;</span>
<span class="s2">&quot;/parsedmarc/master/parsedmarc/&quot;</span>
<span class="s2">&quot;resources/maps/psl_overrides.txt&quot;</span>
<span class="p">)</span>
<span class="n">psl_overrides</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
<span class="k">def</span><span class="w"> </span><span class="nf">_load_text</span><span class="p">(</span><span class="n">text</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">text</span><span class="o">.</span><span class="n">splitlines</span><span class="p">():</span>
<span class="n">s</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
<span class="k">if</span> <span class="n">s</span><span class="p">:</span>
<span class="n">psl_overrides</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">offline</span> <span class="ow">or</span> <span class="n">always_use_local_file</span><span class="p">):</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Trying to fetch PSL overrides from </span><span class="si">{</span><span class="n">url</span><span class="si">}</span><span class="s2">...&quot;</span><span class="p">)</span>
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;User-Agent&quot;</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">}</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
<span class="n">response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
<span class="n">_load_text</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Failed to fetch PSL overrides: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">psl_overrides</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">path</span> <span class="o">=</span> <span class="n">local_file_path</span> <span class="ow">or</span> <span class="nb">str</span><span class="p">(</span>
<span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">maps</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">&quot;psl_overrides.txt&quot;</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Loading PSL overrides from </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">&quot;utf-8&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">_load_text</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
<span class="k">return</span> <span class="n">psl_overrides</span></div>
<span class="c1"># Bootstrap with the bundled file at import time — no network call.</span>
<span class="n">load_psl_overrides</span><span class="p">(</span><span class="n">offline</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<div class="viewcode-block" id="EmailParserError">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.EmailParserError">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">EmailParserError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Raised when an error parsing the email occurs&quot;&quot;&quot;</span></div>
<div class="viewcode-block" id="DownloadError">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.DownloadError">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">DownloadError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Raised when an error occurs when downloading a file&quot;&quot;&quot;</span></div>
<div class="viewcode-block" id="ReverseDNSService">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.ReverseDNSService">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">ReverseDNSService</span><span class="p">(</span><span class="n">TypedDict</span><span class="p">):</span>
<span class="n">name</span><span class="p">:</span> <span class="nb">str</span>
<span class="nb">type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span></div>
<span class="n">ReverseDNSMap</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ReverseDNSService</span><span class="p">]</span>
<div class="viewcode-block" id="IPAddressInfo">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.IPAddressInfo">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">IPAddressInfo</span><span class="p">(</span><span class="n">TypedDict</span><span class="p">):</span>
<span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span>
<span class="n">reverse_dns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">country</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">base_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="nb">type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">asn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span>
<span class="n">as_name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">as_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span></div>
<div class="viewcode-block" id="decode_base64">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.decode_base64">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">decode_base64</span><span class="p">(</span><span class="n">data</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Decodes a base64 string, with padding being optional</span>
<span class="sd"> Args:</span>
<span class="sd"> data (str): A base64 encoded string</span>
<span class="sd"> Returns:</span>
<span class="sd"> bytes: The decoded bytes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">data_bytes</span> <span class="o">=</span> <span class="nb">bytes</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">&quot;ascii&quot;</span><span class="p">)</span>
<span class="n">missing_padding</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data_bytes</span><span class="p">)</span> <span class="o">%</span> <span class="mi">4</span>
<span class="k">if</span> <span class="n">missing_padding</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">data_bytes</span> <span class="o">+=</span> <span class="sa">b</span><span class="s2">&quot;=&quot;</span> <span class="o">*</span> <span class="p">(</span><span class="mi">4</span> <span class="o">-</span> <span class="n">missing_padding</span><span class="p">)</span>
<span class="k">return</span> <span class="n">base64</span><span class="o">.</span><span class="n">b64decode</span><span class="p">(</span><span class="n">data_bytes</span><span class="p">)</span></div>
<div class="viewcode-block" id="get_base_domain">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_base_domain">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_base_domain</span><span class="p">(</span><span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the base domain name for the given domain</span>
<span class="sd"> .. note::</span>
<span class="sd"> Results are based on a list of public domain suffixes at</span>
<span class="sd"> https://publicsuffix.org/list/public_suffix_list.dat and overrides included in</span>
<span class="sd"> parsedmarc.resources.maps.psl_overrides.txt</span>
<span class="sd"> Args:</span>
<span class="sd"> domain (str): A domain or subdomain</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The base domain of the given domain</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">domain</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="n">publicsuffix</span> <span class="o">=</span> <span class="n">psl</span><span class="o">.</span><span class="n">privatesuffix</span><span class="p">(</span><span class="n">domain</span><span class="p">)</span>
<span class="k">for</span> <span class="n">override</span> <span class="ow">in</span> <span class="n">psl_overrides</span><span class="p">:</span>
<span class="k">if</span> <span class="n">domain</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="n">override</span><span class="p">):</span>
<span class="k">return</span> <span class="n">override</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s2">&quot;-&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">publicsuffix</span></div>
<div class="viewcode-block" id="query_dns">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.query_dns">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">query_dns</span><span class="p">(</span>
<span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">record_type</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_TIMEOUT</span><span class="p">,</span>
<span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_MAX_RETRIES</span><span class="p">,</span>
<span class="n">_attempt</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Queries DNS</span>
<span class="sd"> Args:</span>
<span class="sd"> domain (str): The domain or subdomain to query about</span>
<span class="sd"> record_type (str): The record type to query for</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default). Pass</span>
<span class="sd"> ``parsedmarc.constants.RECOMMENDED_DNS_NAMESERVERS`` for a</span>
<span class="sd"> cross-provider mix that fails over when one provider&#39;s path is</span>
<span class="sd"> slow or broken.</span>
<span class="sd"> timeout (float): Overall DNS lifetime budget in seconds per</span>
<span class="sd"> configured nameserver. Per-query UDP attempts are capped at</span>
<span class="sd"> ``min(1.0, timeout)`` so dnspython retries within the lifetime on</span>
<span class="sd"> transient UDP packet loss (mirroring ``dig``&#39;s default</span>
<span class="sd"> ``+tries=3`` behavior); with multiple nameservers configured this</span>
<span class="sd"> same cap also makes a slow or broken nameserver fall through to</span>
<span class="sd"> the next quickly.</span>
<span class="sd"> retries (int): Number of times to retry the whole query after a</span>
<span class="sd"> timeout or other transient error (``LifetimeTimeout``,</span>
<span class="sd"> ``NoNameservers``, ``OSError``). Failover between configured</span>
<span class="sd"> nameservers happens within each attempt.</span>
<span class="sd"> Returns:</span>
<span class="sd"> list: A list of answers</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">domain</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">domain</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="n">record_type</span> <span class="o">=</span> <span class="n">record_type</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
<span class="n">cache_key</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="si">{0}</span><span class="s2">_</span><span class="si">{1}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">)</span>
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
<span class="n">cached_records</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cache_key</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cached_records</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">cached_records</span><span class="p">)</span>
<span class="n">resolver</span> <span class="o">=</span> <span class="n">dns</span><span class="o">.</span><span class="n">resolver</span><span class="o">.</span><span class="n">Resolver</span><span class="p">()</span>
<span class="n">timeout</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">timeout</span><span class="p">)</span>
<span class="k">if</span> <span class="n">nameservers</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">nameservers</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;1.1.1.1&quot;</span><span class="p">,</span>
<span class="s2">&quot;1.0.0.1&quot;</span><span class="p">,</span>
<span class="s2">&quot;2606:4700:4700::1111&quot;</span><span class="p">,</span>
<span class="s2">&quot;2606:4700:4700::1001&quot;</span><span class="p">,</span>
<span class="p">]</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">nameservers</span> <span class="o">=</span> <span class="n">nameservers</span>
<span class="c1"># Cap per-query UDP timeout at 1s so dnspython retries within the</span>
<span class="c1"># lifetime window on transient packet loss — otherwise with a single</span>
<span class="c1"># nameserver and timeout == lifetime, one dropped UDP datagram consumes</span>
<span class="c1"># the whole budget and raises LifetimeTimeout without a retry (dig&#39;s</span>
<span class="c1"># default +tries=3 masks this case). With multiple nameservers the same</span>
<span class="c1"># cap lets a slow/broken one fall through.</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">timeout</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">timeout</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">resolver</span><span class="o">.</span><span class="n">nameservers</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">lifetime</span> <span class="o">=</span> <span class="n">timeout</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">resolver</span><span class="o">.</span><span class="n">nameservers</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">lifetime</span> <span class="o">=</span> <span class="n">timeout</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">answers</span> <span class="o">=</span> <span class="n">resolver</span><span class="o">.</span><span class="n">resolve</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">,</span> <span class="n">lifetime</span><span class="o">=</span><span class="n">resolver</span><span class="o">.</span><span class="n">lifetime</span><span class="p">)</span>
<span class="k">except</span> <span class="n">_RETRYABLE_DNS_ERRORS</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">_attempt</span> <span class="o">+=</span> <span class="mi">1</span>
<span class="k">if</span> <span class="n">_attempt</span> <span class="o">&gt;</span> <span class="n">retries</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">e</span>
<span class="k">return</span> <span class="n">query_dns</span><span class="p">(</span>
<span class="n">domain</span><span class="p">,</span>
<span class="n">record_type</span><span class="p">,</span>
<span class="n">cache</span><span class="o">=</span><span class="n">cache</span><span class="p">,</span>
<span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span>
<span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">,</span>
<span class="n">retries</span><span class="o">=</span><span class="n">retries</span><span class="p">,</span>
<span class="n">_attempt</span><span class="o">=</span><span class="n">_attempt</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">records</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">r</span><span class="o">.</span><span class="n">to_text</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;&quot;&#39;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">),</span>
<span class="n">answers</span><span class="p">,</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
<span class="n">cache</span><span class="p">[</span><span class="n">cache_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">records</span>
<span class="k">return</span> <span class="n">records</span></div>
<div class="viewcode-block" id="get_reverse_dns">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_reverse_dns">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_reverse_dns</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_TIMEOUT</span><span class="p">,</span>
<span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_MAX_RETRIES</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Resolves an IP address to a hostname using a reverse DNS query</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to resolve</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default)</span>
<span class="sd"> timeout (float): Sets the DNS query timeout in seconds</span>
<span class="sd"> retries (int): Number of times to retry on timeout or other transient</span>
<span class="sd"> errors</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The reverse DNS hostname (if any)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">hostname</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">address</span> <span class="o">=</span> <span class="n">dns</span><span class="o">.</span><span class="n">reversename</span><span class="o">.</span><span class="n">from_address</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span>
<span class="n">hostname</span> <span class="o">=</span> <span class="n">query_dns</span><span class="p">(</span>
<span class="nb">str</span><span class="p">(</span><span class="n">address</span><span class="p">),</span>
<span class="s2">&quot;PTR&quot;</span><span class="p">,</span>
<span class="n">cache</span><span class="o">=</span><span class="n">cache</span><span class="p">,</span>
<span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span>
<span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">,</span>
<span class="n">retries</span><span class="o">=</span><span class="n">retries</span><span class="p">,</span>
<span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">except</span> <span class="n">dns</span><span class="o">.</span><span class="n">exception</span><span class="o">.</span><span class="n">DNSException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;get_reverse_dns(</span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">) exception: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">hostname</span></div>
<div class="viewcode-block" id="timestamp_to_datetime">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.timestamp_to_datetime">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">timestamp_to_datetime</span><span class="p">(</span><span class="n">timestamp</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">datetime</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a UNIX/DMARC timestamp to a Python ``datetime`` object</span>
<span class="sd"> Args:</span>
<span class="sd"> timestamp (int): The timestamp</span>
<span class="sd"> Returns:</span>
<span class="sd"> datetime: The converted timestamp as a Python ``datetime`` object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">timestamp</span><span class="p">))</span></div>
<div class="viewcode-block" id="timestamp_to_human">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.timestamp_to_human">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">timestamp_to_human</span><span class="p">(</span><span class="n">timestamp</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a UNIX/DMARC timestamp to a human-readable string</span>
<span class="sd"> Args:</span>
<span class="sd"> timestamp (int): The timestamp</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The converted timestamp in ``YYYY-MM-DD HH:MM:SS`` format</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">timestamp_to_datetime</span><span class="p">(</span><span class="n">timestamp</span><span class="p">)</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">&quot;%Y-%m-</span><span class="si">%d</span><span class="s2"> %H:%M:%S&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="human_timestamp_to_datetime">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.human_timestamp_to_datetime">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">human_timestamp_to_datetime</span><span class="p">(</span>
<span class="n">human_timestamp</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">to_utc</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">datetime</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a human-readable timestamp into a Python ``datetime`` object</span>
<span class="sd"> Args:</span>
<span class="sd"> human_timestamp (str): A timestamp string</span>
<span class="sd"> to_utc (bool): Convert the timestamp to UTC</span>
<span class="sd"> Returns:</span>
<span class="sd"> datetime: The converted timestamp</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">human_timestamp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;-0000&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">parenthesis_regex</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">,</span> <span class="n">human_timestamp</span><span class="p">)</span>
<span class="n">dt</span> <span class="o">=</span> <span class="n">parse_date</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">)</span>
<span class="k">return</span> <span class="n">dt</span><span class="o">.</span><span class="n">astimezone</span><span class="p">(</span><span class="n">timezone</span><span class="o">.</span><span class="n">utc</span><span class="p">)</span> <span class="k">if</span> <span class="n">to_utc</span> <span class="k">else</span> <span class="n">dt</span></div>
<div class="viewcode-block" id="human_timestamp_to_unix_timestamp">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.human_timestamp_to_unix_timestamp">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">human_timestamp_to_unix_timestamp</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a human-readable timestamp into a UNIX timestamp</span>
<span class="sd"> Args:</span>
<span class="sd"> human_timestamp (str): A timestamp in ``YYYY-MM-DD HH:MM:SS`` format</span>
<span class="sd"> Returns:</span>
<span class="sd"> int: The converted timestamp</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">human_timestamp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">human_timestamp_to_datetime</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">)</span><span class="o">.</span><span class="n">timestamp</span><span class="p">())</span></div>
<span class="n">_IP_DB_PATH</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<div class="viewcode-block" id="load_ip_db">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.load_ip_db">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">load_ip_db</span><span class="p">(</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Downloads the IP-to-country MMDB database from a URL and caches it</span>
<span class="sd"> locally. Falls back to a cached copy, then the bundled copy, on failure or when offline.</span>
<span class="sd"> Args:</span>
<span class="sd"> always_use_local_file: Always use a local/bundled database file</span>
<span class="sd"> local_file_path: Path to a local MMDB file</span>
<span class="sd"> url: URL to the MMDB database file</span>
<span class="sd"> offline: Do not make online requests</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">global</span> <span class="n">_IP_DB_PATH</span>
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">url</span> <span class="o">=</span> <span class="p">(</span>
<span class="s2">&quot;https://github.com/domainaware/parsedmarc/raw/&quot;</span>
<span class="s2">&quot;refs/heads/master/parsedmarc/resources/ipinfo/&quot;</span>
<span class="s2">&quot;ipinfo_lite.mmdb&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">local_file_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">local_file_path</span><span class="p">):</span>
<span class="n">_IP_DB_PATH</span> <span class="o">=</span> <span class="n">local_file_path</span>
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Using local IP database at </span><span class="si">{</span><span class="n">local_file_path</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">return</span>
<span class="n">cache_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">tempfile</span><span class="o">.</span><span class="n">gettempdir</span><span class="p">(),</span> <span class="s2">&quot;parsedmarc&quot;</span><span class="p">)</span>
<span class="n">cached_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">cache_dir</span><span class="p">,</span> <span class="s2">&quot;ipinfo_lite.mmdb&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">offline</span> <span class="ow">or</span> <span class="n">always_use_local_file</span><span class="p">):</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Trying to fetch IP database from </span><span class="si">{</span><span class="n">url</span><span class="si">}</span><span class="s2">...&quot;</span><span class="p">)</span>
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;User-Agent&quot;</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">}</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mi">60</span><span class="p">)</span>
<span class="n">response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">cache_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
<span class="n">tmp_path</span> <span class="o">=</span> <span class="n">cached_path</span> <span class="o">+</span> <span class="s2">&quot;.tmp&quot;</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">,</span> <span class="s2">&quot;wb&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">content</span><span class="p">)</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">move</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">,</span> <span class="n">cached_path</span><span class="p">)</span>
<span class="n">_IP_DB_PATH</span> <span class="o">=</span> <span class="n">cached_path</span>
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;IP database updated successfully&quot;</span><span class="p">)</span>
<span class="k">return</span>
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Failed to fetch IP database: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Failed to save IP database: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="c1"># Fall back to a previously cached copy if available</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">cached_path</span><span class="p">):</span>
<span class="n">_IP_DB_PATH</span> <span class="o">=</span> <span class="n">cached_path</span>
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Using cached IP database&quot;</span><span class="p">)</span>
<span class="k">return</span>
<span class="c1"># Final fallback: bundled copy</span>
<span class="n">_IP_DB_PATH</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">ipinfo</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">&quot;ipinfo_lite.mmdb&quot;</span><span class="p">))</span>
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Using bundled IP database&quot;</span><span class="p">)</span></div>
<span class="k">class</span><span class="w"> </span><span class="nc">_IPDatabaseRecord</span><span class="p">(</span><span class="n">TypedDict</span><span class="p">):</span>
<span class="n">country</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">asn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span>
<span class="n">as_name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">as_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<div class="viewcode-block" id="InvalidIPinfoAPIKey">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.InvalidIPinfoAPIKey">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">InvalidIPinfoAPIKey</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Raised when the IPinfo API rejects the configured token.&quot;&quot;&quot;</span></div>
<span class="c1"># IPinfo Lite REST API. When ``_IPINFO_API_TOKEN`` is set,</span>
<span class="c1"># ``get_ip_address_db_record()`` queries the API first and falls back to the</span>
<span class="c1"># bundled/cached MMDB on any non-2xx response or network error. A 401/403</span>
<span class="c1"># propagates as ``InvalidIPinfoAPIKey`` so the CLI exits fatally.</span>
<span class="c1">#</span>
<span class="c1"># The IPinfo Lite API is documented as having no daily or monthly request</span>
<span class="c1"># limit (&quot;unlimited access&quot;), so there is no rate-limit or quota handling</span>
<span class="c1"># here — adding it would be inventing behavior the service doesn&#39;t document.</span>
<span class="c1"># Authentication uses the documented ``?token=`` query parameter.</span>
<span class="n">_IPINFO_API_URL</span> <span class="o">=</span> <span class="s2">&quot;https://api.ipinfo.io/lite&quot;</span>
<span class="n">_IPINFO_API_TOKEN</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">_IPINFO_API_TIMEOUT</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">5.0</span>
<div class="viewcode-block" id="configure_ipinfo_api">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.configure_ipinfo_api">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">configure_ipinfo_api</span><span class="p">(</span>
<span class="n">token</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">probe</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Configure the IPinfo Lite REST API as the primary source for IP lookups.</span>
<span class="sd"> When a token is configured, ``get_ip_address_db_record()`` hits the API</span>
<span class="sd"> first for every lookup and falls back to the MMDB on network errors. An</span>
<span class="sd"> invalid token raises ``InvalidIPinfoAPIKey`` — the CLI catches that and</span>
<span class="sd"> exits fatally.</span>
<span class="sd"> Args:</span>
<span class="sd"> token: IPinfo API token. ``None`` or empty disables the API.</span>
<span class="sd"> probe: If ``True``, verify the token by looking up ``1.1.1.1``. A</span>
<span class="sd"> 401/403 raises ``InvalidIPinfoAPIKey``; other errors are logged</span>
<span class="sd"> and the token is still accepted so per-request fallback can take</span>
<span class="sd"> over.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">global</span> <span class="n">_IPINFO_API_TOKEN</span>
<span class="n">_IPINFO_API_TOKEN</span> <span class="o">=</span> <span class="n">token</span> <span class="ow">or</span> <span class="kc">None</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">_IPINFO_API_TOKEN</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">probe</span><span class="p">:</span>
<span class="k">return</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">_ipinfo_api_lookup</span><span class="p">(</span><span class="s2">&quot;1.1.1.1&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="n">InvalidIPinfoAPIKey</span><span class="p">:</span>
<span class="k">raise</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IPinfo API probe failed (will fall back per-request): </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;IPinfo API configured&quot;</span><span class="p">)</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">_ipinfo_api_lookup</span><span class="p">(</span><span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="n">_IPDatabaseRecord</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Look up an IP via the IPinfo Lite REST API.</span>
<span class="sd"> Returns the normalized record on success, or ``None`` on network error or</span>
<span class="sd"> any non-2xx response (other than 401/403). 401/403 raises</span>
<span class="sd"> ``InvalidIPinfoAPIKey``.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">_IPINFO_API_TOKEN</span><span class="p">:</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="n">url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">_IPINFO_API_URL</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;token&quot;</span><span class="p">:</span> <span class="n">_IPINFO_API_TOKEN</span><span class="p">}</span>
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;User-Agent&quot;</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">,</span> <span class="s2">&quot;Accept&quot;</span><span class="p">:</span> <span class="s2">&quot;application/json&quot;</span><span class="p">}</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
<span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">,</span> <span class="n">params</span><span class="o">=</span><span class="n">params</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">_IPINFO_API_TIMEOUT</span>
<span class="p">)</span>
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IPinfo API request for </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> failed: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="ow">in</span> <span class="p">(</span><span class="mi">401</span><span class="p">,</span> <span class="mi">403</span><span class="p">):</span>
<span class="k">raise</span> <span class="n">InvalidIPinfoAPIKey</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;IPinfo API rejected the configured token (HTTP </span><span class="si">{</span><span class="n">response</span><span class="o">.</span><span class="n">status_code</span><span class="si">}</span><span class="s2">)&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">response</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;IPinfo API returned HTTP </span><span class="si">{</span><span class="n">response</span><span class="o">.</span><span class="n">status_code</span><span class="si">}</span><span class="s2"> for </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">&quot;</span>
<span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IPinfo API returned non-JSON for </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">payload</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">return</span> <span class="n">_normalize_ip_record</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">_normalize_ip_record</span><span class="p">(</span><span class="n">record</span><span class="p">:</span> <span class="nb">dict</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">_IPDatabaseRecord</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Normalize an IPinfo / MaxMind record to the internal shape.</span>
<span class="sd"> Shared between the API path and the MMDB path so both schemas produce the</span>
<span class="sd"> same output: country as ISO code, ASN as plain int, as_name string,</span>
<span class="sd"> as_domain lowercased.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">country</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">asn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">as_name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">as_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">code</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;country_code&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">code</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">nested</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;country&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">nested</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="n">code</span> <span class="o">=</span> <span class="n">nested</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;iso_code&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">code</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="n">country</span> <span class="o">=</span> <span class="n">code</span>
<span class="n">raw_asn</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;asn&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">raw_asn</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="n">asn</span> <span class="o">=</span> <span class="n">raw_asn</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">raw_asn</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="ow">and</span> <span class="n">raw_asn</span><span class="p">:</span>
<span class="n">digits</span> <span class="o">=</span> <span class="n">raw_asn</span><span class="o">.</span><span class="n">removeprefix</span><span class="p">(</span><span class="s2">&quot;AS&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">removeprefix</span><span class="p">(</span><span class="s2">&quot;as&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">digits</span><span class="o">.</span><span class="n">isdigit</span><span class="p">():</span>
<span class="n">asn</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">digits</span><span class="p">)</span>
<span class="k">if</span> <span class="n">asn</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">mm_asn</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;autonomous_system_number&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">mm_asn</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
<span class="n">asn</span> <span class="o">=</span> <span class="n">mm_asn</span>
<span class="n">name</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;as_name&quot;</span><span class="p">)</span> <span class="ow">or</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;autonomous_system_organization&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="ow">and</span> <span class="n">name</span><span class="p">:</span>
<span class="n">as_name</span> <span class="o">=</span> <span class="n">name</span>
<span class="n">domain</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;as_domain&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="ow">and</span> <span class="n">domain</span><span class="p">:</span>
<span class="n">as_domain</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="k">return</span> <span class="p">{</span>
<span class="s2">&quot;country&quot;</span><span class="p">:</span> <span class="n">country</span><span class="p">,</span>
<span class="s2">&quot;asn&quot;</span><span class="p">:</span> <span class="n">asn</span><span class="p">,</span>
<span class="s2">&quot;as_name&quot;</span><span class="p">:</span> <span class="n">as_name</span><span class="p">,</span>
<span class="s2">&quot;as_domain&quot;</span><span class="p">:</span> <span class="n">as_domain</span><span class="p">,</span>
<span class="p">}</span>
<span class="k">def</span><span class="w"> </span><span class="nf">_get_ip_database_path</span><span class="p">(</span><span class="n">db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="n">db_paths</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;ipinfo_lite.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/local/share/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/share/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/var/lib/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/var/local/lib/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/local/var/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;%SystemDrive%</span><span class="se">\\</span><span class="s2">ProgramData</span><span class="se">\\</span><span class="s2">MaxMind</span><span class="se">\\</span><span class="s2">GeoIPUpdate</span><span class="se">\\</span><span class="s2">GeoIP</span><span class="se">\\</span><span class="s2">&quot;</span>
<span class="s2">&quot;GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;C:</span><span class="se">\\</span><span class="s2">GeoIP</span><span class="se">\\</span><span class="s2">GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;dbip-country-lite.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;dbip-country.mmdb&quot;</span><span class="p">,</span>
<span class="p">]</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">db_path</span><span class="p">):</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;No file exists at </span><span class="si">{</span><span class="n">db_path</span><span class="si">}</span><span class="s2">. Falling back to an &quot;</span>
<span class="s2">&quot;included copy of the IPinfo IP to Country &quot;</span>
<span class="s2">&quot;Lite database.&quot;</span>
<span class="p">)</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">for</span> <span class="n">system_path</span> <span class="ow">in</span> <span class="n">db_paths</span><span class="p">:</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">system_path</span><span class="p">):</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="n">system_path</span>
<span class="k">break</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">_IP_DB_PATH</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="n">_IP_DB_PATH</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span>
<span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">ipinfo</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">&quot;ipinfo_lite.mmdb&quot;</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">db_age</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">stat</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span><span class="o">.</span><span class="n">st_mtime</span><span class="p">)</span>
<span class="k">if</span> <span class="n">db_age</span> <span class="o">&gt;</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">30</span><span class="p">):</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;IP database is more than a month old&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">db_path</span>
<div class="viewcode-block" id="get_ip_address_db_record">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_db_record">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_ip_address_db_record</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">_IPDatabaseRecord</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Look up an IP and return country + ASN fields.</span>
<span class="sd"> If the IPinfo Lite API is configured via ``configure_ipinfo_api()``, the</span>
<span class="sd"> API is queried first; any non-fatal failure (rate limit, quota, network)</span>
<span class="sd"> falls through to the MMDB. An invalid API token raises</span>
<span class="sd"> ``InvalidIPinfoAPIKey`` and is not caught here.</span>
<span class="sd"> IPinfo Lite carries ``country_code``, ``as_name``, and ``as_domain`` on</span>
<span class="sd"> every record. MaxMind/DBIP country-only databases carry only country, so</span>
<span class="sd"> ``as_name`` / ``as_domain`` come back None for those users.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">api_record</span> <span class="o">=</span> <span class="n">_ipinfo_api_lookup</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span>
<span class="k">if</span> <span class="n">api_record</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">return</span> <span class="n">api_record</span>
<span class="n">resolved_path</span> <span class="o">=</span> <span class="n">_get_ip_database_path</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span>
<span class="n">db_reader</span> <span class="o">=</span> <span class="n">maxminddb</span><span class="o">.</span><span class="n">open_database</span><span class="p">(</span><span class="n">resolved_path</span><span class="p">)</span>
<span class="n">record</span> <span class="o">=</span> <span class="n">db_reader</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">record</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
<span class="k">return</span> <span class="p">{</span>
<span class="s2">&quot;country&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;asn&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;as_name&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;as_domain&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">}</span>
<span class="k">return</span> <span class="n">_normalize_ip_record</span><span class="p">(</span><span class="n">record</span><span class="p">)</span></div>
<div class="viewcode-block" id="get_ip_address_country">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_country">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_ip_address_country</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the ISO code for the country associated</span>
<span class="sd"> with the given IPv4 or IPv6 address.</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to query for</span>
<span class="sd"> db_path (str): Path to an MMDB file from IPinfo, MaxMind, or DBIP</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: An ISO country code associated with the given IP address</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">get_ip_address_db_record</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">db_path</span><span class="o">=</span><span class="n">db_path</span><span class="p">)[</span><span class="s2">&quot;country&quot;</span><span class="p">]</span></div>
<div class="viewcode-block" id="load_reverse_dns_map">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.load_reverse_dns_map">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">load_reverse_dns_map</span><span class="p">(</span>
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">ReverseDNSMap</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">psl_overrides_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">psl_overrides_url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads the reverse DNS map from a URL or local file.</span>
<span class="sd"> Clears and repopulates the given map dict in place. If the map is</span>
<span class="sd"> fetched from a URL, that is tried first; on failure (or if offline/local</span>
<span class="sd"> mode is selected) the bundled CSV is used as a fallback.</span>
<span class="sd"> ``psl_overrides.txt`` is reloaded at the same time using the same</span>
<span class="sd"> ``offline`` / ``always_use_local_file`` flags (with separate path/URL</span>
<span class="sd"> kwargs), so map entries that depend on a recent overrides entry fold</span>
<span class="sd"> correctly.</span>
<span class="sd"> Args:</span>
<span class="sd"> reverse_dns_map (dict): The map dict to populate (modified in place)</span>
<span class="sd"> always_use_local_file (bool): Always use a local map file</span>
<span class="sd"> local_file_path (str): Path to a local map file</span>
<span class="sd"> url (str): URL to a reverse DNS map</span>
<span class="sd"> offline (bool): Use the built-in copy of the reverse DNS map</span>
<span class="sd"> psl_overrides_path (str): Path to a local PSL overrides file</span>
<span class="sd"> psl_overrides_url (str): URL to a PSL overrides file</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># Reload PSL overrides first so any map entry that depends on a folded</span>
<span class="c1"># base domain resolves correctly against the current overrides list.</span>
<span class="n">load_psl_overrides</span><span class="p">(</span>
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_file</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="o">=</span><span class="n">psl_overrides_path</span><span class="p">,</span>
<span class="n">url</span><span class="o">=</span><span class="n">psl_overrides_url</span><span class="p">,</span>
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">url</span> <span class="o">=</span> <span class="p">(</span>
<span class="s2">&quot;https://raw.githubusercontent.com/domainaware&quot;</span>
<span class="s2">&quot;/parsedmarc/master/parsedmarc/&quot;</span>
<span class="s2">&quot;resources/maps/base_reverse_dns_map.csv&quot;</span>
<span class="p">)</span>
<span class="n">reverse_dns_map</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
<span class="k">def</span><span class="w"> </span><span class="nf">load_csv</span><span class="p">(</span><span class="n">_csv_file</span><span class="p">):</span>
<span class="n">reader</span> <span class="o">=</span> <span class="n">csv</span><span class="o">.</span><span class="n">DictReader</span><span class="p">(</span><span class="n">_csv_file</span><span class="p">)</span>
<span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">reader</span><span class="p">:</span>
<span class="n">key</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="s2">&quot;base_reverse_dns&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
<span class="n">reverse_dns_map</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span>
<span class="p">}</span>
<span class="n">csv_file</span> <span class="o">=</span> <span class="n">io</span><span class="o">.</span><span class="n">StringIO</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">offline</span> <span class="ow">or</span> <span class="n">always_use_local_file</span><span class="p">):</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Trying to fetch reverse DNS map from </span><span class="si">{</span><span class="n">url</span><span class="si">}</span><span class="s2">...&quot;</span><span class="p">)</span>
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;User-Agent&quot;</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">}</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
<span class="n">response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
<span class="n">csv_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
<span class="n">csv_file</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">load_csv</span><span class="p">(</span><span class="n">csv_file</span><span class="p">)</span>
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Failed to fetch reverse DNS map: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;Not a valid CSV file&quot;</span><span class="p">)</span>
<span class="n">csv_file</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Response body:&quot;</span><span class="p">)</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="n">csv_file</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">reverse_dns_map</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Loading included reverse DNS map...&quot;</span><span class="p">)</span>
<span class="n">path</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span>
<span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">maps</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">&quot;base_reverse_dns_map.csv&quot;</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">local_file_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">path</span> <span class="o">=</span> <span class="n">local_file_path</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="k">as</span> <span class="n">csv_file</span><span class="p">:</span>
<span class="n">load_csv</span><span class="p">(</span><span class="n">csv_file</span><span class="p">)</span></div>
<div class="viewcode-block" id="get_service_from_reverse_dns_base_domain">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_service_from_reverse_dns_base_domain">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_service_from_reverse_dns_base_domain</span><span class="p">(</span>
<span class="n">base_domain</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ReverseDNSMap</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ReverseDNSService</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the service name of a given base domain name from reverse DNS.</span>
<span class="sd"> Args:</span>
<span class="sd"> base_domain (str): The base domain of the reverse DNS lookup</span>
<span class="sd"> always_use_local_file (bool): Always use a local map file</span>
<span class="sd"> local_file_path (str): Path to a local map file</span>
<span class="sd"> url (str): URL to a reverse DNS map</span>
<span class="sd"> offline (bool): Use the built-in copy of the reverse DNS map</span>
<span class="sd"> reverse_dns_map (dict): A reverse DNS map</span>
<span class="sd"> Returns:</span>
<span class="sd"> dict: A dictionary containing name and type.</span>
<span class="sd"> If the service is unknown, the name will be</span>
<span class="sd"> the supplied base_domain and the type will be None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">base_domain</span> <span class="o">=</span> <span class="n">base_domain</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
<span class="n">reverse_dns_map_value</span><span class="p">:</span> <span class="n">ReverseDNSMap</span>
<span class="k">if</span> <span class="n">reverse_dns_map</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">reverse_dns_map_value</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">reverse_dns_map_value</span> <span class="o">=</span> <span class="n">reverse_dns_map</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">reverse_dns_map_value</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">load_reverse_dns_map</span><span class="p">(</span>
<span class="n">reverse_dns_map_value</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_file</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="o">=</span><span class="n">local_file_path</span><span class="p">,</span>
<span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span>
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">service</span><span class="p">:</span> <span class="n">ReverseDNSService</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">service</span> <span class="o">=</span> <span class="n">reverse_dns_map_value</span><span class="p">[</span><span class="n">base_domain</span><span class="p">]</span>
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
<span class="n">service</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="n">base_domain</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">}</span>
<span class="k">return</span> <span class="n">service</span></div>
<div class="viewcode-block" id="get_ip_address_info">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_info">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_ip_address_info</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">ip_db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">reverse_dns_map_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">always_use_local_files</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">reverse_dns_map_url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ReverseDNSMap</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_TIMEOUT</span><span class="p">,</span>
<span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_MAX_RETRIES</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">IPAddressInfo</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns reverse DNS and country information for the given IP address</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to check</span>
<span class="sd"> ip_db_path (str): path to a MMDB file from MaxMind or DBIP</span>
<span class="sd"> reverse_dns_map_path (str): Path to a reverse DNS map file</span>
<span class="sd"> reverse_dns_map_url (str): URL to the reverse DNS map file</span>
<span class="sd"> always_use_local_files (bool): Do not download files</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> reverse_dns_map (dict): A reverse DNS map</span>
<span class="sd"> offline (bool): Do not make online queries for geolocation or DNS</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default)</span>
<span class="sd"> timeout (float): Sets the DNS timeout in seconds</span>
<span class="sd"> retries (int): Number of times to retry on timeout or other transient</span>
<span class="sd"> errors</span>
<span class="sd"> Returns:</span>
<span class="sd"> dict: ``ip_address``, ``reverse_dns``, ``country``, ``base_domain``,</span>
<span class="sd"> ``name``, ``type``, ``asn``, ``as_name``, ``as_domain``</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">ip_address</span> <span class="o">=</span> <span class="n">ip_address</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="k">if</span> <span class="n">cache</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">cached_info</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span>
<span class="n">cached_info</span>
<span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cached_info</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span>
<span class="ow">and</span> <span class="s2">&quot;ip_address&quot;</span> <span class="ow">in</span> <span class="n">cached_info</span>
<span class="p">):</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> was found in cache&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">IPAddressInfo</span><span class="p">,</span> <span class="n">cached_info</span><span class="p">)</span>
<span class="n">info</span><span class="p">:</span> <span class="n">IPAddressInfo</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;ip_address&quot;</span><span class="p">:</span> <span class="n">ip_address</span><span class="p">,</span>
<span class="s2">&quot;reverse_dns&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;country&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;base_domain&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;asn&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;as_name&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;as_domain&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">offline</span><span class="p">:</span>
<span class="n">reverse_dns</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">reverse_dns</span> <span class="o">=</span> <span class="n">get_reverse_dns</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">,</span>
<span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span>
<span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">,</span>
<span class="n">retries</span><span class="o">=</span><span class="n">retries</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">db_record</span> <span class="o">=</span> <span class="n">get_ip_address_db_record</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">db_path</span><span class="o">=</span><span class="n">ip_db_path</span><span class="p">)</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;country&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">db_record</span><span class="p">[</span><span class="s2">&quot;country&quot;</span><span class="p">]</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;asn&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">db_record</span><span class="p">[</span><span class="s2">&quot;asn&quot;</span><span class="p">]</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;as_name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">db_record</span><span class="p">[</span><span class="s2">&quot;as_name&quot;</span><span class="p">]</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;as_domain&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">db_record</span><span class="p">[</span><span class="s2">&quot;as_domain&quot;</span><span class="p">]</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;reverse_dns&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">reverse_dns</span>
<span class="k">if</span> <span class="n">reverse_dns</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">base_domain</span> <span class="o">=</span> <span class="n">get_base_domain</span><span class="p">(</span><span class="n">reverse_dns</span><span class="p">)</span>
<span class="k">if</span> <span class="n">base_domain</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">service</span> <span class="o">=</span> <span class="n">get_service_from_reverse_dns_base_domain</span><span class="p">(</span>
<span class="n">base_domain</span><span class="p">,</span>
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="o">=</span><span class="n">reverse_dns_map_path</span><span class="p">,</span>
<span class="n">url</span><span class="o">=</span><span class="n">reverse_dns_map_url</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_files</span><span class="p">,</span>
<span class="n">reverse_dns_map</span><span class="o">=</span><span class="n">reverse_dns_map</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;base_domain&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_domain</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> reverse_dns not found&quot;</span><span class="p">)</span>
<span class="c1"># Fall back to ASN data for source attribution. ``reverse_dns`` and</span>
<span class="c1"># ``base_domain`` are left null so consumers can still tell an</span>
<span class="c1"># ASN-derived row apart from one resolved via a real PTR.</span>
<span class="n">map_value</span><span class="p">:</span> <span class="n">ReverseDNSMap</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">reverse_dns_map</span> <span class="k">if</span> <span class="n">reverse_dns_map</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="p">{}</span>
<span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">map_value</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">load_reverse_dns_map</span><span class="p">(</span>
<span class="n">map_value</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_files</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="o">=</span><span class="n">reverse_dns_map_path</span><span class="p">,</span>
<span class="n">url</span><span class="o">=</span><span class="n">reverse_dns_map_url</span><span class="p">,</span>
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">info</span><span class="p">[</span><span class="s2">&quot;as_domain&quot;</span><span class="p">]</span> <span class="ow">and</span> <span class="n">info</span><span class="p">[</span><span class="s2">&quot;as_domain&quot;</span><span class="p">]</span> <span class="ow">in</span> <span class="n">map_value</span><span class="p">:</span>
<span class="n">service</span> <span class="o">=</span> <span class="n">map_value</span><span class="p">[</span><span class="n">info</span><span class="p">[</span><span class="s2">&quot;as_domain&quot;</span><span class="p">]]</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span>
<span class="k">elif</span> <span class="n">info</span><span class="p">[</span><span class="s2">&quot;as_name&quot;</span><span class="p">]:</span>
<span class="c1"># ASN-domain not in the map: surface the raw AS name with no</span>
<span class="c1"># classification. Better than leaving the row unattributed.</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">info</span><span class="p">[</span><span class="s2">&quot;as_name&quot;</span><span class="p">]</span>
<span class="c1"># Don&#39;t cache weak-fallback attributions — rows where we had no PTR AND</span>
<span class="c1"># the ASN domain wasn&#39;t in the map, so ``name`` is just the raw ``as_name``</span>
<span class="c1"># from the MMDB. ``get_reverse_dns()`` swallows every ``DNSException`` as</span>
<span class="c1"># ``None``, so a transient PTR lookup failure (timeout, SERVFAIL, OSError)</span>
<span class="c1"># is indistinguishable from a real no-PTR case at this point. Caching the</span>
<span class="c1"># weak result would poison the 4-hour cache with a misattribution even</span>
<span class="c1"># after the PTR becomes resolvable again. Re-running on the next lookup</span>
<span class="c1"># is cheap and either produces a proper PTR-backed match or the same</span>
<span class="c1"># (still-best-effort) ASN attribution.</span>
<span class="n">weak_fallback</span> <span class="o">=</span> <span class="p">(</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;reverse_dns&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="ow">and</span> <span class="n">info</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span>
<span class="ow">and</span> <span class="n">info</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
<span class="ow">and</span> <span class="n">info</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span> <span class="o">==</span> <span class="n">info</span><span class="p">[</span><span class="s2">&quot;as_name&quot;</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">cache</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">weak_fallback</span><span class="p">:</span>
<span class="n">cache</span><span class="p">[</span><span class="n">ip_address</span><span class="p">]</span> <span class="o">=</span> <span class="n">info</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> added to cache&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">info</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">parse_email_address</span><span class="p">(</span><span class="n">original_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]:</span>
<span class="k">if</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot;&quot;</span><span class="p">:</span>
<span class="n">display_name</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">display_name</span> <span class="o">=</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">address</span> <span class="o">=</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">address_parts</span> <span class="o">=</span> <span class="n">address</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;@&quot;</span><span class="p">)</span>
<span class="n">local</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">domain</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">address_parts</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">local</span> <span class="o">=</span> <span class="n">address_parts</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="n">domain</span> <span class="o">=</span> <span class="n">address_parts</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="k">return</span> <span class="p">{</span>
<span class="s2">&quot;display_name&quot;</span><span class="p">:</span> <span class="n">display_name</span><span class="p">,</span>
<span class="s2">&quot;address&quot;</span><span class="p">:</span> <span class="n">address</span><span class="p">,</span>
<span class="s2">&quot;local&quot;</span><span class="p">:</span> <span class="n">local</span><span class="p">,</span>
<span class="s2">&quot;domain&quot;</span><span class="p">:</span> <span class="n">domain</span><span class="p">,</span>
<span class="p">}</span>
<div class="viewcode-block" id="get_filename_safe_string">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_filename_safe_string">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_filename_safe_string</span><span class="p">(</span><span class="n">string</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a string to a string that is safe for a filename</span>
<span class="sd"> Args:</span>
<span class="sd"> string (str): A string to make safe for a filename</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: A string safe for a filename</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">invalid_filename_chars</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;</span><span class="se">\\</span><span class="s2">&quot;</span><span class="p">,</span> <span class="s2">&quot;/&quot;</span><span class="p">,</span> <span class="s2">&quot;:&quot;</span><span class="p">,</span> <span class="s1">&#39;&quot;&#39;</span><span class="p">,</span> <span class="s2">&quot;*&quot;</span><span class="p">,</span> <span class="s2">&quot;?&quot;</span><span class="p">,</span> <span class="s2">&quot;|&quot;</span><span class="p">,</span> <span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">,</span> <span class="s2">&quot;</span><span class="se">\r</span><span class="s2">&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">string</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">string</span> <span class="o">=</span> <span class="s2">&quot;None&quot;</span>
<span class="k">for</span> <span class="n">char</span> <span class="ow">in</span> <span class="n">invalid_filename_chars</span><span class="p">:</span>
<span class="n">string</span> <span class="o">=</span> <span class="n">string</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">char</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">string</span> <span class="o">=</span> <span class="n">string</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">)</span>
<span class="n">string</span> <span class="o">=</span> <span class="p">(</span><span class="n">string</span><span class="p">[:</span><span class="mi">100</span><span class="p">])</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">string</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">100</span> <span class="k">else</span> <span class="n">string</span>
<span class="k">return</span> <span class="n">string</span></div>
<div class="viewcode-block" id="is_mbox">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.is_mbox">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">is_mbox</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks if the file at the given path is an MBOX mailbox file</span>
<span class="sd"> Args:</span>
<span class="sd"> path: Path to the file to check</span>
<span class="sd"> Returns:</span>
<span class="sd"> bool: A flag that indicates if the file is an MBOX mailbox file</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">_is_mbox</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">mbox</span> <span class="o">=</span> <span class="n">mailbox</span><span class="o">.</span><span class="n">mbox</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">mbox</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">_is_mbox</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Error checking for MBOX file: </span><span class="si">{0}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="fm">__str__</span><span class="p">()))</span>
<span class="k">return</span> <span class="n">_is_mbox</span></div>
<div class="viewcode-block" id="is_outlook_msg">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.is_outlook_msg">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">is_outlook_msg</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks if the given content is an Outlook MSG (OLE) file</span>
<span class="sd"> Args:</span>
<span class="sd"> content: Content to check</span>
<span class="sd"> Returns:</span>
<span class="sd"> bool: A flag that indicates if the file is an Outlook MSG file</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">)</span> <span class="ow">and</span> <span class="n">content</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span>
<span class="sa">b</span><span class="s2">&quot;</span><span class="se">\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1</span><span class="s2">&quot;</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="convert_outlook_msg">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.convert_outlook_msg">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">convert_outlook_msg</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Uses the ``msgconvert`` Perl utility to convert an Outlook MSG file to</span>
<span class="sd"> standard RFC 822 format</span>
<span class="sd"> Args:</span>
<span class="sd"> msg_bytes (bytes): the content of the .msg file</span>
<span class="sd"> Returns:</span>
<span class="sd"> An RFC 822 bytes payload</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_outlook_msg</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;The supplied bytes are not an Outlook MSG file&quot;</span><span class="p">)</span>
<span class="n">orig_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getcwd</span><span class="p">()</span>
<span class="n">tmp_dir</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="s2">&quot;sample.msg&quot;</span><span class="p">,</span> <span class="s2">&quot;wb&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">msg_file</span><span class="p">:</span>
<span class="n">msg_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">subprocess</span><span class="o">.</span><span class="n">check_call</span><span class="p">(</span>
<span class="p">[</span><span class="s2">&quot;msgconvert&quot;</span><span class="p">,</span> <span class="s2">&quot;sample.msg&quot;</span><span class="p">],</span> <span class="n">stdout</span><span class="o">=</span><span class="n">null_file</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">null_file</span>
<span class="p">)</span>
<span class="n">eml_path</span> <span class="o">=</span> <span class="s2">&quot;sample.eml&quot;</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">eml_path</span><span class="p">,</span> <span class="s2">&quot;rb&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">eml_file</span><span class="p">:</span>
<span class="n">rfc822</span> <span class="o">=</span> <span class="n">eml_file</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">EmailParserError</span><span class="p">(</span>
<span class="s2">&quot;Failed to convert Outlook MSG: msgconvert utility not found&quot;</span>
<span class="p">)</span>
<span class="k">finally</span><span class="p">:</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">orig_dir</span><span class="p">)</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
<span class="k">return</span> <span class="n">rfc822</span></div>
<div class="viewcode-block" id="parse_email">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.parse_email">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">parse_email</span><span class="p">(</span>
<span class="n">data</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bytes</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> <span class="o">*</span><span class="p">,</span> <span class="n">strip_attachment_payloads</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A simplified email parser</span>
<span class="sd"> Args:</span>
<span class="sd"> data: The RFC 822 message string, or MSG binary</span>
<span class="sd"> strip_attachment_payloads (bool): Remove attachment payloads</span>
<span class="sd"> Returns:</span>
<span class="sd"> dict: Parsed email data</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">):</span>
<span class="k">if</span> <span class="n">is_outlook_msg</span><span class="p">(</span><span class="n">data</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">convert_outlook_msg</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s2">&quot;replace&quot;</span><span class="p">)</span>
<span class="n">parsed_email</span> <span class="o">=</span> <span class="n">mailparser</span><span class="o">.</span><span class="n">parse_from_string</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">headers</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">parsed_email</span><span class="o">.</span><span class="n">headers_json</span><span class="p">)</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">parsed_email</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">parsed_email</span><span class="o">.</span><span class="n">mail_json</span><span class="p">)</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">headers</span>
<span class="k">if</span> <span class="s2">&quot;received&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="k">for</span> <span class="n">received</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;received&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;date_utc&quot;</span> <span class="ow">in</span> <span class="n">received</span><span class="p">:</span>
<span class="k">if</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">del</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;from&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="k">if</span> <span class="s2">&quot;From&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">][</span><span class="s2">&quot;From&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span>
<span class="k">if</span> <span class="s2">&quot;date&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># mailparser&#39;s mail_json names these headers with hyphens</span>
<span class="c1"># (&quot;reply-to&quot;, &quot;delivered-to&quot;), not underscores. Reading the</span>
<span class="c1"># underscored key always missed, so every Reply-To address was</span>
<span class="c1"># silently dropped. Convert under the underscored name consumers</span>
<span class="c1"># expect and drop the raw hyphenated key so the body carries a</span>
<span class="c1"># single representation, matching how &quot;to&quot;/&quot;cc&quot;/&quot;bcc&quot; are handled.</span>
<span class="k">if</span> <span class="s2">&quot;reply-to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;reply_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">&quot;reply-to&quot;</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;reply_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;cc&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;bcc&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;delivered-to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;delivered_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">&quot;delivered-to&quot;</span><span class="p">))</span>
<span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;attachments&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">for</span> <span class="n">attachment</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;payload&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;payload&quot;</span><span class="p">]</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="s2">&quot;content_transfer_encoding&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="k">if</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;content_transfer_encoding&quot;</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot;base64&quot;</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="n">decode_base64</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="nb">str</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
<span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;sha256&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">sha256</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Unable to decode attachment: </span><span class="si">{0}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="fm">__str__</span><span class="p">()))</span>
<span class="k">if</span> <span class="n">strip_attachment_payloads</span><span class="p">:</span>
<span class="k">for</span> <span class="n">attachment</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;payload&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="k">del</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;payload&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="s2">&quot;subject&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;subject&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;filename_safe_subject&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">get_filename_safe_string</span><span class="p">(</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;subject&quot;</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;body&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;body&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">return</span> <span class="n">parsed_email</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2018 - 2025, Sean Whalen and contributors.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
// Passing a function to jQuery() registers it as a DOM-ready callback; once
// the document is ready it calls the theme's Navigation.enable().
// NOTE(review): the meaning of the `true` argument is defined in the theme's
// theme.js, which is not visible here — confirm before changing it.
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>