mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-06-23 10:34:18 +00:00
1395 lines
153 KiB
HTML
1395 lines
153 KiB
HTML
|
|
|
|
<!DOCTYPE html>
|
|
<html class="writer-html5" lang="en" data-content_root="../../">
|
|
<head>
|
|
<meta charset="utf-8" />
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
<title>parsedmarc.utils — parsedmarc 10.1.1 documentation</title>
|
|
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=b86133f3" />
|
|
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css?v=e59714d7" />
|
|
|
|
|
|
<script src="../../_static/jquery.js?v=5d32c60e"></script>
|
|
<script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
|
|
<script src="../../_static/documentation_options.js?v=e6f446a4"></script>
|
|
<script src="../../_static/doctools.js?v=9bcbadda"></script>
|
|
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
|
|
<script src="../../_static/js/theme.js"></script>
|
|
<link rel="index" title="Index" href="../../genindex.html" />
|
|
<link rel="search" title="Search" href="../../search.html" />
|
|
</head>
|
|
|
|
<body class="wy-body-for-nav">
|
|
<div class="wy-grid-for-nav">
|
|
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
|
|
<div class="wy-side-scroll">
|
|
<div class="wy-side-nav-search" >
|
|
|
|
|
|
|
|
<a href="../../index.html" class="icon icon-home">
|
|
parsedmarc
|
|
</a>
|
|
<div role="search">
|
|
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
|
|
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
|
|
<input type="hidden" name="check_keywords" value="yes" />
|
|
<input type="hidden" name="area" value="default" />
|
|
</form>
|
|
</div>
|
|
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
|
|
<p class="caption" role="heading"><span class="caption-text">Contents</span></p>
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../usage.html">Using parsedmarc</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../output.html">Sample outputs</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../elasticsearch.html">Elasticsearch and Kibana</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../opensearch.html">OpenSearch and Grafana</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../kibana.html">Using the Kibana dashboards</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../splunk.html">Splunk</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../davmail.html">Accessing an inbox using OWA/EWS</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../dmarc.html">Understanding DMARC</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../contributing.html">Contributing to parsedmarc</a></li>
|
|
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API reference</a></li>
|
|
</ul>
|
|
|
|
</div>
|
|
</div>
|
|
</nav>
|
|
|
|
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
|
|
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
|
|
<a href="../../index.html">parsedmarc</a>
|
|
</nav>
|
|
|
|
<div class="wy-nav-content">
|
|
<div class="rst-content">
|
|
<div role="navigation" aria-label="Page navigation">
|
|
<ul class="wy-breadcrumbs">
|
|
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
|
|
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
|
|
<li class="breadcrumb-item"><a href="../parsedmarc.html">parsedmarc</a></li>
|
|
<li class="breadcrumb-item active">parsedmarc.utils</li>
|
|
<li class="wy-breadcrumbs-aside">
|
|
</li>
|
|
</ul>
|
|
<hr/>
|
|
</div>
|
|
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
|
|
<div itemprop="articleBody">
|
|
|
|
<h1>Source code for parsedmarc.utils</h1><div class="highlight"><pre>
|
|
<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
|
|
|
|
<span class="sd">"""Utility functions that might be useful for other projects"""</span>
|
|
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
|
|
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">base64</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">csv</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">hashlib</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">io</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">json</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">logging</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">mailbox</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">os</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">shutil</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">subprocess</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">tempfile</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">datetime</span><span class="w"> </span><span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span><span class="p">,</span> <span class="n">timezone</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">TypedDict</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">cast</span>
|
|
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">mailparser</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">expiringdict</span><span class="w"> </span><span class="kn">import</span> <span class="n">ExpiringDict</span>
|
|
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">importlib.resources</span><span class="w"> </span><span class="kn">import</span> <span class="n">files</span>
|
|
<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
|
|
<span class="c1"># Try backported to PY&lt;3 `importlib_resources`</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">importlib.resources</span><span class="w"> </span><span class="kn">import</span> <span class="n">files</span>
|
|
|
|
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.exception</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.resolver</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.reversename</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">maxminddb</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">publicsuffixlist</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">requests</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">dateutil.parser</span><span class="w"> </span><span class="kn">import</span> <span class="n">parse</span> <span class="k">as</span> <span class="n">parse_date</span>
|
|
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">parsedmarc.resources.ipinfo</span>
|
|
<span class="kn">import</span><span class="w"> </span><span class="nn">parsedmarc.resources.maps</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">parsedmarc.constants</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span>
|
|
<span class="n">DEFAULT_DNS_MAX_RETRIES</span><span class="p">,</span>
|
|
<span class="n">DEFAULT_DNS_TIMEOUT</span><span class="p">,</span>
|
|
<span class="n">USER_AGENT</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">parsedmarc.log</span><span class="w"> </span><span class="kn">import</span> <span class="n">logger</span>
|
|
|
|
<span class="c1"># Errors considered transient and retryable by query_dns. LifetimeTimeout is</span>
|
|
<span class="c1"># dnspython's deadline expiry; NoNameservers typically wraps a SERVFAIL from</span>
|
|
<span class="c1"># upstream; OSError covers socket-level failures during TCP fallback.</span>
|
|
<span class="n">_RETRYABLE_DNS_ERRORS</span> <span class="o">=</span> <span class="p">(</span>
|
|
<span class="n">dns</span><span class="o">.</span><span class="n">resolver</span><span class="o">.</span><span class="n">LifetimeTimeout</span><span class="p">,</span>
|
|
<span class="n">dns</span><span class="o">.</span><span class="n">resolver</span><span class="o">.</span><span class="n">NoNameservers</span><span class="p">,</span>
|
|
<span class="ne">OSError</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="n">parenthesis_regex</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">"\s*\(.*\)\s*"</span><span class="p">)</span>
|
|
|
|
<span class="n">null_file</span> <span class="o">=</span> <span class="n">subprocess</span><span class="o">.</span><span class="n">DEVNULL</span>
|
|
<span class="n">mailparser_logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">"mailparser"</span><span class="p">)</span>
|
|
<span class="n">mailparser_logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">CRITICAL</span><span class="p">)</span>
|
|
<span class="n">psl</span> <span class="o">=</span> <span class="n">publicsuffixlist</span><span class="o">.</span><span class="n">PublicSuffixList</span><span class="p">()</span>
|
|
<span class="n">psl_overrides</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
|
|
|
|
|
<div class="viewcode-block" id="load_psl_overrides">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.load_psl_overrides">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">load_psl_overrides</span><span class="p">(</span>
|
|
<span class="o">*</span><span class="p">,</span>
|
|
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Loads the PSL overrides list from a URL or local file.</span>
|
|
|
|
<span class="sd"> Clears and repopulates the module-level ``psl_overrides`` list in place,</span>
|
|
<span class="sd"> then returns it. The URL is tried first; on failure (or when</span>
|
|
<span class="sd"> ``offline``/``always_use_local_file`` is set) the local path is used,</span>
|
|
<span class="sd"> defaulting to the bundled ``psl_overrides.txt``.</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> always_use_local_file (bool): Always use a local overrides file</span>
|
|
<span class="sd"> local_file_path (str): Path to a local overrides file</span>
|
|
<span class="sd"> url (str): URL to a PSL overrides file</span>
|
|
<span class="sd"> offline (bool): Use the built-in copy of the overrides</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> list[str]: the module-level ``psl_overrides`` list</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">url</span> <span class="o">=</span> <span class="p">(</span>
|
|
<span class="s2">"https://raw.githubusercontent.com/domainaware"</span>
|
|
<span class="s2">"/parsedmarc/master/parsedmarc/"</span>
|
|
<span class="s2">"resources/maps/psl_overrides.txt"</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="n">psl_overrides</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
|
|
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">_load_text</span><span class="p">(</span><span class="n">text</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">text</span><span class="o">.</span><span class="n">splitlines</span><span class="p">():</span>
|
|
<span class="n">s</span> <span class="o">=</span> <span class="n">line</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
|
<span class="k">if</span> <span class="n">s</span><span class="p">:</span>
|
|
<span class="n">psl_overrides</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">s</span><span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">offline</span> <span class="ow">or</span> <span class="n">always_use_local_file</span><span class="p">):</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Trying to fetch PSL overrides from </span><span class="si">{</span><span class="n">url</span><span class="si">}</span><span class="s2">..."</span><span class="p">)</span>
|
|
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"User-Agent"</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">}</span>
|
|
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
|
|
<span class="n">response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
|
|
<span class="n">_load_text</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
|
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Failed to fetch PSL overrides: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">psl_overrides</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
<span class="n">path</span> <span class="o">=</span> <span class="n">local_file_path</span> <span class="ow">or</span> <span class="nb">str</span><span class="p">(</span>
|
|
<span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">maps</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">"psl_overrides.txt"</span><span class="p">)</span>
|
|
<span class="p">)</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Loading PSL overrides from </span><span class="si">{</span><span class="n">path</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"utf-8"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
|
<span class="n">_load_text</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
|
|
|
|
<span class="k">return</span> <span class="n">psl_overrides</span></div>
|
|
|
|
|
|
|
|
<span class="c1"># Bootstrap with the bundled file at import time — no network call.</span>
|
|
<span class="n">load_psl_overrides</span><span class="p">(</span><span class="n">offline</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
|
|
|
|
<div class="viewcode-block" id="EmailParserError">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.EmailParserError">[docs]</a>
|
|
<span class="k">class</span><span class="w"> </span><span class="nc">EmailParserError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
|
|
<span class="w"> </span><span class="sd">"""Raised when an error parsing the email occurs"""</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="DownloadError">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.DownloadError">[docs]</a>
|
|
<span class="k">class</span><span class="w"> </span><span class="nc">DownloadError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
|
|
<span class="w"> </span><span class="sd">"""Raised when an error occurs when downloading a file"""</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="ReverseDNSService">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.ReverseDNSService">[docs]</a>
|
|
<span class="k">class</span><span class="w"> </span><span class="nc">ReverseDNSService</span><span class="p">(</span><span class="n">TypedDict</span><span class="p">):</span>
|
|
<span class="n">name</span><span class="p">:</span> <span class="nb">str</span>
|
|
<span class="nb">type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span></div>
|
|
|
|
|
|
|
|
<span class="n">ReverseDNSMap</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ReverseDNSService</span><span class="p">]</span>
|
|
|
|
|
|
<div class="viewcode-block" id="IPAddressInfo">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.IPAddressInfo">[docs]</a>
|
|
<span class="k">class</span><span class="w"> </span><span class="nc">IPAddressInfo</span><span class="p">(</span><span class="n">TypedDict</span><span class="p">):</span>
|
|
<span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span>
|
|
<span class="n">reverse_dns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
<span class="n">country</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
<span class="n">base_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
<span class="n">name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
<span class="nb">type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
<span class="n">asn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span>
|
|
<span class="n">as_name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
<span class="n">as_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="decode_base64">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.decode_base64">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">decode_base64</span><span class="p">(</span><span class="n">data</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Decodes a base64 string, with padding being optional</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> data (str): A base64 encoded string</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> bytes: The decoded bytes</span>
|
|
|
|
<span class="sd"> """</span>
|
|
<span class="n">data_bytes</span> <span class="o">=</span> <span class="nb">bytes</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">"ascii"</span><span class="p">)</span>
|
|
<span class="n">missing_padding</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data_bytes</span><span class="p">)</span> <span class="o">%</span> <span class="mi">4</span>
|
|
<span class="k">if</span> <span class="n">missing_padding</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
|
|
<span class="n">data_bytes</span> <span class="o">+=</span> <span class="sa">b</span><span class="s2">"="</span> <span class="o">*</span> <span class="p">(</span><span class="mi">4</span> <span class="o">-</span> <span class="n">missing_padding</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="n">base64</span><span class="o">.</span><span class="n">b64decode</span><span class="p">(</span><span class="n">data_bytes</span><span class="p">)</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="get_base_domain">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_base_domain">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">get_base_domain</span><span class="p">(</span><span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Gets the base domain name for the given domain</span>
|
|
|
|
<span class="sd"> .. note::</span>
|
|
<span class="sd"> Results are based on a list of public domain suffixes at</span>
|
|
<span class="sd"> https://publicsuffix.org/list/public_suffix_list.dat and overrides included in</span>
|
|
<span class="sd"> parsedmarc.resources.maps.psl_overrides.txt</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> domain (str): A domain or subdomain</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> str: The base domain of the given domain</span>
|
|
|
|
<span class="sd"> """</span>
|
|
<span class="n">domain</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
|
<span class="n">publicsuffix</span> <span class="o">=</span> <span class="n">psl</span><span class="o">.</span><span class="n">privatesuffix</span><span class="p">(</span><span class="n">domain</span><span class="p">)</span>
|
|
<span class="k">for</span> <span class="n">override</span> <span class="ow">in</span> <span class="n">psl_overrides</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="n">domain</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="n">override</span><span class="p">):</span>
|
|
<span class="k">return</span> <span class="n">override</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s2">"."</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s2">"-"</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="n">publicsuffix</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="query_dns">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.query_dns">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">query_dns</span><span class="p">(</span>
|
|
<span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
|
|
<span class="n">record_type</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
|
|
<span class="o">*</span><span class="p">,</span>
|
|
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_TIMEOUT</span><span class="p">,</span>
|
|
<span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_MAX_RETRIES</span><span class="p">,</span>
|
|
<span class="n">_attempt</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Queries DNS</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> domain (str): The domain or subdomain to query about</span>
|
|
<span class="sd"> record_type (str): The record type to query for</span>
|
|
<span class="sd"> cache (ExpiringDict): Cache storage</span>
|
|
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
|
|
<span class="sd"> (Cloudflare's public DNS resolvers by default). Pass</span>
|
|
<span class="sd"> ``parsedmarc.constants.RECOMMENDED_DNS_NAMESERVERS`` for a</span>
|
|
<span class="sd"> cross-provider mix that fails over when one provider's path is</span>
|
|
<span class="sd"> slow or broken.</span>
|
|
<span class="sd"> timeout (float): Overall DNS lifetime budget in seconds per</span>
|
|
<span class="sd"> configured nameserver. Per-query UDP attempts are capped at</span>
|
|
<span class="sd"> ``min(1.0, timeout)`` so dnspython retries within the lifetime on</span>
|
|
<span class="sd"> transient UDP packet loss (mirroring ``dig``'s default</span>
|
|
<span class="sd"> ``+tries=3`` behavior); with multiple nameservers configured this</span>
|
|
<span class="sd"> same cap also makes a slow or broken nameserver fall through to</span>
|
|
<span class="sd"> the next quickly.</span>
|
|
<span class="sd"> retries (int): Number of times to retry the whole query after a</span>
|
|
<span class="sd"> timeout or other transient error (``LifetimeTimeout``,</span>
|
|
<span class="sd"> ``NoNameservers``, ``OSError``). Failover between configured</span>
|
|
<span class="sd"> nameservers happens within each attempt.</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> list: A list of answers</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">domain</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">domain</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
|
<span class="n">record_type</span> <span class="o">=</span> <span class="n">record_type</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
|
|
<span class="n">cache_key</span> <span class="o">=</span> <span class="s2">"</span><span class="si">{0}</span><span class="s2">_</span><span class="si">{1}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
|
|
<span class="n">cached_records</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cache_key</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cached_records</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
|
|
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">cached_records</span><span class="p">)</span>
|
|
|
|
<span class="n">resolver</span> <span class="o">=</span> <span class="n">dns</span><span class="o">.</span><span class="n">resolver</span><span class="o">.</span><span class="n">Resolver</span><span class="p">()</span>
|
|
<span class="n">timeout</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">timeout</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">nameservers</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">nameservers</span> <span class="o">=</span> <span class="p">[</span>
|
|
<span class="s2">"1.1.1.1"</span><span class="p">,</span>
|
|
<span class="s2">"1.0.0.1"</span><span class="p">,</span>
|
|
<span class="s2">"2606:4700:4700::1111"</span><span class="p">,</span>
|
|
<span class="s2">"2606:4700:4700::1001"</span><span class="p">,</span>
|
|
<span class="p">]</span>
|
|
<span class="n">resolver</span><span class="o">.</span><span class="n">nameservers</span> <span class="o">=</span> <span class="n">nameservers</span>
|
|
<span class="c1"># Cap per-query UDP timeout at 1s so dnspython retries within the</span>
|
|
<span class="c1"># lifetime window on transient packet loss — otherwise with a single</span>
|
|
<span class="c1"># nameserver and timeout == lifetime, one dropped UDP datagram consumes</span>
|
|
<span class="c1"># the whole budget and raises LifetimeTimeout without a retry (dig's</span>
|
|
<span class="c1"># default +tries=3 masks this case). With multiple nameservers the same</span>
|
|
<span class="c1"># cap lets a slow/broken one fall through.</span>
|
|
<span class="n">resolver</span><span class="o">.</span><span class="n">timeout</span> <span class="o">=</span> <span class="nb">min</span><span class="p">(</span><span class="mf">1.0</span><span class="p">,</span> <span class="n">timeout</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">resolver</span><span class="o">.</span><span class="n">nameservers</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
|
<span class="n">resolver</span><span class="o">.</span><span class="n">lifetime</span> <span class="o">=</span> <span class="n">timeout</span> <span class="o">*</span> <span class="nb">len</span><span class="p">(</span><span class="n">resolver</span><span class="o">.</span><span class="n">nameservers</span><span class="p">)</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">resolver</span><span class="o">.</span><span class="n">lifetime</span> <span class="o">=</span> <span class="n">timeout</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">answers</span> <span class="o">=</span> <span class="n">resolver</span><span class="o">.</span><span class="n">resolve</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">,</span> <span class="n">lifetime</span><span class="o">=</span><span class="n">resolver</span><span class="o">.</span><span class="n">lifetime</span><span class="p">)</span>
|
|
<span class="k">except</span> <span class="n">_RETRYABLE_DNS_ERRORS</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">_attempt</span> <span class="o">+=</span> <span class="mi">1</span>
|
|
<span class="k">if</span> <span class="n">_attempt</span> <span class="o">></span> <span class="n">retries</span><span class="p">:</span>
|
|
<span class="k">raise</span> <span class="n">e</span>
|
|
<span class="k">return</span> <span class="n">query_dns</span><span class="p">(</span>
|
|
<span class="n">domain</span><span class="p">,</span>
|
|
<span class="n">record_type</span><span class="p">,</span>
|
|
<span class="n">cache</span><span class="o">=</span><span class="n">cache</span><span class="p">,</span>
|
|
<span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span>
|
|
<span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">,</span>
|
|
<span class="n">retries</span><span class="o">=</span><span class="n">retries</span><span class="p">,</span>
|
|
<span class="n">_attempt</span><span class="o">=</span><span class="n">_attempt</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
<span class="n">records</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
|
|
<span class="nb">map</span><span class="p">(</span>
|
|
<span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">r</span><span class="o">.</span><span class="n">to_text</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">'"'</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">"."</span><span class="p">),</span>
|
|
<span class="n">answers</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
<span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
|
|
<span class="n">cache</span><span class="p">[</span><span class="n">cache_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">records</span>
|
|
|
|
<span class="k">return</span> <span class="n">records</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="get_reverse_dns">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_reverse_dns">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">get_reverse_dns</span><span class="p">(</span>
|
|
<span class="n">ip_address</span><span class="p">,</span>
|
|
<span class="o">*</span><span class="p">,</span>
|
|
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_TIMEOUT</span><span class="p">,</span>
|
|
<span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_MAX_RETRIES</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Resolves an IP address to a hostname using a reverse DNS query</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> ip_address (str): The IP address to resolve</span>
|
|
<span class="sd"> cache (ExpiringDict): Cache storage</span>
|
|
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
|
|
<span class="sd"> (Cloudflare's public DNS resolvers by default)</span>
|
|
<span class="sd"> timeout (float): Sets the DNS query timeout in seconds</span>
|
|
<span class="sd"> retries (int): Number of times to retry on timeout or other transient</span>
|
|
<span class="sd"> errors</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> str: The reverse DNS hostname (if any)</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">hostname</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">address</span> <span class="o">=</span> <span class="n">dns</span><span class="o">.</span><span class="n">reversename</span><span class="o">.</span><span class="n">from_address</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span>
|
|
<span class="n">hostname</span> <span class="o">=</span> <span class="n">query_dns</span><span class="p">(</span>
|
|
<span class="nb">str</span><span class="p">(</span><span class="n">address</span><span class="p">),</span>
|
|
<span class="s2">"PTR"</span><span class="p">,</span>
|
|
<span class="n">cache</span><span class="o">=</span><span class="n">cache</span><span class="p">,</span>
|
|
<span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span>
|
|
<span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">,</span>
|
|
<span class="n">retries</span><span class="o">=</span><span class="n">retries</span><span class="p">,</span>
|
|
<span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
|
|
|
|
<span class="k">except</span> <span class="n">dns</span><span class="o">.</span><span class="n">exception</span><span class="o">.</span><span class="n">DNSException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"get_reverse_dns(</span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">) exception: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
|
|
<span class="k">return</span> <span class="n">hostname</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="timestamp_to_datetime">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.timestamp_to_datetime">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">timestamp_to_datetime</span><span class="p">(</span><span class="n">timestamp</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="n">datetime</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Converts a UNIX/DMARC timestamp to a Python ``datetime`` object</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> timestamp (int): The timestamp</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> datetime: The converted timestamp as a Python ``datetime`` object</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">timestamp</span><span class="p">))</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="timestamp_to_human">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.timestamp_to_human">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">timestamp_to_human</span><span class="p">(</span><span class="n">timestamp</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Converts a UNIX/DMARC timestamp to a human-readable string</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd">        timestamp (int): The timestamp</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> str: The converted timestamp in ``YYYY-MM-DD HH:MM:SS`` format</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">return</span> <span class="n">timestamp_to_datetime</span><span class="p">(</span><span class="n">timestamp</span><span class="p">)</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">"%Y-%m-</span><span class="si">%d</span><span class="s2"> %H:%M:%S"</span><span class="p">)</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="human_timestamp_to_datetime">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.human_timestamp_to_datetime">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">human_timestamp_to_datetime</span><span class="p">(</span>
|
|
<span class="n">human_timestamp</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">to_utc</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="n">datetime</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Converts a human-readable timestamp into a Python ``datetime`` object</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> human_timestamp (str): A timestamp string</span>
|
|
<span class="sd"> to_utc (bool): Convert the timestamp to UTC</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> datetime: The converted timestamp</span>
|
|
<span class="sd"> """</span>
|
|
|
|
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">human_timestamp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"-0000"</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span>
|
|
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">parenthesis_regex</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">""</span><span class="p">,</span> <span class="n">human_timestamp</span><span class="p">)</span>
|
|
|
|
<span class="n">dt</span> <span class="o">=</span> <span class="n">parse_date</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="n">dt</span><span class="o">.</span><span class="n">astimezone</span><span class="p">(</span><span class="n">timezone</span><span class="o">.</span><span class="n">utc</span><span class="p">)</span> <span class="k">if</span> <span class="n">to_utc</span> <span class="k">else</span> <span class="n">dt</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="human_timestamp_to_unix_timestamp">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.human_timestamp_to_unix_timestamp">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">human_timestamp_to_unix_timestamp</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">int</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Converts a human-readable timestamp into a UNIX timestamp</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd">        human_timestamp (str): A timestamp in ``YYYY-MM-DD HH:MM:SS`` format</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd">        int: The converted timestamp</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">human_timestamp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"T"</span><span class="p">,</span> <span class="s2">" "</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">human_timestamp_to_datetime</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">)</span><span class="o">.</span><span class="n">timestamp</span><span class="p">())</span></div>
|
|
|
|
|
|
|
|
<span class="n">_IP_DB_PATH</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
|
|
|
|
<div class="viewcode-block" id="load_ip_db">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.load_ip_db">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">load_ip_db</span><span class="p">(</span>
|
|
<span class="o">*</span><span class="p">,</span>
|
|
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Downloads the IP-to-country MMDB database from a URL and caches it</span>
|
|
<span class="sd"> locally. Falls back to the bundled copy on failure or when offline.</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> always_use_local_file: Always use a local/bundled database file</span>
|
|
<span class="sd"> local_file_path: Path to a local MMDB file</span>
|
|
<span class="sd"> url: URL to the MMDB database file</span>
|
|
<span class="sd"> offline: Do not make online requests</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">global</span> <span class="n">_IP_DB_PATH</span>
|
|
|
|
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">url</span> <span class="o">=</span> <span class="p">(</span>
|
|
<span class="s2">"https://github.com/domainaware/parsedmarc/raw/"</span>
|
|
<span class="s2">"refs/heads/master/parsedmarc/resources/ipinfo/"</span>
|
|
<span class="s2">"ipinfo_lite.mmdb"</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="n">local_file_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">local_file_path</span><span class="p">):</span>
|
|
<span class="n">_IP_DB_PATH</span> <span class="o">=</span> <span class="n">local_file_path</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Using local IP database at </span><span class="si">{</span><span class="n">local_file_path</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">return</span>
|
|
|
|
<span class="n">cache_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">tempfile</span><span class="o">.</span><span class="n">gettempdir</span><span class="p">(),</span> <span class="s2">"parsedmarc"</span><span class="p">)</span>
|
|
<span class="n">cached_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">cache_dir</span><span class="p">,</span> <span class="s2">"ipinfo_lite.mmdb"</span><span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">offline</span> <span class="ow">or</span> <span class="n">always_use_local_file</span><span class="p">):</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Trying to fetch IP database from </span><span class="si">{</span><span class="n">url</span><span class="si">}</span><span class="s2">..."</span><span class="p">)</span>
|
|
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"User-Agent"</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">}</span>
|
|
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mi">60</span><span class="p">)</span>
|
|
<span class="n">response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
|
|
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="n">cache_dir</span><span class="p">,</span> <span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
|
|
<span class="n">tmp_path</span> <span class="o">=</span> <span class="n">cached_path</span> <span class="o">+</span> <span class="s2">".tmp"</span>
|
|
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">,</span> <span class="s2">"wb"</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
|
|
<span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">content</span><span class="p">)</span>
|
|
<span class="n">shutil</span><span class="o">.</span><span class="n">move</span><span class="p">(</span><span class="n">tmp_path</span><span class="p">,</span> <span class="n">cached_path</span><span class="p">)</span>
|
|
<span class="n">_IP_DB_PATH</span> <span class="o">=</span> <span class="n">cached_path</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"IP database updated successfully"</span><span class="p">)</span>
|
|
<span class="k">return</span>
|
|
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Failed to fetch IP database: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Failed to save IP database: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
|
|
<span class="c1"># Fall back to a previously cached copy if available</span>
|
|
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">cached_path</span><span class="p">):</span>
|
|
<span class="n">_IP_DB_PATH</span> <span class="o">=</span> <span class="n">cached_path</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Using cached IP database"</span><span class="p">)</span>
|
|
<span class="k">return</span>
|
|
|
|
<span class="c1"># Final fallback: bundled copy</span>
|
|
<span class="n">_IP_DB_PATH</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">ipinfo</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">"ipinfo_lite.mmdb"</span><span class="p">))</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Using bundled IP database"</span><span class="p">)</span></div>
|
|
|
|
|
|
|
|
<span class="k">class</span><span class="w"> </span><span class="nc">_IPDatabaseRecord</span><span class="p">(</span><span class="n">TypedDict</span><span class="p">):</span>
|
|
<span class="n">country</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
<span class="n">asn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span>
|
|
<span class="n">as_name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
<span class="n">as_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
|
|
|
|
|
|
<div class="viewcode-block" id="InvalidIPinfoAPIKey">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.InvalidIPinfoAPIKey">[docs]</a>
|
|
<span class="k">class</span><span class="w"> </span><span class="nc">InvalidIPinfoAPIKey</span><span class="p">(</span><span class="ne">Exception</span><span class="p">):</span>
|
|
<span class="w"> </span><span class="sd">"""Raised when the IPinfo API rejects the configured token."""</span></div>
|
|
|
|
|
|
|
|
<span class="c1"># IPinfo Lite REST API. When ``_IPINFO_API_TOKEN`` is set,</span>
|
|
<span class="c1"># ``get_ip_address_db_record()`` queries the API first and falls back to the</span>
|
|
<span class="c1"># bundled/cached MMDB on any non-2xx response or network error. A 401/403</span>
|
|
<span class="c1"># propagates as ``InvalidIPinfoAPIKey`` so the CLI exits fatally.</span>
|
|
<span class="c1">#</span>
|
|
<span class="c1"># The IPinfo Lite API is documented as having no daily or monthly request</span>
|
|
<span class="c1"># limit ("unlimited access"), so there is no rate-limit or quota handling</span>
|
|
<span class="c1"># here — adding it would be inventing behavior the service doesn't document.</span>
|
|
<span class="c1"># Authentication uses the documented ``?token=`` query parameter.</span>
|
|
<span class="n">_IPINFO_API_URL</span> <span class="o">=</span> <span class="s2">"https://api.ipinfo.io/lite"</span>
|
|
<span class="n">_IPINFO_API_TOKEN</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="n">_IPINFO_API_TIMEOUT</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">5.0</span>
|
|
|
|
|
|
<div class="viewcode-block" id="configure_ipinfo_api">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.configure_ipinfo_api">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">configure_ipinfo_api</span><span class="p">(</span>
|
|
<span class="n">token</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span>
|
|
<span class="o">*</span><span class="p">,</span>
|
|
<span class="n">probe</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""Configure the IPinfo Lite REST API as the primary source for IP lookups.</span>
|
|
|
|
<span class="sd"> When a token is configured, ``get_ip_address_db_record()`` hits the API</span>
|
|
<span class="sd"> first for every lookup and falls back to the MMDB on network errors. An</span>
|
|
<span class="sd"> invalid token raises ``InvalidIPinfoAPIKey`` — the CLI catches that and</span>
|
|
<span class="sd"> exits fatally.</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> token: IPinfo API token. ``None`` or empty disables the API.</span>
|
|
<span class="sd"> probe: If ``True``, verify the token by looking up ``1.1.1.1``. A</span>
|
|
<span class="sd"> 401/403 raises ``InvalidIPinfoAPIKey``; other errors are logged</span>
|
|
<span class="sd"> and the token is still accepted so per-request fallback can take</span>
|
|
<span class="sd"> over.</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">global</span> <span class="n">_IPINFO_API_TOKEN</span>
|
|
<span class="n">_IPINFO_API_TOKEN</span> <span class="o">=</span> <span class="n">token</span> <span class="ow">or</span> <span class="kc">None</span>
|
|
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="n">_IPINFO_API_TOKEN</span> <span class="ow">or</span> <span class="ow">not</span> <span class="n">probe</span><span class="p">:</span>
|
|
<span class="k">return</span>
|
|
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">_ipinfo_api_lookup</span><span class="p">(</span><span class="s2">"1.1.1.1"</span><span class="p">)</span>
|
|
<span class="k">except</span> <span class="n">InvalidIPinfoAPIKey</span><span class="p">:</span>
|
|
<span class="k">raise</span>
|
|
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">"IPinfo API probe failed (will fall back per-request): </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"IPinfo API configured"</span><span class="p">)</span></div>
|
|
|
|
|
|
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">_ipinfo_api_lookup</span><span class="p">(</span><span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="n">_IPDatabaseRecord</span><span class="p">]:</span>
|
|
<span class="w"> </span><span class="sd">"""Look up an IP via the IPinfo Lite REST API.</span>
|
|
|
|
<span class="sd"> Returns the normalized record on success, or ``None`` on network error or</span>
|
|
<span class="sd"> any non-2xx response (other than 401/403). 401/403 raises</span>
|
|
<span class="sd"> ``InvalidIPinfoAPIKey``.</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="n">_IPINFO_API_TOKEN</span><span class="p">:</span>
|
|
<span class="k">return</span> <span class="kc">None</span>
|
|
|
|
<span class="n">url</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">"</span><span class="si">{</span><span class="n">_IPINFO_API_URL</span><span class="si">}</span><span class="s2">/</span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">"</span>
|
|
<span class="n">params</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"token"</span><span class="p">:</span> <span class="n">_IPINFO_API_TOKEN</span><span class="p">}</span>
|
|
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"User-Agent"</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">,</span> <span class="s2">"Accept"</span><span class="p">:</span> <span class="s2">"application/json"</span><span class="p">}</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span>
|
|
<span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">,</span> <span class="n">params</span><span class="o">=</span><span class="n">params</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">_IPINFO_API_TIMEOUT</span>
|
|
<span class="p">)</span>
|
|
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"IPinfo API request for </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> failed: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="kc">None</span>
|
|
|
|
<span class="k">if</span> <span class="n">response</span><span class="o">.</span><span class="n">status_code</span> <span class="ow">in</span> <span class="p">(</span><span class="mi">401</span><span class="p">,</span> <span class="mi">403</span><span class="p">):</span>
|
|
<span class="k">raise</span> <span class="n">InvalidIPinfoAPIKey</span><span class="p">(</span>
|
|
<span class="sa">f</span><span class="s2">"IPinfo API rejected the configured token (HTTP </span><span class="si">{</span><span class="n">response</span><span class="o">.</span><span class="n">status_code</span><span class="si">}</span><span class="s2">)"</span>
|
|
<span class="p">)</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="n">response</span><span class="o">.</span><span class="n">ok</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span>
|
|
<span class="sa">f</span><span class="s2">"IPinfo API returned HTTP </span><span class="si">{</span><span class="n">response</span><span class="o">.</span><span class="n">status_code</span><span class="si">}</span><span class="s2"> for </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">"</span>
|
|
<span class="p">)</span>
|
|
<span class="k">return</span> <span class="kc">None</span>
|
|
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">payload</span> <span class="o">=</span> <span class="n">response</span><span class="o">.</span><span class="n">json</span><span class="p">()</span>
|
|
<span class="k">except</span> <span class="ne">ValueError</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"IPinfo API returned non-JSON for </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="kc">None</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">payload</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
|
|
<span class="k">return</span> <span class="kc">None</span>
|
|
|
|
<span class="k">return</span> <span class="n">_normalize_ip_record</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
|
|
|
|
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">_normalize_ip_record</span><span class="p">(</span><span class="n">record</span><span class="p">:</span> <span class="nb">dict</span><span class="p">)</span> <span class="o">-></span> <span class="n">_IPDatabaseRecord</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""Normalize an IPinfo / MaxMind record to the internal shape.</span>
|
|
|
|
<span class="sd"> Shared between the API path and the MMDB path so both schemas produce the</span>
|
|
<span class="sd"> same output: country as ISO code, ASN as plain int, as_name string,</span>
|
|
<span class="sd"> as_domain lowercased.</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">country</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="n">asn</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">int</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="n">as_name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="n">as_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
|
|
<span class="n">code</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"country_code"</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">code</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">nested</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"country"</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">nested</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
|
|
<span class="n">code</span> <span class="o">=</span> <span class="n">nested</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"iso_code"</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">code</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
|
|
<span class="n">country</span> <span class="o">=</span> <span class="n">code</span>
|
|
|
|
<span class="n">raw_asn</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"asn"</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">raw_asn</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
|
<span class="n">asn</span> <span class="o">=</span> <span class="n">raw_asn</span>
|
|
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">raw_asn</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="ow">and</span> <span class="n">raw_asn</span><span class="p">:</span>
|
|
<span class="n">digits</span> <span class="o">=</span> <span class="n">raw_asn</span><span class="o">.</span><span class="n">removeprefix</span><span class="p">(</span><span class="s2">"AS"</span><span class="p">)</span><span class="o">.</span><span class="n">removeprefix</span><span class="p">(</span><span class="s2">"as"</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">digits</span><span class="o">.</span><span class="n">isdigit</span><span class="p">():</span>
|
|
<span class="n">asn</span> <span class="o">=</span> <span class="nb">int</span><span class="p">(</span><span class="n">digits</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">asn</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">mm_asn</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"autonomous_system_number"</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">mm_asn</span><span class="p">,</span> <span class="nb">int</span><span class="p">):</span>
|
|
<span class="n">asn</span> <span class="o">=</span> <span class="n">mm_asn</span>
|
|
|
|
<span class="n">name</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"as_name"</span><span class="p">)</span> <span class="ow">or</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"autonomous_system_organization"</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="ow">and</span> <span class="n">name</span><span class="p">:</span>
|
|
<span class="n">as_name</span> <span class="o">=</span> <span class="n">name</span>
|
|
<span class="n">domain</span> <span class="o">=</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">"as_domain"</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="nb">str</span><span class="p">)</span> <span class="ow">and</span> <span class="n">domain</span><span class="p">:</span>
|
|
<span class="n">as_domain</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
|
|
|
<span class="k">return</span> <span class="p">{</span>
|
|
<span class="s2">"country"</span><span class="p">:</span> <span class="n">country</span><span class="p">,</span>
|
|
<span class="s2">"asn"</span><span class="p">:</span> <span class="n">asn</span><span class="p">,</span>
|
|
<span class="s2">"as_name"</span><span class="p">:</span> <span class="n">as_name</span><span class="p">,</span>
|
|
<span class="s2">"as_domain"</span><span class="p">:</span> <span class="n">as_domain</span><span class="p">,</span>
|
|
<span class="p">}</span>
|
|
|
|
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">_get_ip_database_path</span><span class="p">(</span><span class="n">db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">])</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
|
<span class="n">db_paths</span> <span class="o">=</span> <span class="p">[</span>
|
|
<span class="s2">"ipinfo_lite.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"GeoLite2-Country.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"/usr/local/share/GeoIP/GeoLite2-Country.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"/usr/share/GeoIP/GeoLite2-Country.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"/var/lib/GeoIP/GeoLite2-Country.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"/var/local/lib/GeoIP/GeoLite2-Country.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"/usr/local/var/GeoIP/GeoLite2-Country.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"%SystemDrive%</span><span class="se">\\</span><span class="s2">ProgramData</span><span class="se">\\</span><span class="s2">MaxMind</span><span class="se">\\</span><span class="s2">GeoIPUpdate</span><span class="se">\\</span><span class="s2">GeoIP</span><span class="se">\\</span><span class="s2">"</span>
|
|
<span class="s2">"GeoLite2-Country.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"C:</span><span class="se">\\</span><span class="s2">GeoIP</span><span class="se">\\</span><span class="s2">GeoLite2-Country.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"dbip-country-lite.mmdb"</span><span class="p">,</span>
|
|
<span class="s2">"dbip-country.mmdb"</span><span class="p">,</span>
|
|
<span class="p">]</span>
|
|
|
|
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">db_path</span><span class="p">):</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
|
|
<span class="sa">f</span><span class="s2">"No file exists at </span><span class="si">{</span><span class="n">db_path</span><span class="si">}</span><span class="s2">. Falling back to an "</span>
|
|
<span class="s2">"included copy of the IPinfo IP to Country "</span>
|
|
<span class="s2">"Lite database."</span>
|
|
<span class="p">)</span>
|
|
<span class="n">db_path</span> <span class="o">=</span> <span class="kc">None</span>
|
|
|
|
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="k">for</span> <span class="n">system_path</span> <span class="ow">in</span> <span class="n">db_paths</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">system_path</span><span class="p">):</span>
|
|
<span class="n">db_path</span> <span class="o">=</span> <span class="n">system_path</span>
|
|
<span class="k">break</span>
|
|
|
|
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="n">_IP_DB_PATH</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">db_path</span> <span class="o">=</span> <span class="n">_IP_DB_PATH</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">db_path</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span>
|
|
<span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">ipinfo</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">"ipinfo_lite.mmdb"</span><span class="p">)</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="n">db_age</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">stat</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span><span class="o">.</span><span class="n">st_mtime</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">db_age</span> <span class="o">></span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">30</span><span class="p">):</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"IP database is more than a month old"</span><span class="p">)</span>
|
|
|
|
<span class="k">return</span> <span class="n">db_path</span>
|
|
|
|
|
|
<div class="viewcode-block" id="get_ip_address_db_record">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_db_record">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">get_ip_address_db_record</span><span class="p">(</span>
|
|
<span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="n">_IPDatabaseRecord</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""Look up an IP and return country + ASN fields.</span>
|
|
|
|
<span class="sd"> If the IPinfo Lite API is configured via ``configure_ipinfo_api()``, the</span>
|
|
<span class="sd"> API is queried first; any non-fatal failure (rate limit, quota, network)</span>
|
|
<span class="sd"> falls through to the MMDB. An invalid API token raises</span>
|
|
<span class="sd"> ``InvalidIPinfoAPIKey`` and is not caught here.</span>
|
|
|
|
<span class="sd"> IPinfo Lite carries ``country_code``, ``as_name``, and ``as_domain`` on</span>
|
|
<span class="sd"> every record. MaxMind/DBIP country-only databases carry only country, so</span>
|
|
<span class="sd"> ``as_name`` / ``as_domain`` come back None for those users.</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">api_record</span> <span class="o">=</span> <span class="n">_ipinfo_api_lookup</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">api_record</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="k">return</span> <span class="n">api_record</span>
|
|
|
|
<span class="n">resolved_path</span> <span class="o">=</span> <span class="n">_get_ip_database_path</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span>
|
|
<span class="n">db_reader</span> <span class="o">=</span> <span class="n">maxminddb</span><span class="o">.</span><span class="n">open_database</span><span class="p">(</span><span class="n">resolved_path</span><span class="p">)</span>
|
|
<span class="n">record</span> <span class="o">=</span> <span class="n">db_reader</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">record</span><span class="p">,</span> <span class="nb">dict</span><span class="p">):</span>
|
|
<span class="k">return</span> <span class="p">{</span>
|
|
<span class="s2">"country"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"asn"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"as_name"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"as_domain"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="p">}</span>
|
|
<span class="k">return</span> <span class="n">_normalize_ip_record</span><span class="p">(</span><span class="n">record</span><span class="p">)</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="get_ip_address_country">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_country">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">get_ip_address_country</span><span class="p">(</span>
|
|
<span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Returns the ISO code for the country associated</span>
|
|
<span class="sd"> with the given IPv4 or IPv6 address.</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> ip_address (str): The IP address to query for</span>
|
|
<span class="sd"> db_path (str): Path to an MMDB file from IPinfo, MaxMind, or DBIP</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> str: An ISO country code associated with the given IP address</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">return</span> <span class="n">get_ip_address_db_record</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">db_path</span><span class="o">=</span><span class="n">db_path</span><span class="p">)[</span><span class="s2">"country"</span><span class="p">]</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="load_reverse_dns_map">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.load_reverse_dns_map">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">load_reverse_dns_map</span><span class="p">(</span>
|
|
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">ReverseDNSMap</span><span class="p">,</span>
|
|
<span class="o">*</span><span class="p">,</span>
|
|
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="n">psl_overrides_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">psl_overrides_url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Loads the reverse DNS map from a URL or local file.</span>
|
|
|
|
<span class="sd"> Clears and repopulates the given map dict in place. If the map is</span>
|
|
<span class="sd"> fetched from a URL, that is tried first; on failure (or if offline/local</span>
|
|
<span class="sd"> mode is selected) the bundled CSV is used as a fallback.</span>
|
|
|
|
<span class="sd"> ``psl_overrides.txt`` is reloaded at the same time using the same</span>
|
|
<span class="sd"> ``offline`` / ``always_use_local_file`` flags (with separate path/URL</span>
|
|
<span class="sd"> kwargs), so map entries that depend on a recent overrides entry fold</span>
|
|
<span class="sd"> correctly.</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> reverse_dns_map (dict): The map dict to populate (modified in place)</span>
|
|
<span class="sd"> always_use_local_file (bool): Always use a local map file</span>
|
|
<span class="sd"> local_file_path (str): Path to a local map file</span>
|
|
<span class="sd"> url (str): URL to a reverse DNS map</span>
|
|
<span class="sd"> offline (bool): Use the built-in copy of the reverse DNS map</span>
|
|
<span class="sd"> psl_overrides_path (str): Path to a local PSL overrides file</span>
|
|
<span class="sd"> psl_overrides_url (str): URL to a PSL overrides file</span>
|
|
<span class="sd"> """</span>
|
|
<span class="c1"># Reload PSL overrides first so any map entry that depends on a folded</span>
|
|
<span class="c1"># base domain resolves correctly against the current overrides list.</span>
|
|
<span class="n">load_psl_overrides</span><span class="p">(</span>
|
|
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_file</span><span class="p">,</span>
|
|
<span class="n">local_file_path</span><span class="o">=</span><span class="n">psl_overrides_path</span><span class="p">,</span>
|
|
<span class="n">url</span><span class="o">=</span><span class="n">psl_overrides_url</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">url</span> <span class="o">=</span> <span class="p">(</span>
|
|
<span class="s2">"https://raw.githubusercontent.com/domainaware"</span>
|
|
<span class="s2">"/parsedmarc/master/parsedmarc/"</span>
|
|
<span class="s2">"resources/maps/base_reverse_dns_map.csv"</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="n">reverse_dns_map</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
|
|
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">load_csv</span><span class="p">(</span><span class="n">_csv_file</span><span class="p">):</span>
|
|
<span class="n">reader</span> <span class="o">=</span> <span class="n">csv</span><span class="o">.</span><span class="n">DictReader</span><span class="p">(</span><span class="n">_csv_file</span><span class="p">)</span>
|
|
<span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">reader</span><span class="p">:</span>
|
|
<span class="n">key</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="s2">"base_reverse_dns"</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
|
<span class="n">reverse_dns_map</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
|
|
<span class="s2">"name"</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span>
|
|
<span class="s2">"type"</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span>
|
|
<span class="p">}</span>
|
|
|
|
<span class="n">csv_file</span> <span class="o">=</span> <span class="n">io</span><span class="o">.</span><span class="n">StringIO</span><span class="p">()</span>
|
|
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">offline</span> <span class="ow">or</span> <span class="n">always_use_local_file</span><span class="p">):</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Trying to fetch reverse DNS map from </span><span class="si">{</span><span class="n">url</span><span class="si">}</span><span class="s2">..."</span><span class="p">)</span>
|
|
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"User-Agent"</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">}</span>
|
|
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
|
|
<span class="n">response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
|
|
<span class="n">csv_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
|
|
<span class="n">csv_file</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
|
<span class="n">load_csv</span><span class="p">(</span><span class="n">csv_file</span><span class="p">)</span>
|
|
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Failed to fetch reverse DNS map: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">"Not a valid CSV file"</span><span class="p">)</span>
|
|
<span class="n">csv_file</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
|
|
<span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"Response body:"</span><span class="p">)</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="n">csv_file</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
|
|
|
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">reverse_dns_map</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">"Loading included reverse DNS map..."</span><span class="p">)</span>
|
|
<span class="n">path</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span>
|
|
<span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">maps</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">"base_reverse_dns_map.csv"</span><span class="p">)</span>
|
|
<span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">local_file_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">path</span> <span class="o">=</span> <span class="n">local_file_path</span>
|
|
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="k">as</span> <span class="n">csv_file</span><span class="p">:</span>
|
|
<span class="n">load_csv</span><span class="p">(</span><span class="n">csv_file</span><span class="p">)</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="get_service_from_reverse_dns_base_domain">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_service_from_reverse_dns_base_domain">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">get_service_from_reverse_dns_base_domain</span><span class="p">(</span>
|
|
<span class="n">base_domain</span><span class="p">,</span>
|
|
<span class="o">*</span><span class="p">,</span>
|
|
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ReverseDNSMap</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="n">ReverseDNSService</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Returns the service name of a given base domain name from reverse DNS.</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> base_domain (str): The base domain of the reverse DNS lookup</span>
|
|
<span class="sd"> always_use_local_file (bool): Always use a local map file</span>
|
|
<span class="sd"> local_file_path (str): Path to a local map file</span>
|
|
<span class="sd">        url (str): URL to a reverse DNS map</span>
|
|
<span class="sd"> offline (bool): Use the built-in copy of the reverse DNS map</span>
|
|
<span class="sd"> reverse_dns_map (dict): A reverse DNS map</span>
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> dict: A dictionary containing name and type.</span>
|
|
<span class="sd"> If the service is unknown, the name will be</span>
|
|
<span class="sd">        the supplied base_domain and the type will be None</span>
|
|
<span class="sd"> """</span>
|
|
|
|
<span class="n">base_domain</span> <span class="o">=</span> <span class="n">base_domain</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
|
|
<span class="n">reverse_dns_map_value</span><span class="p">:</span> <span class="n">ReverseDNSMap</span>
|
|
<span class="k">if</span> <span class="n">reverse_dns_map</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">reverse_dns_map_value</span> <span class="o">=</span> <span class="p">{}</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">reverse_dns_map_value</span> <span class="o">=</span> <span class="n">reverse_dns_map</span>
|
|
|
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">reverse_dns_map_value</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
<span class="n">load_reverse_dns_map</span><span class="p">(</span>
|
|
<span class="n">reverse_dns_map_value</span><span class="p">,</span>
|
|
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_file</span><span class="p">,</span>
|
|
<span class="n">local_file_path</span><span class="o">=</span><span class="n">local_file_path</span><span class="p">,</span>
|
|
<span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="n">service</span><span class="p">:</span> <span class="n">ReverseDNSService</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">service</span> <span class="o">=</span> <span class="n">reverse_dns_map_value</span><span class="p">[</span><span class="n">base_domain</span><span class="p">]</span>
|
|
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
|
|
<span class="n">service</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"name"</span><span class="p">:</span> <span class="n">base_domain</span><span class="p">,</span> <span class="s2">"type"</span><span class="p">:</span> <span class="kc">None</span><span class="p">}</span>
|
|
|
|
<span class="k">return</span> <span class="n">service</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="get_ip_address_info">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_info">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">get_ip_address_info</span><span class="p">(</span>
|
|
<span class="n">ip_address</span><span class="p">,</span>
|
|
<span class="o">*</span><span class="p">,</span>
|
|
<span class="n">ip_db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">reverse_dns_map_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">always_use_local_files</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="n">reverse_dns_map_url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ReverseDNSMap</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
|
|
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_TIMEOUT</span><span class="p">,</span>
|
|
<span class="n">retries</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">DEFAULT_DNS_MAX_RETRIES</span><span class="p">,</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="n">IPAddressInfo</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Returns reverse DNS and country information for the given IP address</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> ip_address (str): The IP address to check</span>
|
|
<span class="sd"> ip_db_path (str): path to a MMDB file from MaxMind or DBIP</span>
|
|
<span class="sd"> reverse_dns_map_path (str): Path to a reverse DNS map file</span>
|
|
<span class="sd"> reverse_dns_map_url (str): URL to the reverse DNS map file</span>
|
|
<span class="sd"> always_use_local_files (bool): Do not download files</span>
|
|
<span class="sd"> cache (ExpiringDict): Cache storage</span>
|
|
<span class="sd"> reverse_dns_map (dict): A reverse DNS map</span>
|
|
<span class="sd"> offline (bool): Do not make online queries for geolocation or DNS</span>
|
|
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
|
|
<span class="sd"> (Cloudflare's public DNS resolvers by default)</span>
|
|
<span class="sd"> timeout (float): Sets the DNS timeout in seconds</span>
|
|
<span class="sd"> retries (int): Number of times to retry on timeout or other transient</span>
|
|
<span class="sd"> errors</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd">        dict: ``ip_address``, ``reverse_dns``, ``country``, ``base_domain``, ``name``, ``type``, ``asn``, ``as_name``, ``as_domain``</span>
|
|
|
|
<span class="sd"> """</span>
|
|
<span class="n">ip_address</span> <span class="o">=</span> <span class="n">ip_address</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
|
<span class="k">if</span> <span class="n">cache</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">cached_info</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="p">(</span>
|
|
<span class="n">cached_info</span>
|
|
<span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cached_info</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span>
|
|
<span class="ow">and</span> <span class="s2">"ip_address"</span> <span class="ow">in</span> <span class="n">cached_info</span>
|
|
<span class="p">):</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> was found in cache"</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">IPAddressInfo</span><span class="p">,</span> <span class="n">cached_info</span><span class="p">)</span>
|
|
<span class="n">info</span><span class="p">:</span> <span class="n">IPAddressInfo</span> <span class="o">=</span> <span class="p">{</span>
|
|
<span class="s2">"ip_address"</span><span class="p">:</span> <span class="n">ip_address</span><span class="p">,</span>
|
|
<span class="s2">"reverse_dns"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"country"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"base_domain"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"name"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"type"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"asn"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"as_name"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="s2">"as_domain"</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
|
|
<span class="p">}</span>
|
|
<span class="k">if</span> <span class="n">offline</span><span class="p">:</span>
|
|
<span class="n">reverse_dns</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">reverse_dns</span> <span class="o">=</span> <span class="n">get_reverse_dns</span><span class="p">(</span>
|
|
<span class="n">ip_address</span><span class="p">,</span>
|
|
<span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span>
|
|
<span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">,</span>
|
|
<span class="n">retries</span><span class="o">=</span><span class="n">retries</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
<span class="n">db_record</span> <span class="o">=</span> <span class="n">get_ip_address_db_record</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">db_path</span><span class="o">=</span><span class="n">ip_db_path</span><span class="p">)</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"country"</span><span class="p">]</span> <span class="o">=</span> <span class="n">db_record</span><span class="p">[</span><span class="s2">"country"</span><span class="p">]</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"asn"</span><span class="p">]</span> <span class="o">=</span> <span class="n">db_record</span><span class="p">[</span><span class="s2">"asn"</span><span class="p">]</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"as_name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">db_record</span><span class="p">[</span><span class="s2">"as_name"</span><span class="p">]</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"as_domain"</span><span class="p">]</span> <span class="o">=</span> <span class="n">db_record</span><span class="p">[</span><span class="s2">"as_domain"</span><span class="p">]</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"reverse_dns"</span><span class="p">]</span> <span class="o">=</span> <span class="n">reverse_dns</span>
|
|
|
|
<span class="k">if</span> <span class="n">reverse_dns</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">base_domain</span> <span class="o">=</span> <span class="n">get_base_domain</span><span class="p">(</span><span class="n">reverse_dns</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">base_domain</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">service</span> <span class="o">=</span> <span class="n">get_service_from_reverse_dns_base_domain</span><span class="p">(</span>
|
|
<span class="n">base_domain</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
|
|
<span class="n">local_file_path</span><span class="o">=</span><span class="n">reverse_dns_map_path</span><span class="p">,</span>
|
|
<span class="n">url</span><span class="o">=</span><span class="n">reverse_dns_map_url</span><span class="p">,</span>
|
|
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_files</span><span class="p">,</span>
|
|
<span class="n">reverse_dns_map</span><span class="o">=</span><span class="n">reverse_dns_map</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"base_domain"</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_domain</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> reverse_dns not found"</span><span class="p">)</span>
|
|
<span class="c1"># Fall back to ASN data for source attribution. ``reverse_dns`` and</span>
|
|
<span class="c1"># ``base_domain`` are left null so consumers can still tell an</span>
|
|
<span class="c1"># ASN-derived row apart from one resolved via a real PTR.</span>
|
|
<span class="n">map_value</span><span class="p">:</span> <span class="n">ReverseDNSMap</span> <span class="o">=</span> <span class="p">(</span>
|
|
<span class="n">reverse_dns_map</span> <span class="k">if</span> <span class="n">reverse_dns_map</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="k">else</span> <span class="p">{}</span>
|
|
<span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">map_value</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
|
|
<span class="n">load_reverse_dns_map</span><span class="p">(</span>
|
|
<span class="n">map_value</span><span class="p">,</span>
|
|
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_files</span><span class="p">,</span>
|
|
<span class="n">local_file_path</span><span class="o">=</span><span class="n">reverse_dns_map_path</span><span class="p">,</span>
|
|
<span class="n">url</span><span class="o">=</span><span class="n">reverse_dns_map_url</span><span class="p">,</span>
|
|
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
|
|
<span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">info</span><span class="p">[</span><span class="s2">"as_domain"</span><span class="p">]</span> <span class="ow">and</span> <span class="n">info</span><span class="p">[</span><span class="s2">"as_domain"</span><span class="p">]</span> <span class="ow">in</span> <span class="n">map_value</span><span class="p">:</span>
|
|
<span class="n">service</span> <span class="o">=</span> <span class="n">map_value</span><span class="p">[</span><span class="n">info</span><span class="p">[</span><span class="s2">"as_domain"</span><span class="p">]]</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span>
|
|
<span class="k">elif</span> <span class="n">info</span><span class="p">[</span><span class="s2">"as_name"</span><span class="p">]:</span>
|
|
<span class="c1"># ASN-domain not in the map: surface the raw AS name with no</span>
|
|
<span class="c1"># classification. Better than leaving the row unattributed.</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span> <span class="o">=</span> <span class="n">info</span><span class="p">[</span><span class="s2">"as_name"</span><span class="p">]</span>
|
|
|
|
<span class="c1"># Don't cache weak-fallback attributions — rows where we had no PTR AND</span>
|
|
<span class="c1"># the ASN domain wasn't in the map, so ``name`` is just the raw ``as_name``</span>
|
|
<span class="c1"># from the MMDB. ``get_reverse_dns()`` swallows every ``DNSException`` as</span>
|
|
<span class="c1"># ``None``, so a transient PTR lookup failure (timeout, SERVFAIL, OSError)</span>
|
|
<span class="c1"># is indistinguishable from a real no-PTR case at this point. Caching the</span>
|
|
<span class="c1"># weak result would poison the 4-hour cache with a misattribution even</span>
|
|
<span class="c1"># after the PTR becomes resolvable again. Re-running on the next lookup</span>
|
|
<span class="c1"># is cheap and either produces a proper PTR-backed match or the same</span>
|
|
<span class="c1"># (still-best-effort) ASN attribution.</span>
|
|
<span class="n">weak_fallback</span> <span class="o">=</span> <span class="p">(</span>
|
|
<span class="n">info</span><span class="p">[</span><span class="s2">"reverse_dns"</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span>
|
|
<span class="ow">and</span> <span class="n">info</span><span class="p">[</span><span class="s2">"type"</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span>
|
|
<span class="ow">and</span> <span class="n">info</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span>
|
|
<span class="ow">and</span> <span class="n">info</span><span class="p">[</span><span class="s2">"name"</span><span class="p">]</span> <span class="o">==</span> <span class="n">info</span><span class="p">[</span><span class="s2">"as_name"</span><span class="p">]</span>
|
|
<span class="p">)</span>
|
|
<span class="k">if</span> <span class="n">cache</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">weak_fallback</span><span class="p">:</span>
|
|
<span class="n">cache</span><span class="p">[</span><span class="n">ip_address</span><span class="p">]</span> <span class="o">=</span> <span class="n">info</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">"IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> added to cache"</span><span class="p">)</span>
|
|
|
|
<span class="k">return</span> <span class="n">info</span></div>
|
|
|
|
|
|
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">parse_email_address</span><span class="p">(</span><span class="n">original_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]:</span>
|
|
<span class="k">if</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s2">""</span><span class="p">:</span>
|
|
<span class="n">display_name</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">display_name</span> <span class="o">=</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
|
|
<span class="n">address</span> <span class="o">=</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
|
|
<span class="n">address_parts</span> <span class="o">=</span> <span class="n">address</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">"@"</span><span class="p">)</span>
|
|
<span class="n">local</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="n">domain</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">address_parts</span><span class="p">)</span> <span class="o">></span> <span class="mi">1</span><span class="p">:</span>
|
|
<span class="n">local</span> <span class="o">=</span> <span class="n">address_parts</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
|
<span class="n">domain</span> <span class="o">=</span> <span class="n">address_parts</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
|
|
|
|
<span class="k">return</span> <span class="p">{</span>
|
|
<span class="s2">"display_name"</span><span class="p">:</span> <span class="n">display_name</span><span class="p">,</span>
|
|
<span class="s2">"address"</span><span class="p">:</span> <span class="n">address</span><span class="p">,</span>
|
|
<span class="s2">"local"</span><span class="p">:</span> <span class="n">local</span><span class="p">,</span>
|
|
<span class="s2">"domain"</span><span class="p">:</span> <span class="n">domain</span><span class="p">,</span>
|
|
<span class="p">}</span>
|
|
|
|
|
|
<div class="viewcode-block" id="get_filename_safe_string">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_filename_safe_string">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">get_filename_safe_string</span><span class="p">(</span><span class="n">string</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">str</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Converts a string to a string that is safe for a filename</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> string (str): A string to make safe for a filename</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> str: A string safe for a filename</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">invalid_filename_chars</span> <span class="o">=</span> <span class="p">[</span><span class="s2">"</span><span class="se">\\</span><span class="s2">"</span><span class="p">,</span> <span class="s2">"/"</span><span class="p">,</span> <span class="s2">":"</span><span class="p">,</span> <span class="s1">'"'</span><span class="p">,</span> <span class="s2">"*"</span><span class="p">,</span> <span class="s2">"?"</span><span class="p">,</span> <span class="s2">"|"</span><span class="p">,</span> <span class="s2">"</span><span class="se">\n</span><span class="s2">"</span><span class="p">,</span> <span class="s2">"</span><span class="se">\r</span><span class="s2">"</span><span class="p">]</span>
|
|
<span class="k">if</span> <span class="n">string</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">string</span> <span class="o">=</span> <span class="s2">"None"</span>
|
|
<span class="k">for</span> <span class="n">char</span> <span class="ow">in</span> <span class="n">invalid_filename_chars</span><span class="p">:</span>
|
|
<span class="n">string</span> <span class="o">=</span> <span class="n">string</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">char</span><span class="p">,</span> <span class="s2">""</span><span class="p">)</span>
|
|
<span class="n">string</span> <span class="o">=</span> <span class="n">string</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">"."</span><span class="p">)</span>
|
|
|
|
<span class="n">string</span> <span class="o">=</span> <span class="p">(</span><span class="n">string</span><span class="p">[:</span><span class="mi">100</span><span class="p">])</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">string</span><span class="p">)</span> <span class="o">></span> <span class="mi">100</span> <span class="k">else</span> <span class="n">string</span>
|
|
|
|
<span class="k">return</span> <span class="n">string</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="is_mbox">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.is_mbox">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">is_mbox</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd">    Checks if the file at the given path is an MBOX mailbox file</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd">        path: Path to the file to check</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> bool: A flag that indicates if the file is an MBOX mailbox file</span>
|
|
<span class="sd"> """</span>
|
|
<span class="n">_is_mbox</span> <span class="o">=</span> <span class="kc">False</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">mbox</span> <span class="o">=</span> <span class="n">mailbox</span><span class="o">.</span><span class="n">mbox</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
|
|
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">mbox</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="o">></span> <span class="mi">0</span><span class="p">:</span>
|
|
<span class="n">_is_mbox</span> <span class="o">=</span> <span class="kc">True</span>
|
|
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"Error checking for MBOX file: </span><span class="si">{0}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="fm">__str__</span><span class="p">()))</span>
|
|
|
|
<span class="k">return</span> <span class="n">_is_mbox</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="is_outlook_msg">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.is_outlook_msg">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">is_outlook_msg</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bool</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Checks if the given content is an Outlook msg OLE/MSG file</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> content: Content to check</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> bool: A flag that indicates if the file is an Outlook MSG file</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">)</span> <span class="ow">and</span> <span class="n">content</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span>
|
|
<span class="sa">b</span><span class="s2">"</span><span class="se">\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1</span><span class="s2">"</span>
|
|
<span class="p">)</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="convert_outlook_msg">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.convert_outlook_msg">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">convert_outlook_msg</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">)</span> <span class="o">-></span> <span class="nb">bytes</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> Uses the ``msgconvert`` Perl utility to convert an Outlook MSG file to</span>
|
|
<span class="sd"> standard RFC 822 format</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> msg_bytes (bytes): the content of the .msg file</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> An RFC 822 bytes payload</span>
|
|
<span class="sd"> """</span>
|
|
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_outlook_msg</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">):</span>
|
|
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">"The supplied bytes are not an Outlook MSG file"</span><span class="p">)</span>
|
|
<span class="n">orig_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getcwd</span><span class="p">()</span>
|
|
<span class="n">tmp_dir</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span>
|
|
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
|
|
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="s2">"sample.msg"</span><span class="p">,</span> <span class="s2">"wb"</span><span class="p">)</span> <span class="k">as</span> <span class="n">msg_file</span><span class="p">:</span>
|
|
<span class="n">msg_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">)</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="n">subprocess</span><span class="o">.</span><span class="n">check_call</span><span class="p">(</span>
|
|
<span class="p">[</span><span class="s2">"msgconvert"</span><span class="p">,</span> <span class="s2">"sample.msg"</span><span class="p">],</span> <span class="n">stdout</span><span class="o">=</span><span class="n">null_file</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">null_file</span>
|
|
<span class="p">)</span>
|
|
<span class="n">eml_path</span> <span class="o">=</span> <span class="s2">"sample.eml"</span>
|
|
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">eml_path</span><span class="p">,</span> <span class="s2">"rb"</span><span class="p">)</span> <span class="k">as</span> <span class="n">eml_file</span><span class="p">:</span>
|
|
<span class="n">rfc822</span> <span class="o">=</span> <span class="n">eml_file</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
|
|
<span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
|
|
<span class="k">raise</span> <span class="n">EmailParserError</span><span class="p">(</span>
|
|
<span class="s2">"Failed to convert Outlook MSG: msgconvert utility not found"</span>
|
|
<span class="p">)</span>
|
|
<span class="k">finally</span><span class="p">:</span>
|
|
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">orig_dir</span><span class="p">)</span>
|
|
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
|
|
|
|
<span class="k">return</span> <span class="n">rfc822</span></div>
|
|
|
|
|
|
|
|
<div class="viewcode-block" id="parse_email">
|
|
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.parse_email">[docs]</a>
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">parse_email</span><span class="p">(</span>
|
|
<span class="n">data</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bytes</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> <span class="o">*</span><span class="p">,</span> <span class="n">strip_attachment_payloads</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
|
|
<span class="p">)</span> <span class="o">-></span> <span class="nb">dict</span><span class="p">:</span>
|
|
<span class="w"> </span><span class="sd">"""</span>
|
|
<span class="sd"> A simplified email parser</span>
|
|
|
|
<span class="sd"> Args:</span>
|
|
<span class="sd"> data: The RFC 822 message string, or MSG binary</span>
|
|
<span class="sd"> strip_attachment_payloads (bool): Remove attachment payloads</span>
|
|
|
|
<span class="sd"> Returns:</span>
|
|
<span class="sd"> dict: Parsed email data</span>
|
|
<span class="sd"> """</span>
|
|
|
|
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">):</span>
|
|
<span class="k">if</span> <span class="n">is_outlook_msg</span><span class="p">(</span><span class="n">data</span><span class="p">):</span>
|
|
<span class="n">data</span> <span class="o">=</span> <span class="n">convert_outlook_msg</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
|
<span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">"utf-8"</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s2">"replace"</span><span class="p">)</span>
|
|
<span class="n">parsed_email</span> <span class="o">=</span> <span class="n">mailparser</span><span class="o">.</span><span class="n">parse_from_string</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
|
|
<span class="n">headers</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">parsed_email</span><span class="o">.</span><span class="n">headers_json</span><span class="p">)</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
|
<span class="n">parsed_email</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">parsed_email</span><span class="o">.</span><span class="n">mail_json</span><span class="p">)</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"headers"</span><span class="p">]</span> <span class="o">=</span> <span class="n">headers</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"received"</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="k">for</span> <span class="n">received</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"received"</span><span class="p">]:</span>
|
|
<span class="k">if</span> <span class="s2">"date_utc"</span> <span class="ow">in</span> <span class="n">received</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="n">received</span><span class="p">[</span><span class="s2">"date_utc"</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="k">del</span> <span class="n">received</span><span class="p">[</span><span class="s2">"date_utc"</span><span class="p">]</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">received</span><span class="p">[</span><span class="s2">"date_utc"</span><span class="p">]</span> <span class="o">=</span> <span class="n">received</span><span class="p">[</span><span class="s2">"date_utc"</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"T"</span><span class="p">,</span> <span class="s2">" "</span><span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"from"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="s2">"From"</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"headers"</span><span class="p">]:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"from"</span><span class="p">]</span> <span class="o">=</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"headers"</span><span class="p">][</span><span class="s2">"From"</span><span class="p">]</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"from"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
|
|
<span class="k">if</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"from"</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"from"</span><span class="p">]</span> <span class="o">=</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">parsed_email</span><span class="p">[</span><span class="s2">"from"</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"date"</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"date"</span><span class="p">]</span> <span class="o">=</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"date"</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">"T"</span><span class="p">,</span> <span class="s2">" "</span><span class="p">)</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"date"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
<span class="c1"># mailparser's mail_json names these headers with hyphens</span>
|
|
<span class="c1"># ("reply-to", "delivered-to"), not underscores. Reading the</span>
|
|
<span class="c1"># underscored key always missed, so every Reply-To address was</span>
|
|
<span class="c1"># silently dropped. Convert under the underscored name consumers</span>
|
|
<span class="c1"># expect and drop the raw hyphenated key so the body carries a</span>
|
|
<span class="c1"># single representation, matching how "to"/"cc"/"bcc" are handled.</span>
|
|
<span class="k">if</span> <span class="s2">"reply-to"</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"reply_to"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
|
|
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"reply-to"</span><span class="p">))</span>
|
|
<span class="p">)</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"reply_to"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"to"</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"to"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
|
|
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"to"</span><span class="p">])</span>
|
|
<span class="p">)</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"to"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"cc"</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"cc"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
|
|
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"cc"</span><span class="p">])</span>
|
|
<span class="p">)</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"cc"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"bcc"</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"bcc"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
|
|
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"bcc"</span><span class="p">])</span>
|
|
<span class="p">)</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"bcc"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"delivered-to"</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"delivered_to"</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
|
|
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="o">.</span><span class="n">pop</span><span class="p">(</span><span class="s2">"delivered-to"</span><span class="p">))</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"attachments"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"attachments"</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="k">for</span> <span class="n">attachment</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"attachments"</span><span class="p">]:</span>
|
|
<span class="k">if</span> <span class="s2">"payload"</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
|
|
<span class="n">payload</span> <span class="o">=</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">"payload"</span><span class="p">]</span>
|
|
<span class="k">try</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="s2">"content_transfer_encoding"</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
|
|
<span class="k">if</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">"content_transfer_encoding"</span><span class="p">]</span> <span class="o">==</span> <span class="s2">"base64"</span><span class="p">:</span>
|
|
<span class="n">payload</span> <span class="o">=</span> <span class="n">decode_base64</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="n">payload</span> <span class="o">=</span> <span class="nb">str</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
|
|
<span class="n">attachment</span><span class="p">[</span><span class="s2">"sha256"</span><span class="p">]</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">sha256</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span>
|
|
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
|
|
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">"Unable to decode attachment: </span><span class="si">{0}</span><span class="s2">"</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="fm">__str__</span><span class="p">()))</span>
|
|
<span class="k">if</span> <span class="n">strip_attachment_payloads</span><span class="p">:</span>
|
|
<span class="k">for</span> <span class="n">attachment</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">"attachments"</span><span class="p">]:</span>
|
|
<span class="k">if</span> <span class="s2">"payload"</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
|
|
<span class="k">del</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">"payload"</span><span class="p">]</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"subject"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"subject"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"filename_safe_subject"</span><span class="p">]</span> <span class="o">=</span> <span class="n">get_filename_safe_string</span><span class="p">(</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"subject"</span><span class="p">]</span>
|
|
<span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="s2">"body"</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
|
|
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">"body"</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
|
|
|
|
<span class="k">return</span> <span class="n">parsed_email</span></div>
|
|
|
|
</pre></div>
|
|
|
|
</div>
|
|
</div>
|
|
<footer>
|
|
|
|
<hr/>
|
|
|
|
<div role="contentinfo">
|
|
<p>© Copyright 2018 - 2025, Sean Whalen and contributors.</p>
|
|
</div>
|
|
|
|
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
|
|
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
|
|
provided by <a href="https://readthedocs.org">Read the Docs</a>.
|
|
|
|
|
|
</footer>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
</div>
|
|
<script>
|
|
jQuery(function () {
|
|
SphinxRtdTheme.Navigation.enable(true);
|
|
});
|
|
</script>
|
|
|
|
</body>
|
|
</html> |