Files
parsedmarc/_modules/parsedmarc/utils.html
Sean Whalen 493c0512f5 Update docs
2026-03-27 10:14:10 -04:00

939 lines
99 KiB
HTML

<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../">
<head>
  <!-- Encoding and viewport are declared before anything else in the head. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>parsedmarc.utils — parsedmarc 9.5.5 documentation</title>

  <!-- Stylesheets, in cascade order. -->
  <link rel="stylesheet" href="../../_static/pygments.css?v=b86133f3">
  <link rel="stylesheet" href="../../_static/css/theme.css?v=e59714d7">

  <!-- Scripts stay in their original order and load without async/defer. -->
  <script src="../../_static/jquery.js?v=5d32c60e"></script>
  <script src="../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
  <script src="../../_static/documentation_options.js?v=3eeb2fce"></script>
  <script src="../../_static/doctools.js?v=9bcbadda"></script>
  <script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
  <script src="../../_static/js/theme.js"></script>

  <!-- Relations to the site-wide index and search pages. -->
  <link rel="index" title="Index" href="../../genindex.html">
  <link rel="search" title="Search" href="../../search.html">
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
  <div class="wy-side-scroll">

    <!-- Project home link and documentation search box. -->
    <div class="wy-side-nav-search">
      <a href="../../index.html" class="icon icon-home">
        parsedmarc
      </a>
      <div role="search">
        <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
          <input type="text" name="q" placeholder="Search docs" aria-label="Search docs">
          <input type="hidden" name="check_keywords" value="yes">
          <input type="hidden" name="area" value="default">
        </form>
      </div>
    </div>

    <!-- Top-level table of contents for the documentation set. -->
    <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
      <p class="caption" role="heading"><span class="caption-text">Contents</span></p>
      <ul>
        <li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../usage.html">Using parsedmarc</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../output.html">Sample outputs</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../elasticsearch.html">Elasticsearch and Kibana</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../opensearch.html">OpenSearch and Grafana</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../kibana.html">Using the Kibana dashboards</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../splunk.html">Splunk</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../davmail.html">Accessing an inbox using OWA/EWS</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../dmarc.html">Understanding DMARC</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../contributing.html">Contributing to parsedmarc</a></li>
        <li class="toctree-l1"><a class="reference internal" href="../../api.html">API reference</a></li>
      </ul>
    </div>

  </div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">parsedmarc</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
  <!-- Breadcrumb trail from the documentation home down to this module page. -->
  <ul class="wy-breadcrumbs">
    <li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
    <li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
    <li class="breadcrumb-item"><a href="../parsedmarc.html">parsedmarc</a></li>
    <li class="breadcrumb-item active">parsedmarc.utils</li>
    <li class="wy-breadcrumbs-aside">
    </li>
  </ul>
  <hr>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for parsedmarc.utils</h1><div class="highlight"><pre>
<span></span><span class="c1"># -*- coding: utf-8 -*-</span>
<span class="sd">&quot;&quot;&quot;Utility functions that might be useful for other projects&quot;&quot;&quot;</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">base64</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">csv</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">hashlib</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">io</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">json</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">logging</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">mailbox</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">os</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">shutil</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">subprocess</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">tempfile</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">datetime</span><span class="w"> </span><span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span><span class="p">,</span> <span class="n">timezone</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">Optional</span><span class="p">,</span> <span class="n">TypedDict</span><span class="p">,</span> <span class="n">Union</span><span class="p">,</span> <span class="n">cast</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">mailparser</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">expiringdict</span><span class="w"> </span><span class="kn">import</span> <span class="n">ExpiringDict</span>
<span class="k">try</span><span class="p">:</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">importlib.resources</span><span class="w"> </span><span class="kn">import</span> <span class="n">files</span>
<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
<span class="c1"># Try backported to PY&lt;3 `importlib_resources`</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">importlib.resources</span><span class="w"> </span><span class="kn">import</span> <span class="n">files</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.exception</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.resolver</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">dns.reversename</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">geoip2.database</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">geoip2.errors</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">publicsuffixlist</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">requests</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">dateutil.parser</span><span class="w"> </span><span class="kn">import</span> <span class="n">parse</span> <span class="k">as</span> <span class="n">parse_date</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">parsedmarc.resources.dbip</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">parsedmarc.resources.maps</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">parsedmarc.constants</span><span class="w"> </span><span class="kn">import</span> <span class="n">USER_AGENT</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">parsedmarc.log</span><span class="w"> </span><span class="kn">import</span> <span class="n">logger</span>
<span class="n">parenthesis_regex</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s2">&quot;\s*\(.*\)\s*&quot;</span><span class="p">)</span>
<span class="n">null_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">devnull</span><span class="p">,</span> <span class="s2">&quot;w&quot;</span><span class="p">)</span>
<span class="n">mailparser_logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">&quot;mailparser&quot;</span><span class="p">)</span>
<span class="n">mailparser_logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">CRITICAL</span><span class="p">)</span>
<span class="n">psl</span> <span class="o">=</span> <span class="n">publicsuffixlist</span><span class="o">.</span><span class="n">PublicSuffixList</span><span class="p">()</span>
<span class="n">psl_overrides_path</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">maps</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">&quot;psl_overrides.txt&quot;</span><span class="p">))</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">psl_overrides_path</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
<span class="n">psl_overrides</span> <span class="o">=</span> <span class="p">[</span><span class="n">line</span><span class="o">.</span><span class="n">rstrip</span><span class="p">()</span> <span class="k">for</span> <span class="n">line</span> <span class="ow">in</span> <span class="n">f</span><span class="o">.</span><span class="n">readlines</span><span class="p">()]</span>
<span class="k">while</span> <span class="s2">&quot;&quot;</span> <span class="ow">in</span> <span class="n">psl_overrides</span><span class="p">:</span>
<span class="n">psl_overrides</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">)</span>
<div class="viewcode-block" id="EmailParserError">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.EmailParserError">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">EmailParserError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Raised when an error parsing the email occurs&quot;&quot;&quot;</span></div>
<div class="viewcode-block" id="DownloadError">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.DownloadError">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">DownloadError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Raised when an error occurs when downloading a file&quot;&quot;&quot;</span></div>
<div class="viewcode-block" id="ReverseDNSService">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.ReverseDNSService">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">ReverseDNSService</span><span class="p">(</span><span class="n">TypedDict</span><span class="p">):</span>
<span class="n">name</span><span class="p">:</span> <span class="nb">str</span>
<span class="nb">type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span></div>
<span class="n">ReverseDNSMap</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">ReverseDNSService</span><span class="p">]</span>
<div class="viewcode-block" id="IPAddressInfo">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.IPAddressInfo">[docs]</a>
<span class="k">class</span><span class="w"> </span><span class="nc">IPAddressInfo</span><span class="p">(</span><span class="n">TypedDict</span><span class="p">):</span>
<span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span>
<span class="n">reverse_dns</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">country</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">base_domain</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="n">name</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span>
<span class="nb">type</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span></div>
<div class="viewcode-block" id="decode_base64">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.decode_base64">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">decode_base64</span><span class="p">(</span><span class="n">data</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Decodes a base64 string, with padding being optional</span>
<span class="sd"> Args:</span>
<span class="sd"> data (str): A base64 encoded string</span>
<span class="sd"> Returns:</span>
<span class="sd"> bytes: The decoded bytes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">data_bytes</span> <span class="o">=</span> <span class="nb">bytes</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">&quot;ascii&quot;</span><span class="p">)</span>
<span class="n">missing_padding</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data_bytes</span><span class="p">)</span> <span class="o">%</span> <span class="mi">4</span>
<span class="k">if</span> <span class="n">missing_padding</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">data_bytes</span> <span class="o">+=</span> <span class="sa">b</span><span class="s2">&quot;=&quot;</span> <span class="o">*</span> <span class="p">(</span><span class="mi">4</span> <span class="o">-</span> <span class="n">missing_padding</span><span class="p">)</span>
<span class="k">return</span> <span class="n">base64</span><span class="o">.</span><span class="n">b64decode</span><span class="p">(</span><span class="n">data_bytes</span><span class="p">)</span></div>
<div class="viewcode-block" id="get_base_domain">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_base_domain">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_base_domain</span><span class="p">(</span><span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the base domain name for the given domain</span>
<span class="sd"> .. note::</span>
<span class="sd"> Results are based on a list of public domain suffixes at</span>
<span class="sd"> https://publicsuffix.org/list/public_suffix_list.dat and overrides included in</span>
<span class="sd"> parsedmarc.resources.maps.psl_overrides.txt</span>
<span class="sd"> Args:</span>
<span class="sd"> domain (str): A domain or subdomain</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The base domain of the given domain</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">domain</span> <span class="o">=</span> <span class="n">domain</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="n">publicsuffix</span> <span class="o">=</span> <span class="n">psl</span><span class="o">.</span><span class="n">privatesuffix</span><span class="p">(</span><span class="n">domain</span><span class="p">)</span>
<span class="k">for</span> <span class="n">override</span> <span class="ow">in</span> <span class="n">psl_overrides</span><span class="p">:</span>
<span class="k">if</span> <span class="n">domain</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="n">override</span><span class="p">):</span>
<span class="k">return</span> <span class="n">override</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">strip</span><span class="p">(</span><span class="s2">&quot;-&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">publicsuffix</span></div>
<div class="viewcode-block" id="query_dns">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.query_dns">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">query_dns</span><span class="p">(</span>
<span class="n">domain</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="n">record_type</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">2.0</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Queries DNS</span>
<span class="sd"> Args:</span>
<span class="sd"> domain (str): The domain or subdomain to query about</span>
<span class="sd"> record_type (str): The record type to query for</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default)</span>
<span class="sd"> timeout (float): Sets the DNS timeout in seconds</span>
<span class="sd"> Returns:</span>
<span class="sd"> list: A list of answers</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">domain</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">domain</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="n">record_type</span> <span class="o">=</span> <span class="n">record_type</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
<span class="n">cache_key</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="si">{0}</span><span class="s2">_</span><span class="si">{1}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">)</span>
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
<span class="n">cached_records</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cache_key</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cached_records</span><span class="p">,</span> <span class="nb">list</span><span class="p">):</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">cached_records</span><span class="p">)</span>
<span class="n">resolver</span> <span class="o">=</span> <span class="n">dns</span><span class="o">.</span><span class="n">resolver</span><span class="o">.</span><span class="n">Resolver</span><span class="p">()</span>
<span class="n">timeout</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">timeout</span><span class="p">)</span>
<span class="k">if</span> <span class="n">nameservers</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">nameservers</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;1.1.1.1&quot;</span><span class="p">,</span>
<span class="s2">&quot;1.0.0.1&quot;</span><span class="p">,</span>
<span class="s2">&quot;2606:4700:4700::1111&quot;</span><span class="p">,</span>
<span class="s2">&quot;2606:4700:4700::1001&quot;</span><span class="p">,</span>
<span class="p">]</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">nameservers</span> <span class="o">=</span> <span class="n">nameservers</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">timeout</span> <span class="o">=</span> <span class="n">timeout</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">lifetime</span> <span class="o">=</span> <span class="n">timeout</span>
<span class="n">records</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">r</span><span class="o">.</span><span class="n">to_text</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;&quot;&#39;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">),</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">resolve</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">,</span> <span class="n">lifetime</span><span class="o">=</span><span class="n">timeout</span><span class="p">),</span>
<span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
<span class="n">cache</span><span class="p">[</span><span class="n">cache_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">records</span>
<span class="k">return</span> <span class="n">records</span></div>
<div class="viewcode-block" id="get_reverse_dns">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_reverse_dns">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_reverse_dns</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">2.0</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Resolves an IP address to a hostname using a reverse DNS query</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to resolve</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default)</span>
<span class="sd"> timeout (float): Sets the DNS query timeout in seconds</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The reverse DNS hostname (if any)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">hostname</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">address</span> <span class="o">=</span> <span class="n">dns</span><span class="o">.</span><span class="n">reversename</span><span class="o">.</span><span class="n">from_address</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span>
<span class="n">hostname</span> <span class="o">=</span> <span class="n">query_dns</span><span class="p">(</span>
<span class="nb">str</span><span class="p">(</span><span class="n">address</span><span class="p">),</span> <span class="s2">&quot;PTR&quot;</span><span class="p">,</span> <span class="n">cache</span><span class="o">=</span><span class="n">cache</span><span class="p">,</span> <span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span>
<span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">except</span> <span class="n">dns</span><span class="o">.</span><span class="n">exception</span><span class="o">.</span><span class="n">DNSException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;get_reverse_dns(</span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2">) exception: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">hostname</span></div>
<div class="viewcode-block" id="timestamp_to_datetime">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.timestamp_to_datetime">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">timestamp_to_datetime</span><span class="p">(</span><span class="n">timestamp</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">datetime</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a UNIX/DMARC timestamp to a Python ``datetime`` object</span>
<span class="sd"> Args:</span>
<span class="sd"> timestamp (int): The timestamp</span>
<span class="sd"> Returns:</span>
<span class="sd"> datetime: The converted timestamp as a Python ``datetime`` object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">timestamp</span><span class="p">))</span></div>
<div class="viewcode-block" id="timestamp_to_human">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.timestamp_to_human">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">timestamp_to_human</span><span class="p">(</span><span class="n">timestamp</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a UNIX/DMARC timestamp to a human-readable string</span>
<span class="sd"> Args:</span>
<span class="sd"> timestamp: The timestamp</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The converted timestamp in ``YYYY-MM-DD HH:MM:SS`` format</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">timestamp_to_datetime</span><span class="p">(</span><span class="n">timestamp</span><span class="p">)</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">&quot;%Y-%m-</span><span class="si">%d</span><span class="s2"> %H:%M:%S&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="human_timestamp_to_datetime">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.human_timestamp_to_datetime">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">human_timestamp_to_datetime</span><span class="p">(</span>
<span class="n">human_timestamp</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">to_utc</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">datetime</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a human-readable timestamp into a Python ``datetime`` object</span>
<span class="sd"> Args:</span>
<span class="sd"> human_timestamp (str): A timestamp string</span>
<span class="sd"> to_utc (bool): Convert the timestamp to UTC</span>
<span class="sd"> Returns:</span>
<span class="sd"> datetime: The converted timestamp</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">human_timestamp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;-0000&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">parenthesis_regex</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">,</span> <span class="n">human_timestamp</span><span class="p">)</span>
<span class="n">dt</span> <span class="o">=</span> <span class="n">parse_date</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">)</span>
<span class="k">return</span> <span class="n">dt</span><span class="o">.</span><span class="n">astimezone</span><span class="p">(</span><span class="n">timezone</span><span class="o">.</span><span class="n">utc</span><span class="p">)</span> <span class="k">if</span> <span class="n">to_utc</span> <span class="k">else</span> <span class="n">dt</span></div>
<div class="viewcode-block" id="human_timestamp_to_unix_timestamp">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.human_timestamp_to_unix_timestamp">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">human_timestamp_to_unix_timestamp</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">int</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a human-readable timestamp into a UNIX timestamp</span>
<span class="sd"> Args:</span>
<span class="sd"> human_timestamp (str): A timestamp in ``YYYY-MM-DD HH:MM:SS`` format</span>
<span class="sd"> Returns:</span>
<span class="sd"> int: The converted timestamp</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">human_timestamp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="n">human_timestamp_to_datetime</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">)</span><span class="o">.</span><span class="n">timestamp</span><span class="p">())</span></div>
<div class="viewcode-block" id="get_ip_address_country">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_country">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_ip_address_country</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the ISO code for the country associated</span>
<span class="sd"> with the given IPv4 or IPv6 address</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to query for</span>
<span class="sd"> db_path (str): Path to a MMDB file from MaxMind or DBIP</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: An ISO country code associated with the given IP address</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">db_paths</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/local/share/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/share/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/var/lib/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/var/local/lib/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/local/var/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;%SystemDrive%</span><span class="se">\\</span><span class="s2">ProgramData</span><span class="se">\\</span><span class="s2">MaxMind</span><span class="se">\\</span><span class="s2">GeoIPUpdate</span><span class="se">\\</span><span class="s2">GeoIP</span><span class="se">\\</span><span class="s2">&quot;</span>
<span class="s2">&quot;GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;C:</span><span class="se">\\</span><span class="s2">GeoIP</span><span class="se">\\</span><span class="s2">GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;dbip-country-lite.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;dbip-country.mmdb&quot;</span><span class="p">,</span>
<span class="p">]</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">db_path</span><span class="p">):</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;No file exists at </span><span class="si">{</span><span class="n">db_path</span><span class="si">}</span><span class="s2">. Falling back to an &quot;</span>
<span class="s2">&quot;included copy of the IPDB IP to Country &quot;</span>
<span class="s2">&quot;Lite database.&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">for</span> <span class="n">system_path</span> <span class="ow">in</span> <span class="n">db_paths</span><span class="p">:</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">system_path</span><span class="p">):</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="n">system_path</span>
<span class="k">break</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span>
<span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">dbip</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">&quot;dbip-country-lite.mmdb&quot;</span><span class="p">)</span>
<span class="p">)</span>
<span class="n">db_age</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">stat</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span><span class="o">.</span><span class="n">st_mtime</span><span class="p">)</span>
<span class="k">if</span> <span class="n">db_age</span> <span class="o">&gt;</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">30</span><span class="p">):</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;IP database is more than a month old&quot;</span><span class="p">)</span>
<span class="n">db_reader</span> <span class="o">=</span> <span class="n">geoip2</span><span class="o">.</span><span class="n">database</span><span class="o">.</span><span class="n">Reader</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span>
<span class="n">country</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">country</span> <span class="o">=</span> <span class="n">db_reader</span><span class="o">.</span><span class="n">country</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span><span class="o">.</span><span class="n">country</span><span class="o">.</span><span class="n">iso_code</span>
<span class="k">except</span> <span class="n">geoip2</span><span class="o">.</span><span class="n">errors</span><span class="o">.</span><span class="n">AddressNotFoundError</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">return</span> <span class="n">country</span></div>
<div class="viewcode-block" id="load_reverse_dns_map">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.load_reverse_dns_map">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">load_reverse_dns_map</span><span class="p">(</span>
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">ReverseDNSMap</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Loads the reverse DNS map from a URL or local file.</span>
<span class="sd"> Clears and repopulates the given map dict in place. If the map is</span>
<span class="sd"> fetched from a URL, that is tried first; on failure (or if offline/local</span>
<span class="sd"> mode is selected) the bundled CSV is used as a fallback.</span>
<span class="sd"> Args:</span>
<span class="sd"> reverse_dns_map (dict): The map dict to populate (modified in place)</span>
<span class="sd"> always_use_local_file (bool): Always use a local map file</span>
<span class="sd"> local_file_path (str): Path to a local map file</span>
<span class="sd"> url (str): URL to a reverse DNS map</span>
<span class="sd"> offline (bool): Use the built-in copy of the reverse DNS map</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="n">url</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">url</span> <span class="o">=</span> <span class="p">(</span>
<span class="s2">&quot;https://raw.githubusercontent.com/domainaware&quot;</span>
<span class="s2">&quot;/parsedmarc/master/parsedmarc/&quot;</span>
<span class="s2">&quot;resources/maps/base_reverse_dns_map.csv&quot;</span>
<span class="p">)</span>
<span class="n">reverse_dns_map</span><span class="o">.</span><span class="n">clear</span><span class="p">()</span>
<span class="k">def</span><span class="w"> </span><span class="nf">load_csv</span><span class="p">(</span><span class="n">_csv_file</span><span class="p">):</span>
<span class="n">reader</span> <span class="o">=</span> <span class="n">csv</span><span class="o">.</span><span class="n">DictReader</span><span class="p">(</span><span class="n">_csv_file</span><span class="p">)</span>
<span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">reader</span><span class="p">:</span>
<span class="n">key</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="s2">&quot;base_reverse_dns&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
<span class="n">reverse_dns_map</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="n">row</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">strip</span><span class="p">(),</span>
<span class="p">}</span>
<span class="n">csv_file</span> <span class="o">=</span> <span class="n">io</span><span class="o">.</span><span class="n">StringIO</span><span class="p">()</span>
<span class="k">if</span> <span class="ow">not</span> <span class="p">(</span><span class="n">offline</span> <span class="ow">or</span> <span class="n">always_use_local_file</span><span class="p">):</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Trying to fetch reverse DNS map from </span><span class="si">{</span><span class="n">url</span><span class="si">}</span><span class="s2">...&quot;</span><span class="p">)</span>
<span class="n">headers</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;User-Agent&quot;</span><span class="p">:</span> <span class="n">USER_AGENT</span><span class="p">}</span>
<span class="n">response</span> <span class="o">=</span> <span class="n">requests</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">url</span><span class="p">,</span> <span class="n">headers</span><span class="o">=</span><span class="n">headers</span><span class="p">)</span>
<span class="n">response</span><span class="o">.</span><span class="n">raise_for_status</span><span class="p">()</span>
<span class="n">csv_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">text</span><span class="p">)</span>
<span class="n">csv_file</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">load_csv</span><span class="p">(</span><span class="n">csv_file</span><span class="p">)</span>
<span class="k">except</span> <span class="n">requests</span><span class="o">.</span><span class="n">exceptions</span><span class="o">.</span><span class="n">RequestException</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Failed to fetch reverse DNS map: </span><span class="si">{</span><span class="n">e</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">except</span> <span class="ne">Exception</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;Not a valid CSV file&quot;</span><span class="p">)</span>
<span class="n">csv_file</span><span class="o">.</span><span class="n">seek</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Response body:&quot;</span><span class="p">)</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="n">csv_file</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">reverse_dns_map</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Loading included reverse DNS map...&quot;</span><span class="p">)</span>
<span class="n">path</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span>
<span class="n">files</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">maps</span><span class="p">)</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="s2">&quot;base_reverse_dns_map.csv&quot;</span><span class="p">)</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">local_file_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">path</span> <span class="o">=</span> <span class="n">local_file_path</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">path</span><span class="p">)</span> <span class="k">as</span> <span class="n">csv_file</span><span class="p">:</span>
<span class="n">load_csv</span><span class="p">(</span><span class="n">csv_file</span><span class="p">)</span></div>
<div class="viewcode-block" id="get_service_from_reverse_dns_base_domain">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_service_from_reverse_dns_base_domain">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_service_from_reverse_dns_base_domain</span><span class="p">(</span>
<span class="n">base_domain</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ReverseDNSMap</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">ReverseDNSService</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the service name of a given base domain name from reverse DNS.</span>
<span class="sd"> Args:</span>
<span class="sd"> base_domain (str): The base domain of the reverse DNS lookup</span>
<span class="sd"> always_use_local_file (bool): Always use a local map file</span>
<span class="sd"> local_file_path (str): Path to a local map file</span>
<span class="sd"> url (str): URL to a reverse DNS map</span>
<span class="sd"> offline (bool): Use the built-in copy of the reverse DNS map</span>
<span class="sd"> reverse_dns_map (dict): A reverse DNS map</span>
<span class="sd"> Returns:</span>
<span class="sd"> dict: A dictionary containing name and type.</span>
<span class="sd"> If the service is unknown, the name will be</span>
<span class="sd"> the supplied base_domain and the type will be None</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">base_domain</span> <span class="o">=</span> <span class="n">base_domain</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span>
<span class="n">reverse_dns_map_value</span><span class="p">:</span> <span class="n">ReverseDNSMap</span>
<span class="k">if</span> <span class="n">reverse_dns_map</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">reverse_dns_map_value</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">reverse_dns_map_value</span> <span class="o">=</span> <span class="n">reverse_dns_map</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">reverse_dns_map_value</span><span class="p">)</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">load_reverse_dns_map</span><span class="p">(</span>
<span class="n">reverse_dns_map_value</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_file</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="o">=</span><span class="n">local_file_path</span><span class="p">,</span>
<span class="n">url</span><span class="o">=</span><span class="n">url</span><span class="p">,</span>
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">service</span><span class="p">:</span> <span class="n">ReverseDNSService</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">service</span> <span class="o">=</span> <span class="n">reverse_dns_map_value</span><span class="p">[</span><span class="n">base_domain</span><span class="p">]</span>
<span class="k">except</span> <span class="ne">KeyError</span><span class="p">:</span>
<span class="n">service</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="n">base_domain</span><span class="p">,</span> <span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">}</span>
<span class="k">return</span> <span class="n">service</span></div>
<div class="viewcode-block" id="get_ip_address_info">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_info">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_ip_address_info</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">,</span>
<span class="o">*</span><span class="p">,</span>
<span class="n">ip_db_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">reverse_dns_map_path</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">always_use_local_files</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">reverse_dns_map_url</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">cache</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ExpiringDict</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">reverse_dns_map</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">ReverseDNSMap</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">offline</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">nameservers</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="nb">str</span><span class="p">]]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">timeout</span><span class="p">:</span> <span class="nb">float</span> <span class="o">=</span> <span class="mf">2.0</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">IPAddressInfo</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns reverse DNS and country information for the given IP address</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to check</span>
<span class="sd"> ip_db_path (str): path to a MMDB file from MaxMind or DBIP</span>
<span class="sd"> reverse_dns_map_path (str): Path to a reverse DNS map file</span>
<span class="sd"> reverse_dns_map_url (str): URL to the reverse DNS map file</span>
<span class="sd"> always_use_local_files (bool): Do not download files</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> reverse_dns_map (dict): A reverse DNS map</span>
<span class="sd"> offline (bool): Do not make online queries for geolocation or DNS</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default)</span>
<span class="sd"> timeout (float): Sets the DNS timeout in seconds</span>
<span class="sd"> Returns:</span>
<span class="sd"> dict: ``ip_address``, ``reverse_dns``, ``country``</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">ip_address</span> <span class="o">=</span> <span class="n">ip_address</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="k">if</span> <span class="n">cache</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">cached_info</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="p">(</span>
<span class="n">cached_info</span>
<span class="ow">and</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">cached_info</span><span class="p">,</span> <span class="nb">dict</span><span class="p">)</span>
<span class="ow">and</span> <span class="s2">&quot;ip_address&quot;</span> <span class="ow">in</span> <span class="n">cached_info</span>
<span class="p">):</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> was found in cache&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">cast</span><span class="p">(</span><span class="n">IPAddressInfo</span><span class="p">,</span> <span class="n">cached_info</span><span class="p">)</span>
<span class="n">info</span><span class="p">:</span> <span class="n">IPAddressInfo</span> <span class="o">=</span> <span class="p">{</span>
<span class="s2">&quot;ip_address&quot;</span><span class="p">:</span> <span class="n">ip_address</span><span class="p">,</span>
<span class="s2">&quot;reverse_dns&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;country&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;base_domain&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;name&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="s2">&quot;type&quot;</span><span class="p">:</span> <span class="kc">None</span><span class="p">,</span>
<span class="p">}</span>
<span class="k">if</span> <span class="n">offline</span><span class="p">:</span>
<span class="n">reverse_dns</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">reverse_dns</span> <span class="o">=</span> <span class="n">get_reverse_dns</span><span class="p">(</span>
<span class="n">ip_address</span><span class="p">,</span> <span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span>
<span class="p">)</span>
<span class="n">country</span> <span class="o">=</span> <span class="n">get_ip_address_country</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">db_path</span><span class="o">=</span><span class="n">ip_db_path</span><span class="p">)</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;country&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">country</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;reverse_dns&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">reverse_dns</span>
<span class="k">if</span> <span class="n">reverse_dns</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">base_domain</span> <span class="o">=</span> <span class="n">get_base_domain</span><span class="p">(</span><span class="n">reverse_dns</span><span class="p">)</span>
<span class="k">if</span> <span class="n">base_domain</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">service</span> <span class="o">=</span> <span class="n">get_service_from_reverse_dns_base_domain</span><span class="p">(</span>
<span class="n">base_domain</span><span class="p">,</span>
<span class="n">offline</span><span class="o">=</span><span class="n">offline</span><span class="p">,</span>
<span class="n">local_file_path</span><span class="o">=</span><span class="n">reverse_dns_map_path</span><span class="p">,</span>
<span class="n">url</span><span class="o">=</span><span class="n">reverse_dns_map_url</span><span class="p">,</span>
<span class="n">always_use_local_file</span><span class="o">=</span><span class="n">always_use_local_files</span><span class="p">,</span>
<span class="n">reverse_dns_map</span><span class="o">=</span><span class="n">reverse_dns_map</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;base_domain&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_domain</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">&quot;type&quot;</span><span class="p">]</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">service</span><span class="p">[</span><span class="s2">&quot;name&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">cache</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">cache</span><span class="p">[</span><span class="n">ip_address</span><span class="p">]</span> <span class="o">=</span> <span class="n">info</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> added to cache&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;IP address </span><span class="si">{</span><span class="n">ip_address</span><span class="si">}</span><span class="s2"> reverse_dns not found&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">info</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">parse_email_address</span><span class="p">(</span><span class="n">original_address</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]]:</span>
<span class="k">if</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot;&quot;</span><span class="p">:</span>
<span class="n">display_name</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">display_name</span> <span class="o">=</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">address</span> <span class="o">=</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">address_parts</span> <span class="o">=</span> <span class="n">address</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;@&quot;</span><span class="p">)</span>
<span class="n">local</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">domain</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">address_parts</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">local</span> <span class="o">=</span> <span class="n">address_parts</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="n">domain</span> <span class="o">=</span> <span class="n">address_parts</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="k">return</span> <span class="p">{</span>
<span class="s2">&quot;display_name&quot;</span><span class="p">:</span> <span class="n">display_name</span><span class="p">,</span>
<span class="s2">&quot;address&quot;</span><span class="p">:</span> <span class="n">address</span><span class="p">,</span>
<span class="s2">&quot;local&quot;</span><span class="p">:</span> <span class="n">local</span><span class="p">,</span>
<span class="s2">&quot;domain&quot;</span><span class="p">:</span> <span class="n">domain</span><span class="p">,</span>
<span class="p">}</span>
<div class="viewcode-block" id="get_filename_safe_string">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_filename_safe_string">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">get_filename_safe_string</span><span class="p">(</span><span class="n">string</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a string to a string that is safe for a filename</span>
<span class="sd"> Args:</span>
<span class="sd"> string (str): A string to make safe for a filename</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: A string safe for a filename</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">invalid_filename_chars</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;</span><span class="se">\\</span><span class="s2">&quot;</span><span class="p">,</span> <span class="s2">&quot;/&quot;</span><span class="p">,</span> <span class="s2">&quot;:&quot;</span><span class="p">,</span> <span class="s1">&#39;&quot;&#39;</span><span class="p">,</span> <span class="s2">&quot;*&quot;</span><span class="p">,</span> <span class="s2">&quot;?&quot;</span><span class="p">,</span> <span class="s2">&quot;|&quot;</span><span class="p">,</span> <span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">,</span> <span class="s2">&quot;</span><span class="se">\r</span><span class="s2">&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">string</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">string</span> <span class="o">=</span> <span class="s2">&quot;None&quot;</span>
<span class="k">for</span> <span class="n">char</span> <span class="ow">in</span> <span class="n">invalid_filename_chars</span><span class="p">:</span>
<span class="n">string</span> <span class="o">=</span> <span class="n">string</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">char</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">string</span> <span class="o">=</span> <span class="n">string</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">)</span>
<span class="n">string</span> <span class="o">=</span> <span class="p">(</span><span class="n">string</span><span class="p">[:</span><span class="mi">100</span><span class="p">])</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">string</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">100</span> <span class="k">else</span> <span class="n">string</span>
<span class="k">return</span> <span class="n">string</span></div>
<div class="viewcode-block" id="is_mbox">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.is_mbox">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">is_mbox</span><span class="p">(</span><span class="n">path</span><span class="p">:</span> <span class="nb">str</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks if the file at the given path is an MBOX mailbox file</span>
<span class="sd"> Args:</span>
<span class="sd"> path (str): Path of the file to check</span>
<span class="sd"> Returns:</span>
<span class="sd"> bool: A flag that indicates if the file is an MBOX mailbox file</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">_is_mbox</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">mbox</span> <span class="o">=</span> <span class="n">mailbox</span><span class="o">.</span><span class="n">mbox</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">mbox</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">_is_mbox</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Error checking for MBOX file: </span><span class="si">{0}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="fm">__str__</span><span class="p">()))</span>
<span class="k">return</span> <span class="n">_is_mbox</span></div>
<div class="viewcode-block" id="is_outlook_msg">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.is_outlook_msg">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">is_outlook_msg</span><span class="p">(</span><span class="n">content</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bool</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks if the given content is an Outlook OLE/MSG file</span>
<span class="sd"> Args:</span>
<span class="sd"> content: Content to check</span>
<span class="sd"> Returns:</span>
<span class="sd"> bool: A flag that indicates if the file is an Outlook MSG file</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">)</span> <span class="ow">and</span> <span class="n">content</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span>
<span class="sa">b</span><span class="s2">&quot;</span><span class="se">\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1</span><span class="s2">&quot;</span>
<span class="p">)</span></div>
<div class="viewcode-block" id="convert_outlook_msg">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.convert_outlook_msg">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">convert_outlook_msg</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">:</span> <span class="nb">bytes</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">bytes</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Uses the ``msgconvert`` Perl utility to convert an Outlook MSG file to</span>
<span class="sd"> standard RFC 822 format</span>
<span class="sd"> Args:</span>
<span class="sd"> msg_bytes (bytes): the content of the .msg file</span>
<span class="sd"> Returns:</span>
<span class="sd"> An RFC 822 bytes payload</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_outlook_msg</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;The supplied bytes are not an Outlook MSG file&quot;</span><span class="p">)</span>
<span class="n">orig_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getcwd</span><span class="p">()</span>
<span class="n">tmp_dir</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="s2">&quot;sample.msg&quot;</span><span class="p">,</span> <span class="s2">&quot;wb&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">msg_file</span><span class="p">:</span>
<span class="n">msg_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">subprocess</span><span class="o">.</span><span class="n">check_call</span><span class="p">(</span>
<span class="p">[</span><span class="s2">&quot;msgconvert&quot;</span><span class="p">,</span> <span class="s2">&quot;sample.msg&quot;</span><span class="p">],</span> <span class="n">stdout</span><span class="o">=</span><span class="n">null_file</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">null_file</span>
<span class="p">)</span>
<span class="n">eml_path</span> <span class="o">=</span> <span class="s2">&quot;sample.eml&quot;</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">eml_path</span><span class="p">,</span> <span class="s2">&quot;rb&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">eml_file</span><span class="p">:</span>
<span class="n">rfc822</span> <span class="o">=</span> <span class="n">eml_file</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">EmailParserError</span><span class="p">(</span>
<span class="s2">&quot;Failed to convert Outlook MSG: msgconvert utility not found&quot;</span>
<span class="p">)</span>
<span class="k">finally</span><span class="p">:</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">orig_dir</span><span class="p">)</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
<span class="k">return</span> <span class="n">rfc822</span></div>
<div class="viewcode-block" id="parse_email">
<a class="viewcode-back" href="../../api.html#parsedmarc.utils.parse_email">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">parse_email</span><span class="p">(</span>
<span class="n">data</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="nb">bytes</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span> <span class="o">*</span><span class="p">,</span> <span class="n">strip_attachment_payloads</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">dict</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A simplified email parser</span>
<span class="sd"> Args:</span>
<span class="sd"> data: The RFC 822 message string, or MSG binary</span>
<span class="sd"> strip_attachment_payloads (bool): Remove attachment payloads</span>
<span class="sd"> Returns:</span>
<span class="sd"> dict: Parsed email data</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">):</span>
<span class="k">if</span> <span class="n">is_outlook_msg</span><span class="p">(</span><span class="n">data</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">convert_outlook_msg</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s2">&quot;replace&quot;</span><span class="p">)</span>
<span class="n">parsed_email</span> <span class="o">=</span> <span class="n">mailparser</span><span class="o">.</span><span class="n">parse_from_string</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">headers</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">parsed_email</span><span class="o">.</span><span class="n">headers_json</span><span class="p">)</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">parsed_email</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">parsed_email</span><span class="o">.</span><span class="n">mail_json</span><span class="p">)</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">headers</span>
<span class="k">if</span> <span class="s2">&quot;received&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="k">for</span> <span class="n">received</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;received&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;date_utc&quot;</span> <span class="ow">in</span> <span class="n">received</span><span class="p">:</span>
<span class="k">if</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">del</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;from&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="k">if</span> <span class="s2">&quot;From&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">][</span><span class="s2">&quot;From&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span>
<span class="k">if</span> <span class="s2">&quot;date&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="s2">&quot;reply_to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;reply_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;reply_to&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;reply_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;cc&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;bcc&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;delivered_to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;delivered_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;delivered_to&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;attachments&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">for</span> <span class="n">attachment</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;payload&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;payload&quot;</span><span class="p">]</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="s2">&quot;content_transfer_encoding&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="k">if</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;content_transfer_encoding&quot;</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot;base64&quot;</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="n">decode_base64</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="nb">str</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
<span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;sha256&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">sha256</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Unable to decode attachment: </span><span class="si">{0}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="fm">__str__</span><span class="p">()))</span>
<span class="k">if</span> <span class="n">strip_attachment_payloads</span><span class="p">:</span>
<span class="k">for</span> <span class="n">attachment</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;payload&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="k">del</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;payload&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="s2">&quot;subject&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;subject&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;filename_safe_subject&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">get_filename_safe_string</span><span class="p">(</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;subject&quot;</span><span class="p">]</span>
<span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;body&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;body&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">return</span> <span class="n">parsed_email</span></div>
</pre></div>
</div>
</div>
<footer>
  <!-- Page footer: copyright notice and build attribution. -->
  <hr>
  <div role="contentinfo">
    <p>© Copyright 2018 - 2025, Sean Whalen and contributors.</p>
  </div>
  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
  <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
  provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
  // Once the DOM is ready, switch on the theme's navigation behaviour.
  jQuery(() => {
    SphinxRtdTheme.Navigation.enable(true);
  });
</script>
</body>
</html>