Files
parsedmarc/_modules/parsedmarc/utils.html
Sean Whalen 10ef6fc135 Update docs
2024-03-04 10:50:50 -05:00

672 lines
70 KiB
HTML

<!DOCTYPE html>
<html class="writer-html5" lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>parsedmarc.utils &mdash; parsedmarc 8.8.0 documentation</title>
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css" />
<link rel="stylesheet" type="text/css" href="../../_static/css/theme.css" />
<!--[if lt IE 9]>
<script src="../../_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="../../" id="documentation_options" src="../../_static/documentation_options.js"></script>
<script src="../../_static/jquery.js"></script>
<script src="../../_static/underscore.js"></script>
<script src="../../_static/_sphinx_javascript_frameworks_compat.js"></script>
<script src="../../_static/doctools.js"></script>
<script src="../../_static/sphinx_highlight.js"></script>
<script src="../../_static/js/theme.js"></script>
<link rel="index" title="Index" href="../../genindex.html" />
<link rel="search" title="Search" href="../../search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="../../index.html" class="icon icon-home">
parsedmarc
</a>
<div class="version">
8.8.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../usage.html">Using parsedmarc</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../output.html">Sample outputs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../elasticsearch.html">Elasticsearch and Kibana</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../opensearch.html">OpenSearch and Grafana</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../kibana.html">Using the Kibana dashboards</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../splunk.html">Splunk</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../davmail.html">Accessing an inbox using OWA/EWS</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../dmarc.html">Understanding DMARC</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../contributing.html">Contributing to parsedmarc</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api.html">API reference</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="../../index.html">parsedmarc</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="../../index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item"><a href="../index.html">Module code</a></li>
<li class="breadcrumb-item"><a href="../parsedmarc.html">parsedmarc</a></li>
<li class="breadcrumb-item active">parsedmarc.utils</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1>Source code for parsedmarc.utils</h1><div class="highlight"><pre>
<span></span><span class="sd">&quot;&quot;&quot;Utility functions that might be useful for other projects&quot;&quot;&quot;</span>
<span class="kn">import</span> <span class="nn">logging</span>
<span class="kn">import</span> <span class="nn">os</span>
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">timezone</span>
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">timedelta</span>
<span class="kn">from</span> <span class="nn">collections</span> <span class="kn">import</span> <span class="n">OrderedDict</span>
<span class="kn">import</span> <span class="nn">tempfile</span>
<span class="kn">import</span> <span class="nn">subprocess</span>
<span class="kn">import</span> <span class="nn">shutil</span>
<span class="kn">import</span> <span class="nn">mailparser</span>
<span class="kn">import</span> <span class="nn">json</span>
<span class="kn">import</span> <span class="nn">hashlib</span>
<span class="kn">import</span> <span class="nn">base64</span>
<span class="kn">import</span> <span class="nn">atexit</span>
<span class="kn">import</span> <span class="nn">mailbox</span>
<span class="kn">import</span> <span class="nn">re</span>
<span class="k">try</span><span class="p">:</span>
<span class="kn">import</span> <span class="nn">importlib.resources</span> <span class="k">as</span> <span class="nn">pkg_resources</span>
<span class="k">except</span> <span class="ne">ImportError</span><span class="p">:</span>
<span class="c1"># Try backported to PY&lt;37 `importlib_resources`</span>
<span class="kn">import</span> <span class="nn">importlib_resources</span> <span class="k">as</span> <span class="nn">pkg_resources</span>
<span class="kn">from</span> <span class="nn">dateutil.parser</span> <span class="kn">import</span> <span class="n">parse</span> <span class="k">as</span> <span class="n">parse_date</span>
<span class="kn">import</span> <span class="nn">dns.reversename</span>
<span class="kn">import</span> <span class="nn">dns.resolver</span>
<span class="kn">import</span> <span class="nn">dns.exception</span>
<span class="kn">import</span> <span class="nn">geoip2.database</span>
<span class="kn">import</span> <span class="nn">geoip2.errors</span>
<span class="kn">import</span> <span class="nn">publicsuffixlist</span>
<span class="kn">from</span> <span class="nn">parsedmarc.log</span> <span class="kn">import</span> <span class="n">logger</span>
<span class="kn">import</span> <span class="nn">parsedmarc.resources.dbip</span>
<span class="n">parenthesis_regex</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">compile</span><span class="p">(</span><span class="sa">r</span><span class="s1">&#39;\s*\(.*\)\s*&#39;</span><span class="p">)</span>
<span class="n">null_file</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="n">os</span><span class="o">.</span><span class="n">devnull</span><span class="p">,</span> <span class="s2">&quot;w&quot;</span><span class="p">)</span>
<span class="n">mailparser_logger</span> <span class="o">=</span> <span class="n">logging</span><span class="o">.</span><span class="n">getLogger</span><span class="p">(</span><span class="s2">&quot;mailparser&quot;</span><span class="p">)</span>
<span class="n">mailparser_logger</span><span class="o">.</span><span class="n">setLevel</span><span class="p">(</span><span class="n">logging</span><span class="o">.</span><span class="n">CRITICAL</span><span class="p">)</span>
<span class="n">tempdir</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span>
<span class="k">def</span> <span class="nf">_cleanup</span><span class="p">():</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Remove temporary files&quot;&quot;&quot;</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="n">tempdir</span><span class="p">)</span>
<span class="n">atexit</span><span class="o">.</span><span class="n">register</span><span class="p">(</span><span class="n">_cleanup</span><span class="p">)</span>
<div class="viewcode-block" id="EmailParserError"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.EmailParserError">[docs]</a><span class="k">class</span> <span class="nc">EmailParserError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Raised when an error parsing the email occurs&quot;&quot;&quot;</span></div>
<div class="viewcode-block" id="DownloadError"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.DownloadError">[docs]</a><span class="k">class</span> <span class="nc">DownloadError</span><span class="p">(</span><span class="ne">RuntimeError</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Raised when an error occurs when downloading a file&quot;&quot;&quot;</span></div>
<div class="viewcode-block" id="decode_base64"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.decode_base64">[docs]</a><span class="k">def</span> <span class="nf">decode_base64</span><span class="p">(</span><span class="n">data</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Decodes a base64 string, with padding being optional</span>
<span class="sd"> Args:</span>
<span class="sd"> data: A base64 encoded string</span>
<span class="sd"> Returns:</span>
<span class="sd"> bytes: The decoded bytes</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">data</span> <span class="o">=</span> <span class="nb">bytes</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">encoding</span><span class="o">=</span><span class="s2">&quot;ascii&quot;</span><span class="p">)</span>
<span class="n">missing_padding</span> <span class="o">=</span> <span class="nb">len</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> <span class="o">%</span> <span class="mi">4</span>
<span class="k">if</span> <span class="n">missing_padding</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">data</span> <span class="o">+=</span> <span class="sa">b</span><span class="s1">&#39;=&#39;</span> <span class="o">*</span> <span class="p">(</span><span class="mi">4</span> <span class="o">-</span> <span class="n">missing_padding</span><span class="p">)</span>
<span class="k">return</span> <span class="n">base64</span><span class="o">.</span><span class="n">b64decode</span><span class="p">(</span><span class="n">data</span><span class="p">)</span></div>
<div class="viewcode-block" id="get_base_domain"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_base_domain">[docs]</a><span class="k">def</span> <span class="nf">get_base_domain</span><span class="p">(</span><span class="n">domain</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Gets the base domain name for the given domain</span>
<span class="sd"> .. note::</span>
<span class="sd"> Results are based on a list of public domain suffixes at</span>
<span class="sd"> https://publicsuffix.org/list/public_suffix_list.dat.</span>
<span class="sd"> Args:</span>
<span class="sd"> domain (str): A domain or subdomain</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The base domain of the given domain</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">psl</span> <span class="o">=</span> <span class="n">publicsuffixlist</span><span class="o">.</span><span class="n">PublicSuffixList</span><span class="p">()</span>
<span class="k">return</span> <span class="n">psl</span><span class="o">.</span><span class="n">privatesuffix</span><span class="p">(</span><span class="n">domain</span><span class="p">)</span></div>
<div class="viewcode-block" id="query_dns"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.query_dns">[docs]</a><span class="k">def</span> <span class="nf">query_dns</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">,</span> <span class="n">cache</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">nameservers</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mf">2.0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Queries DNS</span>
<span class="sd"> Args:</span>
<span class="sd"> domain (str): The domain or subdomain to query about</span>
<span class="sd"> record_type (str): The record type to query for</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default)</span>
<span class="sd"> timeout (float): Sets the DNS timeout in seconds</span>
<span class="sd"> Returns:</span>
<span class="sd"> list: A list of answers</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">domain</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">domain</span><span class="p">)</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="n">record_type</span> <span class="o">=</span> <span class="n">record_type</span><span class="o">.</span><span class="n">upper</span><span class="p">()</span>
<span class="n">cache_key</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="si">{0}</span><span class="s2">_</span><span class="si">{1}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">)</span>
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
<span class="n">records</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">cache_key</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="n">records</span><span class="p">:</span>
<span class="k">return</span> <span class="n">records</span>
<span class="n">resolver</span> <span class="o">=</span> <span class="n">dns</span><span class="o">.</span><span class="n">resolver</span><span class="o">.</span><span class="n">Resolver</span><span class="p">()</span>
<span class="n">timeout</span> <span class="o">=</span> <span class="nb">float</span><span class="p">(</span><span class="n">timeout</span><span class="p">)</span>
<span class="k">if</span> <span class="n">nameservers</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">nameservers</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;1.1.1.1&quot;</span><span class="p">,</span> <span class="s2">&quot;1.0.0.1&quot;</span><span class="p">,</span>
<span class="s2">&quot;2606:4700:4700::1111&quot;</span><span class="p">,</span> <span class="s2">&quot;2606:4700:4700::1001&quot;</span><span class="p">,</span>
<span class="p">]</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">nameservers</span> <span class="o">=</span> <span class="n">nameservers</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">timeout</span> <span class="o">=</span> <span class="n">timeout</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">lifetime</span> <span class="o">=</span> <span class="n">timeout</span>
<span class="k">if</span> <span class="n">record_type</span> <span class="o">==</span> <span class="s2">&quot;TXT&quot;</span><span class="p">:</span>
<span class="n">resource_records</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">r</span><span class="o">.</span><span class="n">strings</span><span class="p">,</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">resolve</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">,</span> <span class="n">lifetime</span><span class="o">=</span><span class="n">timeout</span><span class="p">)))</span>
<span class="n">_resource_record</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">resource_record</span><span class="p">[</span><span class="mi">0</span><span class="p">][:</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">resource_record</span><span class="p">)</span>
<span class="k">for</span> <span class="n">resource_record</span> <span class="ow">in</span> <span class="n">resource_records</span> <span class="k">if</span> <span class="n">resource_record</span><span class="p">]</span>
<span class="n">records</span> <span class="o">=</span> <span class="p">[</span><span class="n">r</span><span class="o">.</span><span class="n">decode</span><span class="p">()</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">_resource_record</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">records</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span>
<span class="k">lambda</span> <span class="n">r</span><span class="p">:</span> <span class="n">r</span><span class="o">.</span><span class="n">to_text</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s1">&#39;&quot;&#39;</span><span class="p">,</span> <span class="s1">&#39;&#39;</span><span class="p">)</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">),</span>
<span class="n">resolver</span><span class="o">.</span><span class="n">resolve</span><span class="p">(</span><span class="n">domain</span><span class="p">,</span> <span class="n">record_type</span><span class="p">,</span> <span class="n">lifetime</span><span class="o">=</span><span class="n">timeout</span><span class="p">)))</span>
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
<span class="n">cache</span><span class="p">[</span><span class="n">cache_key</span><span class="p">]</span> <span class="o">=</span> <span class="n">records</span>
<span class="k">return</span> <span class="n">records</span></div>
<div class="viewcode-block" id="get_reverse_dns"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_reverse_dns">[docs]</a><span class="k">def</span> <span class="nf">get_reverse_dns</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">cache</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">nameservers</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mf">2.0</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Resolves an IP address to a hostname using a reverse DNS query</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to resolve</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default)</span>
<span class="sd"> timeout (float): Sets the DNS query timeout in seconds</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The reverse DNS hostname (if any)</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">hostname</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">address</span> <span class="o">=</span> <span class="n">dns</span><span class="o">.</span><span class="n">reversename</span><span class="o">.</span><span class="n">from_address</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span>
<span class="n">hostname</span> <span class="o">=</span> <span class="n">query_dns</span><span class="p">(</span><span class="n">address</span><span class="p">,</span> <span class="s2">&quot;PTR&quot;</span><span class="p">,</span> <span class="n">cache</span><span class="o">=</span><span class="n">cache</span><span class="p">,</span>
<span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span>
<span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">except</span> <span class="n">dns</span><span class="o">.</span><span class="n">exception</span><span class="o">.</span><span class="n">DNSException</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">return</span> <span class="n">hostname</span></div>
<div class="viewcode-block" id="timestamp_to_datetime"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.timestamp_to_datetime">[docs]</a><span class="k">def</span> <span class="nf">timestamp_to_datetime</span><span class="p">(</span><span class="n">timestamp</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a UNIX/DMARC timestamp to a Python ``datetime`` object</span>
<span class="sd"> Args:</span>
<span class="sd"> timestamp (int): The timestamp</span>
<span class="sd"> Returns:</span>
<span class="sd"> datetime: The converted timestamp as a Python ``datetime`` object</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span><span class="nb">int</span><span class="p">(</span><span class="n">timestamp</span><span class="p">))</span></div>
<div class="viewcode-block" id="timestamp_to_human"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.timestamp_to_human">[docs]</a><span class="k">def</span> <span class="nf">timestamp_to_human</span><span class="p">(</span><span class="n">timestamp</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a UNIX/DMARC timestamp to a human-readable string</span>
<span class="sd"> Args:</span>
<span class="sd"> timestamp: The timestamp</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: The converted timestamp in ``YYYY-MM-DD HH:MM:SS`` format</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="n">timestamp_to_datetime</span><span class="p">(</span><span class="n">timestamp</span><span class="p">)</span><span class="o">.</span><span class="n">strftime</span><span class="p">(</span><span class="s2">&quot;%Y-%m-</span><span class="si">%d</span><span class="s2"> %H:%M:%S&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="human_timestamp_to_datetime"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.human_timestamp_to_datetime">[docs]</a><span class="k">def</span> <span class="nf">human_timestamp_to_datetime</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">,</span> <span class="n">to_utc</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a human-readable timestamp into a Python ``datetime`` object</span>
<span class="sd"> Args:</span>
<span class="sd"> human_timestamp (str): A timestamp string</span>
<span class="sd"> to_utc (bool): Convert the timestamp to UTC</span>
<span class="sd"> Returns:</span>
<span class="sd"> datetime: The converted timestamp</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">human_timestamp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;-0000&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">parenthesis_regex</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">,</span> <span class="n">human_timestamp</span><span class="p">)</span>
<span class="n">dt</span> <span class="o">=</span> <span class="n">parse_date</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">)</span>
<span class="k">return</span> <span class="n">dt</span><span class="o">.</span><span class="n">astimezone</span><span class="p">(</span><span class="n">timezone</span><span class="o">.</span><span class="n">utc</span><span class="p">)</span> <span class="k">if</span> <span class="n">to_utc</span> <span class="k">else</span> <span class="n">dt</span></div>
<div class="viewcode-block" id="human_timestamp_to_unix_timestamp"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.human_timestamp_to_unix_timestamp">[docs]</a><span class="k">def</span> <span class="nf">human_timestamp_to_unix_timestamp</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a human-readable timestamp into a UNIX timestamp</span>
<span class="sd"> Args:</span>
<span class="sd"> human_timestamp (str): A timestamp in `YYYY-MM-DD HH:MM:SS`` format</span>
<span class="sd"> Returns:</span>
<span class="sd"> float: The converted timestamp</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">human_timestamp</span> <span class="o">=</span> <span class="n">human_timestamp</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">human_timestamp_to_datetime</span><span class="p">(</span><span class="n">human_timestamp</span><span class="p">)</span><span class="o">.</span><span class="n">timestamp</span><span class="p">()</span></div>
<div class="viewcode-block" id="get_ip_address_country"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_country">[docs]</a><span class="k">def</span> <span class="nf">get_ip_address_country</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">db_path</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns the ISO code for the country associated</span>
<span class="sd"> with the given IPv4 or IPv6 address</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to query for</span>
<span class="sd"> db_path (str): Path to a MMDB file from MaxMind or DBIP</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: And ISO country code associated with the given IP address</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">db_paths</span> <span class="o">=</span> <span class="p">[</span>
<span class="s2">&quot;GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/local/share/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/share/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/var/lib/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/var/local/lib/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;/usr/local/var/GeoIP/GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;%SystemDrive%</span><span class="se">\\</span><span class="s2">ProgramData</span><span class="se">\\</span><span class="s2">MaxMind</span><span class="se">\\</span><span class="s2">GeoIPUpdate</span><span class="se">\\</span><span class="s2">GeoIP</span><span class="se">\\</span><span class="s2">&quot;</span>
<span class="s2">&quot;GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;C:</span><span class="se">\\</span><span class="s2">GeoIP</span><span class="se">\\</span><span class="s2">GeoLite2-Country.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;dbip-country-lite.mmdb&quot;</span><span class="p">,</span>
<span class="s2">&quot;dbip-country.mmdb&quot;</span><span class="p">,</span>
<span class="p">]</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span> <span class="ow">is</span> <span class="kc">False</span><span class="p">:</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;No file exists at </span><span class="si">{</span><span class="n">db_path</span><span class="si">}</span><span class="s2">. Falling back to an &quot;</span>
<span class="s2">&quot;included copy of the IPDB IP to Country &quot;</span>
<span class="s2">&quot;Lite database.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">for</span> <span class="n">system_path</span> <span class="ow">in</span> <span class="n">db_paths</span><span class="p">:</span>
<span class="k">if</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="n">system_path</span><span class="p">):</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="n">system_path</span>
<span class="k">break</span>
<span class="k">if</span> <span class="n">db_path</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">with</span> <span class="n">pkg_resources</span><span class="o">.</span><span class="n">path</span><span class="p">(</span><span class="n">parsedmarc</span><span class="o">.</span><span class="n">resources</span><span class="o">.</span><span class="n">dbip</span><span class="p">,</span>
<span class="s2">&quot;dbip-country-lite.mmdb&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">path</span><span class="p">:</span>
<span class="n">db_path</span> <span class="o">=</span> <span class="n">path</span>
<span class="n">db_age</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">datetime</span><span class="o">.</span><span class="n">fromtimestamp</span><span class="p">(</span>
<span class="n">os</span><span class="o">.</span><span class="n">stat</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span><span class="o">.</span><span class="n">st_mtime</span><span class="p">)</span>
<span class="k">if</span> <span class="n">db_age</span> <span class="o">&gt;</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">30</span><span class="p">):</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;IP database is more than a month old&quot;</span><span class="p">)</span>
<span class="n">db_reader</span> <span class="o">=</span> <span class="n">geoip2</span><span class="o">.</span><span class="n">database</span><span class="o">.</span><span class="n">Reader</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span>
<span class="n">country</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">country</span> <span class="o">=</span> <span class="n">db_reader</span><span class="o">.</span><span class="n">country</span><span class="p">(</span><span class="n">ip_address</span><span class="p">)</span><span class="o">.</span><span class="n">country</span><span class="o">.</span><span class="n">iso_code</span>
<span class="k">except</span> <span class="n">geoip2</span><span class="o">.</span><span class="n">errors</span><span class="o">.</span><span class="n">AddressNotFoundError</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">return</span> <span class="n">country</span></div>
<div class="viewcode-block" id="get_ip_address_info"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_ip_address_info">[docs]</a><span class="k">def</span> <span class="nf">get_ip_address_info</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">ip_db_path</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">cache</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">offline</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">nameservers</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mf">2.0</span><span class="p">,</span> <span class="n">parallel</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Returns reverse DNS and country information for the given IP address</span>
<span class="sd"> Args:</span>
<span class="sd"> ip_address (str): The IP address to check</span>
<span class="sd"> ip_db_path (str): path to a MMDB file from MaxMind or DBIP</span>
<span class="sd"> cache (ExpiringDict): Cache storage</span>
<span class="sd"> offline (bool): Do not make online queries for geolocation or DNS</span>
<span class="sd"> nameservers (list): A list of one or more nameservers to use</span>
<span class="sd"> (Cloudflare&#39;s public DNS resolvers by default)</span>
<span class="sd"> timeout (float): Sets the DNS timeout in seconds</span>
<span class="sd"> parallel (bool): parallel processing</span>
<span class="sd"> Returns:</span>
<span class="sd"> OrderedDict: ``ip_address``, ``reverse_dns``</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">ip_address</span> <span class="o">=</span> <span class="n">ip_address</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="k">if</span> <span class="n">cache</span><span class="p">:</span>
<span class="n">info</span> <span class="o">=</span> <span class="n">cache</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="k">if</span> <span class="n">info</span><span class="p">:</span>
<span class="k">return</span> <span class="n">info</span>
<span class="n">info</span> <span class="o">=</span> <span class="n">OrderedDict</span><span class="p">()</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;ip_address&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">ip_address</span>
<span class="k">if</span> <span class="n">offline</span><span class="p">:</span>
<span class="n">reverse_dns</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">reverse_dns</span> <span class="o">=</span> <span class="n">get_reverse_dns</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span>
<span class="n">nameservers</span><span class="o">=</span><span class="n">nameservers</span><span class="p">,</span>
<span class="n">timeout</span><span class="o">=</span><span class="n">timeout</span><span class="p">)</span>
<span class="n">country</span> <span class="o">=</span> <span class="n">get_ip_address_country</span><span class="p">(</span><span class="n">ip_address</span><span class="p">,</span> <span class="n">db_path</span><span class="o">=</span><span class="n">ip_db_path</span><span class="p">)</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;country&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">country</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;reverse_dns&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">reverse_dns</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;base_domain&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">reverse_dns</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">base_domain</span> <span class="o">=</span> <span class="n">get_base_domain</span><span class="p">(</span><span class="n">reverse_dns</span><span class="p">)</span>
<span class="n">info</span><span class="p">[</span><span class="s2">&quot;base_domain&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">base_domain</span>
<span class="k">return</span> <span class="n">info</span></div>
<span class="k">def</span> <span class="nf">parse_email_address</span><span class="p">(</span><span class="n">original_address</span><span class="p">):</span>
<span class="k">if</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot;&quot;</span><span class="p">:</span>
<span class="n">display_name</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">display_name</span> <span class="o">=</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="n">address</span> <span class="o">=</span> <span class="n">original_address</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span>
<span class="n">address_parts</span> <span class="o">=</span> <span class="n">address</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;@&quot;</span><span class="p">)</span>
<span class="n">local</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">domain</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">address_parts</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">local</span> <span class="o">=</span> <span class="n">address_parts</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="n">domain</span> <span class="o">=</span> <span class="n">address_parts</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
<span class="k">return</span> <span class="n">OrderedDict</span><span class="p">([(</span><span class="s2">&quot;display_name&quot;</span><span class="p">,</span> <span class="n">display_name</span><span class="p">),</span>
<span class="p">(</span><span class="s2">&quot;address&quot;</span><span class="p">,</span> <span class="n">address</span><span class="p">),</span>
<span class="p">(</span><span class="s2">&quot;local&quot;</span><span class="p">,</span> <span class="n">local</span><span class="p">),</span>
<span class="p">(</span><span class="s2">&quot;domain&quot;</span><span class="p">,</span> <span class="n">domain</span><span class="p">)])</span>
<div class="viewcode-block" id="get_filename_safe_string"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.get_filename_safe_string">[docs]</a><span class="k">def</span> <span class="nf">get_filename_safe_string</span><span class="p">(</span><span class="n">string</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Converts a string to a string that is safe for a filename</span>
<span class="sd"> Args:</span>
<span class="sd"> string (str): A string to make safe for a filename</span>
<span class="sd"> Returns:</span>
<span class="sd"> str: A string safe for a filename</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">invalid_filename_chars</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;</span><span class="se">\\</span><span class="s1">&#39;</span><span class="p">,</span> <span class="s1">&#39;/&#39;</span><span class="p">,</span> <span class="s1">&#39;:&#39;</span><span class="p">,</span> <span class="s1">&#39;&quot;&#39;</span><span class="p">,</span> <span class="s1">&#39;*&#39;</span><span class="p">,</span> <span class="s1">&#39;?&#39;</span><span class="p">,</span> <span class="s1">&#39;|&#39;</span><span class="p">,</span> <span class="s1">&#39;</span><span class="se">\n</span><span class="s1">&#39;</span><span class="p">,</span>
<span class="s1">&#39;</span><span class="se">\r</span><span class="s1">&#39;</span><span class="p">]</span>
<span class="k">if</span> <span class="n">string</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">string</span> <span class="o">=</span> <span class="s2">&quot;None&quot;</span>
<span class="k">for</span> <span class="n">char</span> <span class="ow">in</span> <span class="n">invalid_filename_chars</span><span class="p">:</span>
<span class="n">string</span> <span class="o">=</span> <span class="n">string</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">char</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">string</span> <span class="o">=</span> <span class="n">string</span><span class="o">.</span><span class="n">rstrip</span><span class="p">(</span><span class="s2">&quot;.&quot;</span><span class="p">)</span>
<span class="n">string</span> <span class="o">=</span> <span class="p">(</span><span class="n">string</span><span class="p">[:</span><span class="mi">100</span><span class="p">])</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">string</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">100</span> <span class="k">else</span> <span class="n">string</span>
<span class="k">return</span> <span class="n">string</span></div>
<div class="viewcode-block" id="is_mbox"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.is_mbox">[docs]</a><span class="k">def</span> <span class="nf">is_mbox</span><span class="p">(</span><span class="n">path</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks if the given content is an MBOX mailbox file</span>
<span class="sd"> Args:</span>
<span class="sd"> path: Content to check</span>
<span class="sd"> Returns:</span>
<span class="sd"> bool: A flag that indicates if the file is an MBOX mailbox file</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">_is_mbox</span> <span class="o">=</span> <span class="kc">False</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">mbox</span> <span class="o">=</span> <span class="n">mailbox</span><span class="o">.</span><span class="n">mbox</span><span class="p">(</span><span class="n">path</span><span class="p">)</span>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">mbox</span><span class="o">.</span><span class="n">keys</span><span class="p">())</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
<span class="n">_is_mbox</span> <span class="o">=</span> <span class="kc">True</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Error checking for MBOX file: </span><span class="si">{0}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">e</span><span class="o">.</span><span class="fm">__str__</span><span class="p">()))</span>
<span class="k">return</span> <span class="n">_is_mbox</span></div>
<div class="viewcode-block" id="is_outlook_msg"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.is_outlook_msg">[docs]</a><span class="k">def</span> <span class="nf">is_outlook_msg</span><span class="p">(</span><span class="n">content</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Checks if the given content is an Outlook msg OLE/MSG file</span>
<span class="sd"> Args:</span>
<span class="sd"> content: Content to check</span>
<span class="sd"> Returns:</span>
<span class="sd"> bool: A flag that indicates if the file is an Outlook MSG file</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">return</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">content</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">)</span> <span class="ow">and</span> <span class="n">content</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span>
<span class="sa">b</span><span class="s2">&quot;</span><span class="se">\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1</span><span class="s2">&quot;</span><span class="p">)</span></div>
<div class="viewcode-block" id="convert_outlook_msg"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.convert_outlook_msg">[docs]</a><span class="k">def</span> <span class="nf">convert_outlook_msg</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Uses the ``msgconvert`` Perl utility to convert an Outlook MS file to</span>
<span class="sd"> standard RFC 822 format</span>
<span class="sd"> Args:</span>
<span class="sd"> msg_bytes (bytes): the content of the .msg file</span>
<span class="sd"> Returns:</span>
<span class="sd"> A RFC 822 string</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">is_outlook_msg</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;The supplied bytes are not an Outlook MSG file&quot;</span><span class="p">)</span>
<span class="n">orig_dir</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">getcwd</span><span class="p">()</span>
<span class="n">tmp_dir</span> <span class="o">=</span> <span class="n">tempfile</span><span class="o">.</span><span class="n">mkdtemp</span><span class="p">()</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="s2">&quot;sample.msg&quot;</span><span class="p">,</span> <span class="s2">&quot;wb&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">msg_file</span><span class="p">:</span>
<span class="n">msg_file</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">msg_bytes</span><span class="p">)</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">subprocess</span><span class="o">.</span><span class="n">check_call</span><span class="p">([</span><span class="s2">&quot;msgconvert&quot;</span><span class="p">,</span> <span class="s2">&quot;sample.msg&quot;</span><span class="p">],</span>
<span class="n">stdout</span><span class="o">=</span><span class="n">null_file</span><span class="p">,</span> <span class="n">stderr</span><span class="o">=</span><span class="n">null_file</span><span class="p">)</span>
<span class="n">eml_path</span> <span class="o">=</span> <span class="s2">&quot;sample.eml&quot;</span>
<span class="k">with</span> <span class="nb">open</span><span class="p">(</span><span class="n">eml_path</span><span class="p">,</span> <span class="s2">&quot;rb&quot;</span><span class="p">)</span> <span class="k">as</span> <span class="n">eml_file</span><span class="p">:</span>
<span class="n">rfc822</span> <span class="o">=</span> <span class="n">eml_file</span><span class="o">.</span><span class="n">read</span><span class="p">()</span>
<span class="k">except</span> <span class="ne">FileNotFoundError</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">EmailParserError</span><span class="p">(</span>
<span class="s2">&quot;Failed to convert Outlook MSG: msgconvert utility not found&quot;</span><span class="p">)</span>
<span class="k">finally</span><span class="p">:</span>
<span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="n">orig_dir</span><span class="p">)</span>
<span class="n">shutil</span><span class="o">.</span><span class="n">rmtree</span><span class="p">(</span><span class="n">tmp_dir</span><span class="p">)</span>
<span class="k">return</span> <span class="n">rfc822</span></div>
<div class="viewcode-block" id="parse_email"><a class="viewcode-back" href="../../api.html#parsedmarc.utils.parse_email">[docs]</a><span class="k">def</span> <span class="nf">parse_email</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">strip_attachment_payloads</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> A simplified email parser</span>
<span class="sd"> Args:</span>
<span class="sd"> data: The RFC 822 message string, or MSG binary</span>
<span class="sd"> strip_attachment_payloads (bool): Remove attachment payloads</span>
<span class="sd"> Returns:</span>
<span class="sd"> dict: Parsed email data</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="nb">bytes</span><span class="p">):</span>
<span class="k">if</span> <span class="n">is_outlook_msg</span><span class="p">(</span><span class="n">data</span><span class="p">):</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">convert_outlook_msg</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="n">decode</span><span class="p">(</span><span class="s2">&quot;utf-8&quot;</span><span class="p">,</span> <span class="n">errors</span><span class="o">=</span><span class="s2">&quot;replace&quot;</span><span class="p">)</span>
<span class="n">parsed_email</span> <span class="o">=</span> <span class="n">mailparser</span><span class="o">.</span><span class="n">parse_from_string</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
<span class="n">headers</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">parsed_email</span><span class="o">.</span><span class="n">headers_json</span><span class="p">)</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">parsed_email</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">parsed_email</span><span class="o">.</span><span class="n">mail_json</span><span class="p">)</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">headers</span>
<span class="k">if</span> <span class="s2">&quot;received&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="k">for</span> <span class="n">received</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;received&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;date_utc&quot;</span> <span class="ow">in</span> <span class="n">received</span><span class="p">:</span>
<span class="k">if</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
<span class="k">del</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">received</span><span class="p">[</span><span class="s2">&quot;date_utc&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span>
<span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;from&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="k">if</span> <span class="s2">&quot;From&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;Headers&quot;</span><span class="p">][</span><span class="s2">&quot;From&quot;</span><span class="p">]</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;from&quot;</span><span class="p">][</span><span class="mi">0</span><span class="p">])</span>
<span class="k">if</span> <span class="s2">&quot;date&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;T&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;date&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">if</span> <span class="s2">&quot;reply_to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;reply_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;reply_to&quot;</span><span class="p">]))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;reply_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">]))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;cc&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">]))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;cc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;bcc&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">]))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;bcc&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">if</span> <span class="s2">&quot;delivered_to&quot;</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;delivered_to&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span>
<span class="nb">map</span><span class="p">(</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">parse_email_address</span><span class="p">(</span><span class="n">x</span><span class="p">),</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;delivered_to&quot;</span><span class="p">])</span>
<span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;attachments&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">for</span> <span class="n">attachment</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;payload&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;payload&quot;</span><span class="p">]</span>
<span class="k">try</span><span class="p">:</span>
<span class="k">if</span> <span class="s2">&quot;content_transfer_encoding&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="k">if</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;content_transfer_encoding&quot;</span><span class="p">]</span> <span class="o">==</span> <span class="s2">&quot;base64&quot;</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="n">decode_base64</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">payload</span> <span class="o">=</span> <span class="nb">str</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span>
<span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;sha256&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">sha256</span><span class="p">(</span><span class="n">payload</span><span class="p">)</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span>
<span class="k">except</span> <span class="ne">Exception</span> <span class="k">as</span> <span class="n">e</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">debug</span><span class="p">(</span><span class="s2">&quot;Unable to decode attachment: </span><span class="si">{0}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
<span class="n">e</span><span class="o">.</span><span class="fm">__str__</span><span class="p">()</span>
<span class="p">))</span>
<span class="k">if</span> <span class="n">strip_attachment_payloads</span><span class="p">:</span>
<span class="k">for</span> <span class="n">attachment</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;attachments&quot;</span><span class="p">]:</span>
<span class="k">if</span> <span class="s2">&quot;payload&quot;</span> <span class="ow">in</span> <span class="n">attachment</span><span class="p">:</span>
<span class="k">del</span> <span class="n">attachment</span><span class="p">[</span><span class="s2">&quot;payload&quot;</span><span class="p">]</span>
<span class="k">if</span> <span class="s2">&quot;subject&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;subject&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;filename_safe_subject&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="n">get_filename_safe_string</span><span class="p">(</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;subject&quot;</span><span class="p">])</span>
<span class="k">if</span> <span class="s2">&quot;body&quot;</span> <span class="ow">not</span> <span class="ow">in</span> <span class="n">parsed_email</span><span class="p">:</span>
<span class="n">parsed_email</span><span class="p">[</span><span class="s2">&quot;body&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">return</span> <span class="n">parsed_email</span></div>
</pre></div>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2018 - 2023, Sean Whalen and contributors.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>