Change publicsuffix2 to publicsuffixlist (#406)

* Change from publicsuffix2 to publicsuffixlist

* Update publicsuffixlist (now auto-updating)

* Remove unused imports
This commit is contained in:
Jed Laundry
2023-05-10 00:49:41 +12:00
committed by GitHub
parent 062d6ea821
commit a06fdc586f
4 changed files with 11 additions and 46 deletions

View File

@@ -13,7 +13,6 @@ import mailparser
import json
import hashlib
import base64
import platform
import atexit
import mailbox
import re
@@ -29,16 +28,11 @@ import dns.resolver
import dns.exception
import geoip2.database
import geoip2.errors
import requests
import publicsuffix2
import publicsuffixlist
from parsedmarc.log import logger
import parsedmarc.resources
USER_AGENT = "Mozilla/5.0 (({0} {1})) parsedmarc".format(
platform.system(),
platform.release(),
)
parenthesis_regex = re.compile(r'\s*\(.*\)\s*')
@@ -83,7 +77,7 @@ def decode_base64(data):
return base64.b64decode(data)
def get_base_domain(domain, use_fresh_psl=False):
def get_base_domain(domain):
"""
Gets the base domain name for the given domain
@@ -93,41 +87,13 @@ def get_base_domain(domain, use_fresh_psl=False):
Args:
domain (str): A domain or subdomain
use_fresh_psl (bool): Download a fresh Public Suffix List
Returns:
str: The base domain of the given domain
"""
psl_path = os.path.join(tempdir, "public_suffix_list.dat")
def download_psl():
url = "https://publicsuffix.org/list/public_suffix_list.dat"
# Use a browser-like user agent string to bypass some proxy blocks
headers = {"User-Agent": USER_AGENT}
try:
fresh_psl = requests.get(url, headers=headers).text
with open(psl_path, "w", encoding="utf-8") as fresh_psl_file:
fresh_psl_file.write(fresh_psl)
except Exception as error:
raise DownloadError(
"Failed to download an updated PSL {0}".format(error))
if use_fresh_psl:
if not os.path.exists(psl_path):
download_psl()
else:
psl_age = datetime.now() - datetime.fromtimestamp(
os.stat(psl_path).st_mtime)
if psl_age > timedelta(hours=24):
download_psl()
with open(psl_path, encoding="utf-8") as psl_file:
psl = publicsuffix2.PublicSuffixList(psl_file)
return psl.get_public_suffix(domain)
else:
return publicsuffix2.get_sld(domain)
psl = publicsuffixlist.PublicSuffixList()
return psl.privatesuffix(domain)
def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):

View File

@@ -48,7 +48,7 @@ dependencies = [
"lxml>=4.4.0",
"mailsuite>=1.6.1",
"msgraph-core>=0.2.2",
"publicsuffix2>=2.20190812",
"publicsuffixlist>=0.10.0",
"requests>=2.22.0",
"tqdm>=4.31.1",
"urllib3>=1.25.7",

View File

@@ -4,7 +4,7 @@ dnspython>=2.0.0
expiringdict>=1.1.4
urllib3>=1.25.7
requests>=2.22.0
publicsuffix2>=2.20190812
publicsuffixlist>=0.10.0
xmltodict>=0.12.0
geoip2>=3.0.0
imapclient>=2.1.0

View File

@@ -18,14 +18,13 @@ class Test(unittest.TestCase):
def testPSLDownload(self):
subdomain = "foo.example.com"
result = parsedmarc.utils.get_base_domain(subdomain,
use_fresh_psl=True)
result = parsedmarc.utils.get_base_domain(subdomain)
assert result == "example.com"
# Test PSL caching
result = parsedmarc.utils.get_base_domain(subdomain,
use_fresh_psl=True)
assert result == "example.com"
# Test newer PSL entries
subdomain = "e3191.c.akamaiedge.net"
result = parsedmarc.utils.get_base_domain(subdomain)
assert result == "c.akamaiedge.net"
def testAggregateSamples(self):
"""Test sample aggregate/rua DMARC reports"""