diff --git a/parsedmarc/resources/senders/README.md b/parsedmarc/resources/senders/README.md index 1ebd729..56b84de 100644 --- a/parsedmarc/resources/senders/README.md +++ b/parsedmarc/resources/senders/README.md @@ -1,5 +1,12 @@ # About +A database of information about services that send email. + +## Updates + +To update the database with the latest information from [dmarc.io][dmarc.io], start a +[Splash][splash] Docker container, then run `updatedb.py`. + ## Credit `senders.sqlite` contains information from [dmarc.io][dmarc.io] that is licensed under a @@ -11,4 +18,5 @@ [Creative Commons Attribution-ShareAlike 4.0 International License][cc]. [dmarc.io]: https://dmarc.io/ +[splash]: https://splash.readthedocs.io/en/stable/ [cc]: https://creativecommons.org/licenses/by-sa/4.0/ \ No newline at end of file diff --git a/parsedmarc/resources/senders/senders.sqlite b/parsedmarc/resources/senders/senders.sqlite index 2069208..43e8e0c 100644 Binary files a/parsedmarc/resources/senders/senders.sqlite and b/parsedmarc/resources/senders/senders.sqlite differ diff --git a/parsedmarc/resources/senders/updatedb.py b/parsedmarc/resources/senders/updatedb.py index ea34221..2a8fc52 100644 --- a/parsedmarc/resources/senders/updatedb.py +++ b/parsedmarc/resources/senders/updatedb.py @@ -1,3 +1,5 @@ +import requests + import sqlite3 from bs4 import BeautifulSoup @@ -35,8 +37,8 @@ CREATE TABLE IF NOT EXISTS "reverse_dns" ( ) """) curser = db.cursor() -with open("sources.html") as sources_file: - content = sources_file.read() +content = requests.get("http://localhost:8050/render.html", + params=dict(url="https://dmarc.io/sources/")).content soup = BeautifulSoup(content, "html.parser") table = soup.find("tbody") rows = table.find_all("tr") @@ -61,3 +63,29 @@ for row in rows: INSERT OR IGNORE INTO senders(name, spf_aligned, dkim_aligned, known_to_forward, dmarc_io_uri) values (?,?,?,?,?)""", params) db.commit() +content = requests.get("http://localhost:8050/render.html", +
params=dict(url="https://dmarc.io/forwarders/")).content +soup = BeautifulSoup(content, "html.parser") +table = soup.find("tbody") +rows = table.find_all("tr") +for row in rows: + data = row.find_all("td") + link = data[0].find("a") + name = link.text + dmarc_io_uri = link.get("href") + forward_dkim_intact = len(data[1].find_all("i")) + forward_own_envelope_domain = len(data[2].find_all("i")) + params = (name, forward_dkim_intact, forward_own_envelope_domain, 1, + dmarc_io_uri) + curser.execute(""" + UPDATE senders + SET name = ?, + forward_dkim_intact = ?, + forward_own_envelope_domain = ?, + known_to_forward = ? + WHERE dmarc_io_uri = ?""", params) + db.commit() + curser.execute(""" + INSERT OR IGNORE INTO senders(name, forward_dkim_intact, forward_own_envelope_domain, + known_to_forward, dmarc_io_uri) values (?,?,?,?,?)""", params) + db.commit()