diff --git a/.gitignore b/.gitignore index bce2c73..1814944 100644 --- a/.gitignore +++ b/.gitignore @@ -130,5 +130,7 @@ stage*.ini dev*.ini # Private samples +samples/private -samples/private \ No newline at end of file +*.html +*.sqlite-journal diff --git a/parsedmarc/resources/senders/senders.sqlite b/parsedmarc/resources/senders/senders.sqlite index e69de29..2069208 100644 Binary files a/parsedmarc/resources/senders/senders.sqlite and b/parsedmarc/resources/senders/senders.sqlite differ diff --git a/parsedmarc/resources/senders/updatedb.py b/parsedmarc/resources/senders/updatedb.py index 65c4052..ea34221 100644 --- a/parsedmarc/resources/senders/updatedb.py +++ b/parsedmarc/resources/senders/updatedb.py @@ -1,9 +1,40 @@ -import urllib.parse import sqlite3 from bs4 import BeautifulSoup -dmarc_io = "https://dmarc.io" + +db = sqlite3.connect("senders.sqlite") +db.execute(""" +CREATE TABLE IF NOT EXISTS "senders" ( + "id" INTEGER UNIQUE NOT NULL, + "name" TEXT UNIQUE NOT NULL, + "spf_aligned" INTEGER, + "dkim_aligned" INTEGER, + "known_to_forward" INTEGER, + "forward_dkim_intact" INTEGER, + "forward_own_envelope_domain" INTEGER, + "support_url" TEXT, + "dmarc_io_uri" TEXT UNIQUE, + PRIMARY KEY("id" AUTOINCREMENT), + CHECK("spf_aligned" = 0 or "spf_aligned" = 1), + CHECK("dkim_aligned" = 0 or "dkim_aligned" = 1), + CHECK("known_to_forward" = 0 or "known_to_forward" = 1), + CHECK("forward_dkim_intact" = 0 or "forward_dkim_intact" = 1), + CHECK( + "forward_own_envelope_domain" = 0 or "forward_own_envelope_domain" = 1 + ) +) +""") +db.execute(""" +CREATE TABLE IF NOT EXISTS "reverse_dns" ( + "id" INTEGER UNIQUE NOT NULL, + "base_domain" TEXT UNIQUE NOT NULL, + "sender_id" INTEGER NOT NULL, + PRIMARY KEY("id" AUTOINCREMENT), + FOREIGN KEY(sender_id) REFERENCES senders(id) +) +""") +curser = db.cursor() with open("sources.html") as sources_file: content = sources_file.read() soup = BeautifulSoup(content, "html.parser") @@ -13,7 +44,20 @@ for row in rows: data = row.find_all("td") link = data[0].find("a") name = link.text - dmarc_io_url = urllib.parse.urljoin(dmarc_io, link.get("href")) + dmarc_io_uri = link.get("href") spf_aligned = len(data[1].find_all("i")) dkim_aligned = len(data[2].find_all("i")) - print(name) + params = (name, spf_aligned, dkim_aligned, 0, + dmarc_io_uri) + curser.execute(""" + UPDATE senders + SET name = ?, + spf_aligned = ?, + dkim_aligned = ?, + known_to_forward = ? + WHERE dmarc_io_uri = ?""", params) + db.commit() + curser.execute(""" + INSERT OR IGNORE INTO senders(name, spf_aligned, dkim_aligned, + known_to_forward, dmarc_io_uri) values (?,?,?,?,?)""", params) + db.commit() diff --git a/requirements.txt b/requirements.txt index ac19106..30dfa3f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,3 +38,5 @@ google-auth-oauthlib>=0.4.6 hatch>=1.5.0 myst-parser>=0.18.0 myst-parser[linkify] +requests +bs4