Compare commits


28 Commits

Author SHA1 Message Date
Sean Whalen
98342ecac6 8.19.1 (#627)
- Ignore HTML content type in report email parsing (#626)
2025-11-29 11:37:31 -05:00
Sean Whalen
38a3d4eaae Code formatting 2025-11-28 12:48:55 -05:00
Sean Whalen
a05c230152 8.19.0 (#622)
8.19.0

- Add multi-tenant support via an index-prefix domain mapping file
- PSL overrides so that services like AWS are correctly identified
- Additional improvements to report type detection
- Fix webhook timeout parsing (PR #623)
- Output to STDOUT when the new general config boolean `silent` is set to `False` (Closes #614)
- Additional services added to `base_reverse_dns_map.csv`

---------

Co-authored-by: Sean Whalen <seanthegeek@users.noreply.github.com>
Co-authored-by: Félix <felix.debloisbeaucage@gmail.com>
2025-11-28 12:47:00 -05:00
Sean Whalen
17bdc3a134 More tests cleanup 2025-11-21 09:10:59 -05:00
Sean Whalen
858be00f22 Fix badge links and update image source branch 2025-11-21 09:03:04 -05:00
Sean Whalen
597ca64f9f Clean up tests 2025-11-21 00:09:28 -05:00
Sean Whalen
c5dbe2c4dc 8.18.9
- Complete fix for #687 and more robust report type detection
2025-11-20 23:50:42 -05:00
Sean Whalen
082b3d355f 8.18.8
- Fix parsing emails with an uncompressed aggregate report attachment (Closes #607)
- Add `--no-prettify-json` CLI option (PR #617)
2025-11-20 20:47:57 -05:00
Sean Whalen
2a7ce47bb1 Update code coverage badge link to main branch 2025-11-20 20:28:10 -05:00
daminoux
9882405d96 Update README.md: fix screenshot URL (#620)
The URL of the screenshot is broken.
2025-11-20 20:27:15 -05:00
Andrew
fce84763b9 add --no-prettify-json CLI option (#617)
* updates process_reports to respect newly added prettify_json option

* removes duplicate definition

* removes redundant option

* fixes typo
2025-11-02 15:54:59 -05:00
Rowan
8a299b8600 Updated default python docker base image to 3.13-slim (#618)
* Updated default python docker base image to 3.13-slim

* Added python 3.13 to tests
2025-10-29 22:34:06 -04:00
jandr
b4c2b21547 Sorted usage of TLS on SMTP (#613)
Added a line so that the `email_results` function takes the `smtp_ssl` setting into account.
2025-08-25 13:51:10 -04:00
Sean Whalen
865c249437 Update features list 2025-08-24 13:39:50 -04:00
Sean Whalen
013859f10e Fix find_unknown_base_reverse_dns.py 2025-08-19 21:18:14 -04:00
Sean Whalen
6d4a31a120 Fix find_unknown_base_reverse_dns.py and sortlists.py 2025-08-19 20:59:42 -04:00
Sean Whalen
45d3dc3b2e Fix sortlists.py 2025-08-19 20:23:55 -04:00
Sean Whalen
4bbd97dbaa Improve list verification 2025-08-19 20:02:55 -04:00
Sean Whalen
5df152d469 Refactor find_unknown_base_reverse_dns.py 2025-08-18 12:59:54 -04:00
Sean Whalen
d990bef342 Use \n here too 2025-08-17 21:08:28 -04:00
Sean Whalen
caf77ca6d4 Use \n when writing CSVs 2025-08-17 21:01:07 -04:00
Sean Whalen
4b3d32c5a6 Actual, actual, actual 8.18.7 release
Revert to using the Python csv module instead of pandas to avoid conflicts with numpy in elasticsearch
2025-08-17 20:36:15 -04:00
Sean Whalen
5df5c10f80 Pin pandas and numpy versions 2025-08-17 19:59:53 -04:00
Sean Whalen
308d4657ab Make sort_csv function more flexible 2025-08-17 19:43:19 -04:00
Sean Whalen
0f74e33094 Fix typo 2025-08-17 19:35:16 -04:00
Sean Whalen
9f339e11f5 Actual 8.18.7 release 2025-08-17 19:34:14 -04:00
Sean Whalen
391e84b717 Fix map sorting 2025-08-17 18:15:20 -04:00
Sean Whalen
8bf06ce5af 8.18.7
Removed improper spaces from `base_reverse_dns_map.csv` (Closes #612)
2025-08-17 18:13:49 -04:00
24 changed files with 1174 additions and 212 deletions

View File

@@ -30,7 +30,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v4

4
.gitignore vendored
View File

@@ -106,7 +106,7 @@ ENV/
.idea/
# VS Code launch config
.vscode/launch.json
#.vscode/launch.json
# Visual Studio Code settings
#.vscode/
@@ -142,4 +142,6 @@ scratch.py
parsedmarc/resources/maps/base_reverse_dns.csv
parsedmarc/resources/maps/unknown_base_reverse_dns.csv
parsedmarc/resources/maps/sus_domains.csv
parsedmarc/resources/maps/unknown_domains.txt
*.bak

54
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,54 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
},
{
"name": "tests.py",
"type": "debugpy",
"request": "launch",
"program": "tests.py",
"console": "integratedTerminal"
},
{
"name": "sample.eml",
"type": "debugpy",
"request": "launch",
"module": "parsedmarc.cli",
"args": ["samples/private/sample.eml"]
},
{
"name": "find_sus_domains.py",
"type": "debugpy",
"request": "launch",
"program": "find_sus_domains.py",
"args": ["-i", "unknown_domains.txt", "-o", "sus_domains.csv"],
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
"console": "integratedTerminal"
},
{
"name": "sortlists.py",
"type": "debugpy",
"request": "launch",
"program": "sortlists.py",
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
"console": "integratedTerminal"
},
{
"name": "find_unknown_base_reverse_dns.py",
"type": "debugpy",
"request": "launch",
"program": "find_unknown_base_reverse_dns.py",
"cwd": "${workspaceFolder}/parsedmarc/resources/maps",
"console": "integratedTerminal"
}
]
}

View File

@@ -109,6 +109,7 @@
"setuptools",
"smartquotes",
"SMTPTLS",
"sortlists",
"sortmaps",
"sourcetype",
"STARTTLS",

View File

@@ -1,6 +1,37 @@
Changelog
=========
8.19.1
------
- Ignore HTML content type in report email parsing (#626)
8.19.0
------
- Add multi-tenant support via an index-prefix domain mapping file
- PSL overrides so that services like AWS are correctly identified
- Additional improvements to report type detection
- Fix webhook timeout parsing (PR #623)
- Output to STDOUT when the new general config boolean `silent` is set to `False` (Closes #614)
- Additional services added to `base_reverse_dns_map.csv`
8.18.9
------
- Complete fix for #687 and more robust report type detection
8.18.8
------
- Fix parsing emails with an uncompressed aggregate report attachment (Closes #607)
- Add `--no-prettify-json` CLI option (PR #617)
8.18.7
------
Removed improper spaces from `base_reverse_dns_map.csv` (Closes #612)
8.18.6
------

View File

@@ -1,4 +1,4 @@
ARG BASE_IMAGE=python:3.9-slim
ARG BASE_IMAGE=python:3.13-slim
ARG USERNAME=parsedmarc
ARG USER_UID=1000
ARG USER_GID=$USER_UID

View File

@@ -9,7 +9,7 @@ Package](https://img.shields.io/pypi/v/parsedmarc.svg)](https://pypi.org/project
[![PyPI - Downloads](https://img.shields.io/pypi/dm/parsedmarc?color=blue)](https://pypistats.org/packages/parsedmarc)
<p align="center">
<img src="https://github.com/domainaware/parsedmarc/raw/master/docs/source/_static/screenshots/dmarc-summary-charts.png?raw=true" alt="A screenshot of DMARC summary charts in Kibana"/>
<img src="https://raw.githubusercontent.com/domainaware/parsedmarc/refs/heads/master/docs/source/_static/screenshots/dmarc-summary-charts.png?raw=true" alt="A screenshot of DMARC summary charts in Kibana"/>
</p>
`parsedmarc` is a Python module and CLI utility for parsing DMARC
@@ -34,10 +34,10 @@ Thanks to all
## Features
- Parses draft and 1.0 standard aggregate/rua reports
- Parses forensic/failure/ruf reports
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail
API
- Parses draft and 1.0 standard aggregate/rua DMARC reports
- Parses forensic/failure/ruf DMARC reports
- Parses reports from SMTP TLS Reporting
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
- Transparently handles gzip or zip compressed reports
- Consistent data structures
- Simple JSON and/or CSV output

View File

@@ -19,7 +19,7 @@ if [ -d "./../parsedmarc-docs" ]; then
fi
cd ..
cd parsedmarc/resources/maps
python3 sortmaps.py
python3 sortlists.py
echo "Checking for invalid UTF-8 bytes in base_reverse_dns_map.csv"
python3 find_bad_utf8.py base_reverse_dns_map.csv
cd ../../..

View File

@@ -21,7 +21,6 @@
:members:
```
## parsedmarc.splunk
```{eval-rst}

View File

@@ -33,15 +33,16 @@ and Valimail.
## Features
- Parses draft and 1.0 standard aggregate/rua reports
- Parses forensic/failure/ruf reports
- Parses draft and 1.0 standard aggregate/rua DMARC reports
- Parses forensic/failure/ruf DMARC reports
- Parses reports from SMTP TLS Reporting
- Can parse reports from an inbox over IMAP, Microsoft Graph, or Gmail API
- Transparently handles gzip or zip compressed reports
- Consistent data structures
- Simple JSON and/or CSV output
- Optionally email the results
- Optionally send the results to Elasticsearch/OpenSearch and/or Splunk, for use with
premade dashboards
- Optionally send the results to Elasticsearch, Opensearch, and/or Splunk, for use
with premade dashboards
- Optionally send reports to Apache Kafka
```{toctree}

View File

@@ -120,8 +120,10 @@ The full set of configuration options are:
Elasticsearch, Splunk and/or S3
- `save_smtp_tls` - bool: Save SMTP TLS report data to
Elasticsearch, Splunk and/or S3
- `index_prefix_domain_map` - str: The path to a YAML file mapping Opensearch/Elasticsearch index prefixes to domain names
- `strip_attachment_payloads` - bool: Remove attachment
payloads from results
- `silent` - bool: Set this to `False` to output results to STDOUT
- `output` - str: Directory to place JSON and CSV files in. This is required if you set either of the JSON output file options.
- `aggregate_json_filename` - str: filename for the aggregate
JSON output file
@@ -445,6 +447,28 @@ PUT _cluster/settings
Increasing this value increases resource usage.
:::
## Multi-tenant support
Starting in `8.19.0`, parsedmarc provides multi-tenant support by placing data into separate OpenSearch or Elasticsearch index prefixes. To set this up, create a YAML file where each key is a tenant name and the value is a list of domains related to that tenant (not including subdomains), like this:
```yaml
example:
- example.com
- example.net
- example.org
whalensolutions:
- whalensolutions.com
```
Save it to disk where the user running parsedmarc can read it, then set `index_prefix_domain_map` to that file path in the `[general]` section of the parsedmarc configuration file, and do not set an `index_prefix` option in the `[elasticsearch]` or `[opensearch]` sections, as shown in the example below.
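A minimal `[general]` snippet, assuming the mapping file was saved to a hypothetical path:
```ini
[general]
save_aggregate = True
# Hypothetical location; use any path readable by the parsedmarc user
index_prefix_domain_map = /etc/parsedmarc/index_prefix_domain_map.yml
```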
When configured correctly, if parsedmarc finds that a report relates to a domain in the mapping, the report is saved in an index whose name has the tenant name prefixed to it with a trailing underscore. You can then use the security features of OpenSearch or the ELK stack to grant users access to only the indexes they need.
:::{note}
A domain cannot be used in multiple tenant lists. Only the first prefix list that contains the matching domain is used.
:::
## Running parsedmarc as a systemd service
Use systemd to run `parsedmarc` as a service and process reports as

View File

@@ -17,7 +17,7 @@ import zlib
from base64 import b64decode
from collections import OrderedDict
from csv import DictWriter
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from io import BytesIO, StringIO
from typing import Callable
@@ -1184,7 +1184,7 @@ def parse_report_email(
input_ = input_.decode(encoding="utf8", errors="replace")
msg = mailparser.parse_from_string(input_)
msg_headers = json.loads(msg.headers_json)
date = email.utils.format_datetime(datetime.utcnow())
date = email.utils.format_datetime(datetime.now(timezone.utc))
if "Date" in msg_headers:
date = human_timestamp_to_datetime(msg_headers["Date"])
msg = email.message_from_string(input_)
@@ -1200,12 +1200,16 @@ def parse_report_email(
if "Subject" in msg_headers:
subject = msg_headers["Subject"]
for part in msg.walk():
content_type = part.get_content_type()
content_type = part.get_content_type().lower()
payload = part.get_payload()
if not isinstance(payload, list):
payload = [payload]
payload = payload[0].__str__()
if content_type == "message/feedback-report":
if content_type.startswith("multipart/"):
continue
if content_type == "text/html":
continue
elif content_type == "message/feedback-report":
try:
if "Feedback-Type" in payload:
feedback_report = payload
@@ -1216,13 +1220,12 @@ def parse_report_email(
feedback_report = feedback_report.replace("\\n", "\n")
except (ValueError, TypeError, binascii.Error):
feedback_report = payload
elif content_type == "text/rfc822-headers":
sample = payload
elif content_type == "message/rfc822":
sample = payload
elif content_type == "application/tlsrpt+json":
if "{" not in payload:
if not payload.strip().startswith("{"):
payload = str(b64decode(payload))
smtp_tls_report = parse_smtp_tls_report_json(payload)
return OrderedDict(
@@ -1234,7 +1237,6 @@ def parse_report_email(
return OrderedDict(
[("report_type", "smtp_tls"), ("report", smtp_tls_report)]
)
elif content_type == "text/plain":
if "A message claiming to be from you has failed" in payload:
try:
@@ -1261,13 +1263,14 @@ def parse_report_email(
payload = b64decode(payload)
if payload.startswith(MAGIC_ZIP) or payload.startswith(MAGIC_GZIP):
payload = extract_report(payload)
ns = nameservers
if payload.startswith("{"):
smtp_tls_report = parse_smtp_tls_report_json(payload)
result = OrderedDict(
[("report_type", "smtp_tls"), ("report", smtp_tls_report)]
)
return result
if isinstance(payload, bytes):
payload = payload.decode("utf-8", errors="replace")
if payload.strip().startswith("{"):
smtp_tls_report = parse_smtp_tls_report_json(payload)
result = OrderedDict(
[("report_type", "smtp_tls"), ("report", smtp_tls_report)]
)
elif payload.strip().startswith("<"):
aggregate_report = parse_aggregate_report_xml(
payload,
ip_db_path=ip_db_path,
@@ -1275,25 +1278,24 @@ def parse_report_email(
reverse_dns_map_path=reverse_dns_map_path,
reverse_dns_map_url=reverse_dns_map_url,
offline=offline,
nameservers=ns,
nameservers=nameservers,
timeout=dns_timeout,
keep_alive=keep_alive,
)
result = OrderedDict(
[("report_type", "aggregate"), ("report", aggregate_report)]
)
return result
except (TypeError, ValueError, binascii.Error):
pass
except InvalidAggregateReport as e:
error = (
'Message with subject "{0}" '
"is not a valid "
"aggregate DMARC report: {1}".format(subject, e)
except InvalidDMARCReport:
error = 'Message with subject "{0}" is not a valid DMARC report'.format(
subject
)
raise InvalidDMARCReport(error)
raise ParserError(error)
except Exception as e:
error = 'Unable to parse message with subject "{0}": {1}'.format(
@@ -1604,14 +1606,18 @@ def get_dmarc_reports_from_mailbox(
"Only days and weeks values in 'since' option are \
considered for IMAP connections. Examples: 2d or 1w"
)
since = (datetime.utcnow() - timedelta(minutes=_since)).date()
current_time = datetime.utcnow().date()
since = (datetime.now(timezone.utc) - timedelta(minutes=_since)).date()
current_time = datetime.now(timezone.utc).date()
elif isinstance(connection, MSGraphConnection):
since = (datetime.utcnow() - timedelta(minutes=_since)).isoformat() + "Z"
current_time = datetime.utcnow().isoformat() + "Z"
since = (
datetime.now(timezone.utc) - timedelta(minutes=_since)
).isoformat() + "Z"
current_time = datetime.now(timezone.utc).isoformat() + "Z"
elif isinstance(connection, GmailConnection):
since = (datetime.utcnow() - timedelta(minutes=_since)).strftime("%s")
current_time = datetime.utcnow().strftime("%s")
since = (datetime.now(timezone.utc) - timedelta(minutes=_since)).strftime(
"%s"
)
current_time = datetime.now(timezone.utc).strftime("%s")
else:
pass

View File

@@ -9,6 +9,7 @@ from configparser import ConfigParser
from glob import glob
import logging
import math
import yaml
from collections import OrderedDict
import json
from ssl import CERT_NONE, create_default_context
@@ -46,7 +47,7 @@ from parsedmarc.mail import (
from parsedmarc.mail.graph import AuthMethod
from parsedmarc.log import logger
from parsedmarc.utils import is_mbox, get_reverse_dns
from parsedmarc.utils import is_mbox, get_reverse_dns, get_base_domain
from parsedmarc import SEEN_AGGREGATE_REPORT_IDS
http.client._MAXHEADERS = 200 # pylint:disable=protected-access
@@ -101,8 +102,35 @@ def cli_parse(
def _main():
"""Called when the module is executed"""
def get_index_prefix(report):
if index_prefix_domain_map is None:
return None
domain = None
if "policy_published" in report:
domain = report["policy_published"]["domain"]
elif "reported_domain" in report:
domain = report["reported_domain"]
elif "policies" in report:
domain = report["policies"][0]["domain"]
if domain:
domain = get_base_domain(domain)
for prefix in index_prefix_domain_map:
if domain in index_prefix_domain_map[prefix]:
prefix = (
prefix.lower()
.strip()
.strip("_")
.replace(" ", "_")
.replace("-", "_")
)
prefix = f"{prefix}_"
return prefix
return None
def process_reports(reports_):
output_str = "{0}\n".format(json.dumps(reports_, ensure_ascii=False, indent=2))
indent_value = 2 if opts.prettify_json else None
output_str = "{0}\n".format(
json.dumps(reports_, ensure_ascii=False, indent=indent_value)
)
if not opts.silent:
print(output_str)
@@ -126,7 +154,8 @@ def _main():
elastic.save_aggregate_report_to_elasticsearch(
report,
index_suffix=opts.elasticsearch_index_suffix,
index_prefix=opts.elasticsearch_index_prefix,
index_prefix=opts.elasticsearch_index_prefix
or get_index_prefix(report),
monthly_indexes=opts.elasticsearch_monthly_indexes,
number_of_shards=shards,
number_of_replicas=replicas,
@@ -147,7 +176,8 @@ def _main():
opensearch.save_aggregate_report_to_opensearch(
report,
index_suffix=opts.opensearch_index_suffix,
index_prefix=opts.opensearch_index_prefix,
index_prefix=opts.opensearch_index_prefix
or get_index_prefix(report),
monthly_indexes=opts.opensearch_monthly_indexes,
number_of_shards=shards,
number_of_replicas=replicas,
@@ -189,8 +219,9 @@ def _main():
try:
if opts.webhook_aggregate_url:
indent_value = 2 if opts.prettify_json else None
webhook_client.save_aggregate_report_to_webhook(
json.dumps(report, ensure_ascii=False, indent=2)
json.dumps(report, ensure_ascii=False, indent=indent_value)
)
except Exception as error_:
logger.error("Webhook Error: {0}".format(error_.__str__()))
@@ -212,7 +243,8 @@ def _main():
elastic.save_forensic_report_to_elasticsearch(
report,
index_suffix=opts.elasticsearch_index_suffix,
index_prefix=opts.elasticsearch_index_prefix,
index_prefix=opts.elasticsearch_index_prefix
or get_index_prefix(report),
monthly_indexes=opts.elasticsearch_monthly_indexes,
number_of_shards=shards,
number_of_replicas=replicas,
@@ -231,7 +263,8 @@ def _main():
opensearch.save_forensic_report_to_opensearch(
report,
index_suffix=opts.opensearch_index_suffix,
index_prefix=opts.opensearch_index_prefix,
index_prefix=opts.opensearch_index_prefix
or get_index_prefix(report),
monthly_indexes=opts.opensearch_monthly_indexes,
number_of_shards=shards,
number_of_replicas=replicas,
@@ -271,8 +304,9 @@ def _main():
try:
if opts.webhook_forensic_url:
indent_value = 2 if opts.prettify_json else None
webhook_client.save_forensic_report_to_webhook(
json.dumps(report, ensure_ascii=False, indent=2)
json.dumps(report, ensure_ascii=False, indent=indent_value)
)
except Exception as error_:
logger.error("Webhook Error: {0}".format(error_.__str__()))
@@ -294,7 +328,8 @@ def _main():
elastic.save_smtp_tls_report_to_elasticsearch(
report,
index_suffix=opts.elasticsearch_index_suffix,
index_prefix=opts.elasticsearch_index_prefix,
index_prefix=opts.elasticsearch_index_prefix
or get_index_prefix(report),
monthly_indexes=opts.elasticsearch_monthly_indexes,
number_of_shards=shards,
number_of_replicas=replicas,
@@ -313,7 +348,8 @@ def _main():
opensearch.save_smtp_tls_report_to_opensearch(
report,
index_suffix=opts.opensearch_index_suffix,
index_prefix=opts.opensearch_index_prefix,
index_prefix=opts.opensearch_index_prefix
or get_index_prefix(report),
monthly_indexes=opts.opensearch_monthly_indexes,
number_of_shards=shards,
number_of_replicas=replicas,
@@ -353,8 +389,9 @@ def _main():
try:
if opts.webhook_smtp_tls_url:
indent_value = 2 if opts.prettify_json else None
webhook_client.save_smtp_tls_report_to_webhook(
json.dumps(report, ensure_ascii=False, indent=2)
json.dumps(report, ensure_ascii=False, indent=indent_value)
)
except Exception as error_:
logger.error("Webhook Error: {0}".format(error_.__str__()))
@@ -475,6 +512,12 @@ def _main():
"--debug", action="store_true", help="print debugging information"
)
arg_parser.add_argument("--log-file", default=None, help="output logging to a file")
arg_parser.add_argument(
"--no-prettify-json",
action="store_false",
dest="prettify_json",
help="output JSON in a single line without indentation",
)
arg_parser.add_argument("-v", "--version", action="version", version=__version__)
aggregate_reports = []
@@ -504,6 +547,7 @@ def _main():
dns_timeout=args.dns_timeout,
debug=args.debug,
verbose=args.verbose,
prettify_json=args.prettify_json,
save_aggregate=False,
save_forensic=False,
save_smtp_tls=False,
@@ -625,9 +669,16 @@ def _main():
exit(-1)
opts.silent = True
config = ConfigParser()
index_prefix_domain_map = None
config.read(args.config_file)
if "general" in config.sections():
general_config = config["general"]
if "silent" in general_config:
if general_config["silent"].lower() == "false":
opts.silent = False
if "index_prefix_domain_map" in general_config:
with open(general_config["index_prefix_domain_map"]) as f:
index_prefix_domain_map = yaml.safe_load(f)
if "offline" in general_config:
opts.offline = general_config.getboolean("offline")
if "strip_attachment_payloads" in general_config:
@@ -701,6 +752,8 @@ def _main():
opts.reverse_dns_map_path = general_config["reverse_dns_map_path"]
if "reverse_dns_map_url" in general_config:
opts.reverse_dns_map_url = general_config["reverse_dns_map_url"]
if "prettify_json" in general_config:
opts.prettify_json = general_config.getboolean("prettify_json")
if "mailbox" in config.sections():
mailbox_config = config["mailbox"]
@@ -1167,7 +1220,7 @@ def _main():
if "smtp_tls_url" in webhook_config:
opts.webhook_smtp_tls_url = webhook_config["smtp_tls_url"]
if "timeout" in webhook_config:
opts.webhook_timeout = webhook_config["timeout"]
opts.webhook_timeout = webhook_config.getint("timeout")
logger.setLevel(logging.ERROR)
@@ -1586,6 +1639,7 @@ def _main():
username=opts.smtp_user,
password=opts.smtp_password,
subject=opts.smtp_subject,
require_encryption=opts.smtp_ssl,
)
except Exception:
logger.exception("Failed to email results")
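As context for the `get_index_prefix` helper added earlier in this file: tenant keys from the mapping are normalized before being used as index prefixes. A standalone sketch of that transform (the tenant name is made up):

```python
def normalize_prefix(prefix: str) -> str:
    # Lowercase, trim whitespace and surrounding underscores, then
    # replace spaces and hyphens with underscores and append "_"
    prefix = (
        prefix.lower().strip().strip("_").replace(" ", "_").replace("-", "_")
    )
    return f"{prefix}_"


print(normalize_prefix("Whalen Solutions"))  # -> whalen_solutions_
```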

View File

@@ -1,2 +1,2 @@
__version__ = "8.18.6"
__version__ = "8.19.1"
USER_AGENT = f"parsedmarc/{__version__}"

File diff suppressed because it is too large

View File

@@ -0,0 +1,44 @@
Agriculture
Automotive
Beauty
Conglomerate
Construction
Consulting
Defense
Education
Email Provider
Email Security
Entertainment
Event Planning
Finance
Food
Government
Government Media
Healthcare
ISP
IaaS
Industrial
Legal
Logistics
MSP
MSSP
Manufacturing
Marketing
News
Nonprofit
PaaS
Photography
Physical Security
Print
Publishing
Real Estate
Retail
SaaS
Science
Search Engine
Social Media
Sports
Staffing
Technology
Travel
Web Host

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python
import logging
import os
import csv
@@ -16,60 +15,48 @@ def _main():
output_rows = []
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
for p in [
input_csv_file_path,
base_reverse_dns_map_file_path,
known_unknown_list_file_path,
psl_overrides_file_path,
]:
if not os.path.exists(p):
logger.error(f"{p} does not exist")
exit(1)
logger.info(f"Loading {known_unknown_list_file_path}")
known_unknown_domains = []
with open(known_unknown_list_file_path) as f:
for line in f.readlines():
domain = line.lower().strip()
if domain in known_unknown_domains:
logger.warning(
f"{domain} is in {known_unknown_list_file_path} multiple times"
)
else:
known_unknown_domains.append(domain)
logger.info(f"Loading {psl_overrides_file_path}")
psl_overrides = []
with open(psl_overrides_file_path) as f:
for line in f.readlines():
domain = line.lower().strip()
if domain in psl_overrides:
logger.warning(
f"{domain} is in {psl_overrides_file_path} multiple times"
)
else:
psl_overrides.append(domain)
logger.info(f"Loading {base_reverse_dns_map_file_path}")
known_domains = []
output_rows = []
def load_list(file_path, list_var):
if not os.path.exists(file_path):
print(f"Error: {file_path} does not exist")
exit(1)
print(f"Loading {file_path}")
with open(file_path) as f:
for line in f.readlines():
domain = line.lower().strip()
if domain in list_var:
print(f"Error: {domain} is in {file_path} multiple times")
exit(1)
elif domain != "":
list_var.append(domain)
load_list(known_unknown_list_file_path, known_unknown_domains)
load_list(psl_overrides_file_path, psl_overrides)
if not os.path.exists(base_reverse_dns_map_file_path):
print(f"Error: {base_reverse_dns_map_file_path} does not exist")
exit(1)
print(f"Loading {base_reverse_dns_map_file_path}")
with open(base_reverse_dns_map_file_path) as f:
for row in csv.DictReader(f):
domain = row["base_reverse_dns"].lower().strip()
if domain in known_domains:
logger.warning(
f"{domain} is in {base_reverse_dns_map_file_path} multiple times"
print(
f"Error: {domain} is in {base_reverse_dns_map_file_path} multiple times"
)
exit()
else:
known_domains.append(domain)
if domain in known_unknown_domains and domain in known_domains:
logger.warning(
f"{domain} is in {known_unknown_list_file_path} and \
print(
f"Error: {domain} is in {known_unknown_list_file_path} and \
{base_reverse_dns_map_file_path}"
)
logger.info(f"Checking domains against {base_reverse_dns_map_file_path}")
exit(1)
if not os.path.exists(input_csv_file_path):
print(f"Error: {input_csv_file_path} does not exist")
exit(1)
with open(input_csv_file_path) as f:
for row in csv.DictReader(f):
domain = row["source_name"].lower().strip()
@@ -77,12 +64,12 @@ def _main():
continue
for psl_domain in psl_overrides:
if domain.endswith(psl_domain):
domain = psl_domain
domain = psl_domain.strip(".").strip("-")
break
if domain not in known_domains and domain not in known_unknown_domains:
logger.info(f"New unknown domain found: {domain}")
print(f"New unknown domain found: {domain}")
output_rows.append(row)
logger.info(f"Writing {output_csv_file_path}")
print(f"Writing {output_csv_file_path}")
with open(output_csv_file_path, "w") as f:
writer = csv.DictWriter(f, fieldnames=csv_headers)
writer.writeheader()

View File

@@ -1,10 +1,15 @@
185.in-addr.arpa
190.in-addr.arpa
200.in-addr.arpa
1jli.site
26.107
444qcuhilla.com
4xr1.com
9services.com
a7e.ru
a94434500-blog.com
aams8.jp
abv-10.top
acemail.co.in
activaicon.com
adcritic.net
adlucrumnewsletter.com
admin.corpivensa.gob.ve
advantageiq.com
@@ -15,6 +20,11 @@ aghories.com
ai270.net
albagroup-eg.com
alchemy.net
alohabeachcamp.net
alsiscad.com
aluminumpipetubing.com
americanstorageca.com
amplusserver.info
anchorfundhub.com
anglishment.com
anteldata.net.uy
@@ -26,98 +36,186 @@ aosau.net
arandomserver.com
aransk.ru
ardcs.cn
armninl.met
as29550.net
asahachimaru.com
aserv.co.za
asmecam.it
ateky.net.br
aurelienvos.com
automatech.lat
avistaadvantage.com
b8sales.com
bahjs.com
baliaura.com
banaras.co
bearandbullmarketnews.com
bestinvestingtime.com
bhjui.com
biocorp.com
biosophy.net
bitter-echo.com
bizhostingservices.com
blguss.com
bluenet.ch
bluhosting.com
bnasg.com
bodiax.pp.ua
bost-law.com
brainity.com
brazalnde.net
brellatransplc.shop
brnonet.cz
broadwaycover.com
brushinglegal.de
brw.net
btes.tv
budgeteasehub.com
buoytoys.com
buyjapanese.jp
c53dw7m24rj.com
cahtelrandom.org
casadelmarsamara.com
cashflowmasterypro.com
cavabeen.com
cbti.net
centralmalaysia.com
chauffeurplan.co.uk
checkpox.fun
chegouseuvlache.org
chinaxingyu.xyz
christus.mx
churchills.market
ci-xyz.fit
cisumrecords.com
ckaik.cn
clcktoact.com
cli-eurosignal.cz
cloud-admin.it
cloud-edm.com
cloudaccess.net
cloudflare-email.org
cloudhosting.rs
cloudlogin.co
cloudplatformpro.com
cnode.io
cntcloud.com
code-it.net
codefriend.top
colombiaceropapel.org
commerceinsurance.com
comsharempc.com
conexiona.com
coolblaze.com
coowo.com
corpemail.net
cp2-myorderbox.com
cps.com.ar
crnagora.net
cross-d-bar-troutranch.com
ctla.co.kr
cumbalikonakhotel.com
currencyexconverter.com
daakbabu.com
daikinmae.com
dairyvalley.com.my
dastans.ru
datahost36.de
ddii.network
deep-sek.shop
deetownsounds.com
descarca-counter-strike.net
detrot.xyz
dettlaffinc.com
dextoolse.net
digestivedaily.com
digi.net.my
dinofelis.cn
diwkyncbi.top
dkginternet.com
dnexpress.info
dns-oid.com
dnsindia.net
domainserver.ne.jp
domconfig.com
doorsrv.com
dreampox.fun
dreamtechmedia.com
ds.network
dss-group.net
dvj.theworkpc.com
dwlcka.com
dynamic-wiretel.in
dyntcorp.com
easternkingspei.com
economiceagles.com
egosimail.com
eliotporterphotos.us
emailgids.net
emailperegrine.com
entendercopilot.com
entretothom.net
epaycontrol.com
epicinvestmentsreview.co
epicinvestmentsreview.com
epik.com
epsilon-group.com
erestaff.com
euro-trade-gmbh.com
example.com
exposervers.com-new
extendcp.co.uk
eyecandyhosting.xyz
fastwebnet.it
fd9ing7wfn.com
feipnghardware.com
fetscorp.shop
fewo-usedom.net
fin-crime.com
financeaimpoint.com
financeupward.com
firmflat.com
flex-video.bnr.la
flourishfusionlife.com
formicidaehunt.net
fosterheap.com
fredi.shop
frontiernet.net
ftifb7tk3c.com
gamersprotectionvpn.online
gendns.com
getgreencardsfast.com
getthatroi.com
gibbshosting.com
gigidea.net
giize.com
ginous.eu.com
gis.net
gist-th.com
globalglennpartners.com
goldsboroughplace.com
gophermedia.com
gqlists.us.com
gratzl.de
greatestworldnews.com
greennutritioncare.com
gsbb.com
gumbolimbo.net
h-serv.co.uk
haedefpartners.com
halcyon-aboveboard.com
hanzubon.org
healthfuljourneyjoy.com
hgnbroken.us.com
highwey-diesel.com
hirofactory.com
hjd.asso.fr
hongchenggco.pro
hongkongtaxi.co
hopsinthehanger.com
hosted-by-worldstream.net
hostelsucre.com
hosting1337.com
hostinghane.com
hostinglotus.cloud
hostingmichigan.com
hostiran.name
@@ -125,8 +223,11 @@ hostmnl.com
hostname.localhost
hostnetwork.com
hosts.net.nz
hostserv.eu
hostwhitelabel.com
hpms1.jp
hunariojmk.net
hunriokinmuim.net
hypericine.com
i-mecca.net
iaasdns.com
@@ -134,36 +235,88 @@ iam.net.ma
iconmarketingguy.com
idcfcloud.net
idealconcept.live
igmohji.com
igppevents.org.uk
ihglobaldns.com
ilmessicano.com
imjtmn.cn
immenzaces.com
in-addr-arpa
in-addr.arpa
indsalelimited.com
indulgent-holistic.com
industechint.org
inshaaegypt.com
intal.uz
interfarma.kz
intocpanel.com
ip-147-135-108.us
ip-178-33-109.eu
ip-ptr.tech
iswhatpercent.com
itsidc.com
itwebs.com
iuon.net
ivol.co
jalanet.co.id
jimishare.com
jlccptt.net.cn
jlenterprises.co.uk
jmontalto.com
joyomokei.com
jumanra.org
justlongshirts.com
kahlaa.com
kaw.theworkpc.com
kbronet.com.tw
kdnursing.org
kielnet.net
kihy.theworkpc.com
kingschurchwirral.org
kitchenaildbd.com
klaomi.shop
knkconsult.net
kohshikai.com
krhfund.org
krillaglass.com
lancorhomes.com
landpedia.org
lanzatuseo.es
layerdns.cloud
learninglinked.com
legenditds.com
levertechcentre.com
lhost.no
lideri.net.br
lighthouse-media.com
lightpath.net
limogesporcelainboxes.com
lindsaywalt.net
linuxsunucum.com
listertermoformadoa.com
llsend.com
local.net
lohkal.com
londionrtim.net
lonestarmm.net
longmarquis.com
longwoodmgmt.com
lse.kz
lunvoy.com
luxarpro.ru
lwl-puehringer.at
lynx.net.lb
lyse.net
m-sender.com.ua
maggiolicloud.it
magnetmail.net
magnumgo.uz
maia11.com
mail-fire.com
mailsentinel.net
mailset.cn
malardino.net
managed-vps.net
manhattanbulletpoint.com
manpowerservices.com
marketmysterycode.com
@@ -173,8 +326,23 @@ matroguel.cam
maximpactipo.com
mechanicalwalk.store
mediavobis.com
meqlobal.com
mgts.by
migrans.net
miixta.com
milleniumsrv.com
mindworksunlimited.com
mirth-gale.com
misorpresa.com
mitomobile.com
mitsubachi-kibako.net
mjinn.com
mkegs.shop
mobius.fr
model-ac.ink
moderntradingnews.com
monnaiegroup.com
monopolizeright.com
moonjaws.com
morningnewscatcher.com
motion4ever.net
@@ -182,122 +350,245 @@ mschosting.com
msdp1.com
mspnet.pro
mts-nn.ru
multifamilydesign.com
mxserver.ro
mxthunder.net
my-ihor.ru
mycloudmailbox.com
myfriendforum.com
myrewards.net
mysagestore.com
mysecurewebserver.com
myshanet.net
myvps.jp
mywedsite.net
mywic.eu
name.tools
nanshenqfurniture.com
nask.pl
navertise.net
ncbb.kz
ncport.ru
ncsdi.ws
nebdig.com
neovet-base.ru
netbri.com
netcentertelecom.net.br
neti.ee
netkl.org
newinvestingguide.com
newwallstreetcode.com
ngvcv.cn
nic.name
nidix.net
nieuwedagnetwerk.net
nlscanme.com
nmeuh.cn
noisndametal.com
nucleusemail.com
nutriboostlife.com
nwo.giize.com
nwwhalewatchers.org
ny.adsl
nyt1.com
offerslatedeals.com
office365.us
ogicom.net
olivettilexikon.co.uk
omegabrasil.inf.br
onnet21.com
onumubunumu.com
oppt-ac.fit
orbitel.net.co
orfsurface.com
orientalspot.com
outsidences.com
ovaltinalization.co
overta.ru
ox28vgrurc.com
pamulang.net
panaltyspot.space
panolacountysheriffms.com
passionatesmiles.com
paulinelam.com
pdi-corp.com
peloquinbeck.com
perimetercenter.net
permanentscreen.com
permasteellisagroup.com
perumkijhyu.net
pesnia.com.ua
ph8ltwdi12o.com
pharmada.com.de
phdns3.es
pigelixval1.com
pipefittingsindia.com
planethoster.net
playamedia.io
plesk.page
pmnhost.net
pokiloandhu.net
pokupki5.ru
polandi.net
popiup.com
ports.net
posolstvostilya.com
potia.net
prima.com.ar
prima.net.ar
profsol.co.uk
prohealthmotion.com
promooffermarket.site
proudserver.com
proxado.com
psnm.ru
pvcwindowsprices.live
qontenciplc.autos
quakeclick.com
quasarstate.store
quatthonggiotico.com
qxyxab44njd.com
radianthealthrenaissance.com
rapidns.com
raxa.host
reberte.com
reethvikintl.com
regruhosting.ru
reliablepanel.com
rgb365.eu
riddlecamera.net
riddletrends.com
roccopugliese.com
runnin-rebels.com
rupar.puglia.it
rwdhosting.ca
s500host.com
sageevents.co.ke
sahacker-2020.com
samsales.site
sante-lorraine.fr
saransk.ru
satirogluet.com
scioncontacts.com
sdcc.my
seaspraymta3.net
secorp.mx
securen.net
securerelay.in
securev.net
seductiveeyes.com
seizethedayconsulting.com
serroplast.shop
server290.com
server342.com
server3559.cc
servershost.biz
sfek.kz
sgnetway.net
shopfox.ca
silvestrejaguar.sbs
silvestreonca.sbs
simplediagnostics.org
siriuscloud.jp
sisglobalresearch.com
sixpacklink.net
sjestyle.com
smallvillages.com
smartape-vps.com
solusoftware.com
sourcedns.com
southcoastwebhosting12.com
specialtvvs.com
spiritualtechnologies.io
sprout.org
srv.cat
stableserver.net
statlerfa.co.uk
stock-smtp.top
stockepictigers.com
stockexchangejournal.com
subterranean-concave.com
suksangroup.com
swissbluetopaz.com
switer.shop
sysop4.com
system.eu.com
szhongbing.com
t-jon.com
tacaindo.net
tacom.tj
tankertelz.co
tataidc.com
teamveiw.com
tecnoxia.net
tel-xyz.fit
tenkids.net
terminavalley.com
thaicloudsolutions.com
thaikinghost.com
thaimonster.com
thegermainetruth.net
thehandmaderose.com
thepushcase.com
ticdns.com
tigo.bo
toledofibra.net.br
topdns.com
totaal.net
totalplay.net
tqh.ro
traderlearningcenter.com
tradeukraine.site
traveleza.com
trwww.com
tsuzakij.com
tullostrucking.com
turbinetrends.com
twincitiesdistinctivehomes.com
tylerfordonline.com
uiyum.com
ultragate.com
uneedacollie.com
unified.services
unite.services
urawasl.com
us.servername.us
vagebond.net
varvia.de
vbcploo.com
vdc.vn
vendimetry.com
vibrantwellnesscorp.com
virtualine.org
visit.docotor
viviotech.us
vlflgl.com
volganet.ru
vrns.net
vulterdi.edu
vvondertex.com
wallstreetsgossip.com
wamego.net
wanekoohost.com
wealthexpertisepro.com
web-login.eu
weblinkinternational.com
webnox.io
websale.net
welllivinghive.com
westparkcom.com
wetransfer-eu.com
wheelch.me
whoflew.com
whpservers.com
wisdomhard.com
wisewealthcircle.com
wisvis.com
wodeniowa.com
wordpresshosting.xyz
wsiph2.com
xnt.mx
xodiax.com
xpnuf.cn
xsfati.us.com
xspmail.jp
@@ -305,5 +596,6 @@ yourciviccompass.com
yourinvestworkbook.com
yoursitesecure.net
zerowebhosting.net
zmml.uk
znlc.jp
ztomy.com

View File

@@ -1,6 +1,23 @@
akura.ne.jp
amazonaws.com
cloudaccess.net
h-serv.co.uk
linode.com
plesk.page
-applefibernet.com
-c3.net.pl
-celsiainternet.com
-clientes-izzi.mx
-clientes-zap-izzi.mx
-imnet.com.br
-mcnbd.com
-smile.com.bd
-tataidc.co.in
-veloxfiber.com.br
-wconect.com.br
.amazonaws.com
.cloudaccess.net
.ddnsgeek.com
.fastvps-server.com
.in-addr-arpa
.in-addr.arpa
.kasserver.com
.kinghost.net
.linode.com
.linodeusercontent.com
.na4u.ru
.sakura.ne.jp

View File

@@ -0,0 +1,184 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
import csv
from pathlib import Path
from typing import Mapping, Iterable, Optional, Collection, Union, Dict
class CSVValidationError(Exception):
def __init__(self, errors: list[str]):
super().__init__("\n".join(errors))
self.errors = errors
def sort_csv(
filepath: Union[str, Path],
field: str,
*,
sort_field_value_must_be_unique: bool = True,
strip_whitespace: bool = True,
fields_to_lowercase: Optional[Iterable[str]] = None,
case_insensitive_sort: bool = False,
required_fields: Optional[Iterable[str]] = None,
allowed_values: Optional[Mapping[str, Collection[str]]] = None,
) -> None:
"""
Read a CSV, optionally normalize rows (strip whitespace, lowercase certain fields),
validate field values, and write the sorted CSV back to the same path.
- filepath: Path to the CSV to sort.
- field: The field name to sort by.
- sort_field_value_must_be_unique: Require each sort field value to be unique.
- fields_to_lowercase: Permanently lowercase these field(s) in the data.
- strip_whitespace: Remove whitespace at the beginning and end of field values.
- case_insensitive_sort: Ignore case when sorting without changing values.
- required_fields: A list of fields that must have data in all rows.
- allowed_values: A mapping of allowed values for fields.
"""
path = Path(filepath)
required_fields = set(required_fields or [])
lower_set = set(fields_to_lowercase or [])
allowed_sets = {k: set(v) for k, v in (allowed_values or {}).items()}
if sort_field_value_must_be_unique:
seen_sort_field_values = []
with path.open("r", newline="") as infile:
reader = csv.DictReader(infile)
fieldnames = reader.fieldnames or []
if field not in fieldnames:
raise CSVValidationError([f"Missing sort column: {field!r}"])
missing_headers = required_fields - set(fieldnames)
if missing_headers:
raise CSVValidationError(
[f"Missing required header(s): {sorted(missing_headers)}"]
)
rows = list(reader)
def normalize_row(row: Dict[str, str]) -> None:
if strip_whitespace:
for k, v in row.items():
if isinstance(v, str):
row[k] = v.strip()
for fld in lower_set:
if fld in row and isinstance(row[fld], str):
row[fld] = row[fld].lower()
def validate_row(
row: Dict[str, str], sort_field: str, line_no: int, errors: list[str]
) -> None:
if sort_field_value_must_be_unique:
if row[sort_field] in seen_sort_field_values:
errors.append(f"Line {line_no}: Duplicate row for '{row[sort_field]}'")
else:
seen_sort_field_values.append(row[sort_field])
for rf in required_fields:
val = row.get(rf)
if val is None or val == "":
errors.append(
f"Line {line_no}: Missing value for required field '{rf}'"
)
for fld_name, allowed in allowed_sets.items():
if fld_name in row:
val = row[fld_name]
if val not in allowed:
errors.append(
f"Line {line_no}: '{val}' is not an allowed value for '{fld_name}' "
f"(allowed: {sorted(allowed)})"
)
errors: list[str] = []
for idx, row in enumerate(rows, start=2): # header is line 1
normalize_row(row)
validate_row(row, field, idx, errors)
if errors:
raise CSVValidationError(errors)
def sort_key(r: Dict[str, str]):
v = r.get(field, "")
if isinstance(v, str) and case_insensitive_sort:
return v.casefold()
return v
rows.sort(key=sort_key)
with open(filepath, "w", newline="") as outfile:
writer = csv.DictWriter(outfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(rows)
def sort_list_file(
filepath: Union[str, Path],
*,
lowercase: bool = True,
strip: bool = True,
deduplicate: bool = True,
remove_blank_lines: bool = True,
ending_newline: bool = True,
newline: Optional[str] = "\n",
):
"""Read a list from a file, sort it, optionally strip and deduplicate the values,
then write that list back to the file.
- filepath: The path to the file.
- lowercase: Lowercase all values prior to sorting.
- remove_blank_lines: Remove any blank lines.
- ending_newline: End the file with a newline, even if remove_blank_lines is true.
- newline: The newline character to use.
"""
with open(filepath, mode="r", newline=newline) as infile:
lines = infile.readlines()
for i in range(len(lines)):
if lowercase:
lines[i] = lines[i].lower()
if strip:
lines[i] = lines[i].strip()
if deduplicate:
lines = list(set(lines))
if remove_blank_lines:
while "" in lines:
lines.remove("")
lines = sorted(lines)
if ending_newline:
if not lines or lines[-1] != "":
lines.append("")
with open(filepath, mode="w", newline=newline) as outfile:
outfile.write("\n".join(lines))
def _main():
map_file = "base_reverse_dns_map.csv"
map_key = "base_reverse_dns"
list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
types_file = "base_reverse_dns_types.txt"
if not os.path.exists(types_file):
print(f"Error: {types_file} does not exist")
exit(1)
with open(types_file) as f:
types = [line.strip() for line in f.readlines()]
while "" in types:
types.remove("")
map_allowed_values = {"Type": types}
for list_file in list_files:
if not os.path.exists(list_file):
print(f"Error: {list_file} does not exist")
exit(1)
sort_list_file(list_file)
sort_list_file(types_file, lowercase=False)
if not os.path.exists(map_file):
print(f"Error: {map_file} does not exist")
exit(1)
try:
sort_csv(map_file, map_key, allowed_values=map_allowed_values)
except CSVValidationError as e:
print(f"{map_file} did not validate: {e}")
if __name__ == "__main__":
_main()
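A quick usage sketch of `sort_csv` outside of `_main()`; the file name and column names here are hypothetical:

```python
# Sort services.csv by its "domain" column, lowercasing domains,
# requiring a non-empty "name" in every row, and restricting "type"
# to two allowed values; raises CSVValidationError on any violation.
sort_csv(
    "services.csv",
    "domain",
    fields_to_lowercase=["domain"],
    required_fields=["name"],
    allowed_values={"type": ["ISP", "SaaS"]},
)
```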

View File

@@ -1,59 +0,0 @@
#!/usr/bin/env python3
import os
import csv
maps_dir = os.path.join(".")
map_files = ["base_reverse_dns_map.csv"]
list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
def sort_csv(filepath, column=0):
with open(filepath, mode="r", newline="") as infile:
reader = csv.reader(infile)
header = next(reader)
sorted_rows = sorted(reader, key=lambda row: row[column])
existing_values = []
for row in sorted_rows:
if row[column] in existing_values:
print(f"Warning: {row[column]} is in {filepath} multiple times")
with open(filepath, mode="w", newline="\n") as outfile:
writer = csv.writer(outfile)
writer.writerow(header)
writer.writerows(sorted_rows)
def sort_list_file(
filepath,
lowercase=True,
strip=True,
deduplicate=True,
remove_blank_lines=True,
ending_newline=True,
newline="\n",
):
with open(filepath, mode="r", newline=newline) as infile:
lines = infile.readlines()
for i in range(len(lines)):
if lowercase:
lines[i] = lines[i].lower()
if strip:
lines[i] = lines[i].strip()
if deduplicate:
lines = list(set(lines))
if remove_blank_lines:
while "" in lines:
lines.remove("")
lines = sorted(lines)
if ending_newline:
if lines[-1] != "":
lines.append("")
with open(filepath, mode="w", newline=newline) as outfile:
outfile.write("\n".join(lines))
for csv_file in map_files:
sort_csv(os.path.join(maps_dir, csv_file))
for list_file in list_files:
sort_list_file(os.path.join(maps_dir, list_file))

View File

@@ -44,6 +44,12 @@ parenthesis_regex = re.compile(r"\s*\(.*\)\s*")
null_file = open(os.devnull, "w")
mailparser_logger = logging.getLogger("mailparser")
mailparser_logger.setLevel(logging.CRITICAL)
psl = publicsuffixlist.PublicSuffixList()
psl_overrides_path = str(files(parsedmarc.resources.maps).joinpath("psl_overrides.txt"))
with open(psl_overrides_path) as f:
psl_overrides = [line.rstrip() for line in f.readlines()]
while "" in psl_overrides:
psl_overrides.remove("")
class EmailParserError(RuntimeError):
@@ -78,7 +84,8 @@ def get_base_domain(domain):
.. note::
Results are based on a list of public domain suffixes at
https://publicsuffix.org/list/public_suffix_list.dat.
https://publicsuffix.org/list/public_suffix_list.dat and overrides included in
parsedmarc.resources.maps.psl_overrides.txt
Args:
domain (str): A domain or subdomain
@@ -87,8 +94,12 @@ def get_base_domain(domain):
str: The base domain of the given domain
"""
psl = publicsuffixlist.PublicSuffixList()
return psl.privatesuffix(domain)
domain = domain.lower()
publicsuffix = psl.privatesuffix(domain)
for override in psl_overrides:
if domain.endswith(override):
return override.strip(".").strip("-")
return publicsuffix
def query_dns(domain, record_type, cache=None, nameservers=None, timeout=2.0):
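To illustrate the override behavior added above: assuming the overrides list contains `.amazonaws.com` (one of the entries in `psl_overrides.txt` in this changeset), a subdomain hosted there maps to the override itself rather than to a deeper PSL private suffix. A standalone sketch of just that check:

```python
# Sketch of the override check; the real function falls back to a
# publicsuffixlist lookup when no override matches.
psl_overrides = [".amazonaws.com"]


def base_domain_with_overrides(domain: str) -> str:
    domain = domain.lower()
    for override in psl_overrides:
        if domain.endswith(override):
            # ".amazonaws.com" -> "amazonaws.com"
            return override.strip(".").strip("-")
    return domain  # placeholder for the PSL fallback


print(base_domain_with_overrides("abc.us-east-1.amazonaws.com"))
# -> amazonaws.com
```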

View File

@@ -55,6 +55,7 @@ dependencies = [
"tqdm>=4.31.1",
"urllib3>=1.25.7",
"xmltodict>=0.12.0",
"PyYAML>=6.0.3"
]
[project.optional-dependencies]

View File

@@ -43,11 +43,12 @@ class Test(unittest.TestCase):
def testExtractReportXMLComparator(self):
"""Test XML comparator function"""
print()
xmlnice = open("samples/extract_report/nice-input.xml").read()
print(xmlnice)
xmlchanged = minify_xml(open("samples/extract_report/changed-input.xml").read())
print(xmlchanged)
xmlnice_file = open("samples/extract_report/nice-input.xml")
xmlnice = xmlnice_file.read()
xmlnice_file.close()
xmlchanged_file = open("samples/extract_report/changed-input.xml")
xmlchanged = minify_xml(xmlchanged_file.read())
xmlchanged_file.close()
self.assertTrue(compare_xml(xmlnice, xmlnice))
self.assertTrue(compare_xml(xmlchanged, xmlchanged))
self.assertFalse(compare_xml(xmlnice, xmlchanged))
@@ -62,7 +63,9 @@ class Test(unittest.TestCase):
data = f.read()
print("Testing {0}: ".format(file), end="")
xmlout = parsedmarc.extract_report(data)
xmlin = open("samples/extract_report/nice-input.xml").read()
xmlin_file = open("samples/extract_report/nice-input.xml")
xmlin = xmlin_file.read()
xmlin_file.close()
self.assertTrue(compare_xml(xmlout, xmlin))
print("Passed!")
@@ -72,7 +75,9 @@ class Test(unittest.TestCase):
file = "samples/extract_report/nice-input.xml"
print("Testing {0}: ".format(file), end="")
xmlout = parsedmarc.extract_report(file)
xmlin = open("samples/extract_report/nice-input.xml").read()
xmlin_file = open("samples/extract_report/nice-input.xml")
xmlin = xmlin_file.read()
xmlin_file.close()
self.assertTrue(compare_xml(xmlout, xmlin))
print("Passed!")
@@ -82,7 +87,9 @@ class Test(unittest.TestCase):
file = "samples/extract_report/nice-input.xml.gz"
print("Testing {0}: ".format(file), end="")
xmlout = parsedmarc.extract_report_from_file_path(file)
xmlin = open("samples/extract_report/nice-input.xml").read()
xmlin_file = open("samples/extract_report/nice-input.xml")
xmlin = xmlin_file.read()
xmlin_file.close()
self.assertTrue(compare_xml(xmlout, xmlin))
print("Passed!")
@@ -92,12 +99,13 @@ class Test(unittest.TestCase):
file = "samples/extract_report/nice-input.xml.zip"
print("Testing {0}: ".format(file), end="")
xmlout = parsedmarc.extract_report_from_file_path(file)
print(xmlout)
xmlin = minify_xml(open("samples/extract_report/nice-input.xml").read())
print(xmlin)
xmlin_file = open("samples/extract_report/nice-input.xml")
xmlin = minify_xml(xmlin_file.read())
xmlin_file.close()
self.assertTrue(compare_xml(xmlout, xmlin))
xmlin = minify_xml(open("samples/extract_report/changed-input.xml").read())
print(xmlin)
xmlin_file = open("samples/extract_report/changed-input.xml")
xmlin = xmlin_file.read()
xmlin_file.close()
self.assertFalse(compare_xml(xmlout, xmlin))
print("Passed!")