Mirror of https://github.com/domainaware/parsedmarc.git, synced 2026-02-18 15:36:24 +00:00
Compare commits
26 Commits
- 2b7ae50a27
- 3feb478793
- 01630bb61c
- 39347cb244
- ed25526d59
- 880d7110fe
- d62001f5a4
- 0720bffcb6
- fecd55a97d
- a121306eed
- 980c9c7904
- 963f5d796f
- 6532f3571b
- ea878443a8
- 9f6de41958
- 119192701c
- 1d650be48a
- a85553fb18
- 5975d8eb21
- 87ae6175f2
- 68b93ed580
- 55508b513b
- 71511c0cfc
- 7c45812284
- 607a091a5f
- c308bf938c
1 .gitignore vendored
@@ -142,3 +142,4 @@ scratch.py
parsedmarc/resources/maps/base_reverse_dns.csv
parsedmarc/resources/maps/unknown_base_reverse_dns.csv
*.bak
11 .vscode/settings.json vendored
@@ -13,6 +13,7 @@
"automodule",
"backported",
"bellsouth",
"boto",
"brakhane",
"Brightmail",
"CEST",
@@ -36,6 +37,7 @@
"expiringdict",
"fieldlist",
"genindex",
"geoip",
"geoipupdate",
"Geolite",
"geolocation",
@@ -44,7 +46,10 @@
"hostnames",
"htpasswd",
"httpasswd",
"httplib",
"IMAP",
"imapclient",
"infile",
"Interaktive",
"IPDB",
"journalctl",
@@ -80,14 +85,18 @@
"nosecureimap",
"nosniff",
"nwettbewerb",
"opensearch",
"parsedmarc",
"passsword",
"Postorius",
"premade",
"procs",
"publicsuffix",
"publicsuffixlist",
"publixsuffix",
"pygelf",
"pypy",
"pytest",
"quickstart",
"Reindex",
"replyto",
@@ -95,10 +104,12 @@
"Rollup",
"Rpdm",
"SAMEORIGIN",
"sdist",
"Servernameone",
"setuptools",
"smartquotes",
"SMTPTLS",
"sortmaps",
"sourcetype",
"STARTTLS",
"tasklist",
28 CHANGELOG.md
@@ -1,6 +1,32 @@
Changelog
=========

8.18.6
------

- Fix the `since` option to correctly work with weeks (PR #604)
- Add 183 entries to `base_reverse_dns_map.csv`
- Add 57 entries to `known_unknown_base_reverse_dns.txt`
- Check for invalid UTF-8 bytes in `base_reverse_dns_map.csv` at build
- Exclude unneeded items from the `parsedmarc.resources` module at build

8.18.5
------

- Fix CSV download

8.18.4
------

- Fix webhooks

8.18.3
------

- Move `__version__` to `parsedmarc.constants`
- Create a constant `USER_AGENT`
- Use the HTTP `User-Agent` header value `parsedmarc/version` for all HTTP requests

8.18.2
------
@@ -676,7 +702,7 @@ in the ``elasticsearch`` configuration file section (closes issue #78)
-----

- Add filename and line number to logging output
- Improved IMAP error handling
- Improved IMAP error handling
- Add CLI options

```text
9 build.sh
@@ -18,8 +18,11 @@ if [ -d "./../parsedmarc-docs" ]; then
    cp -rf build/html/* ../../parsedmarc-docs/
fi
cd ..
sort -o "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt" "parsedmarc/resources/maps/known_unknown_base_reverse_dns.txt"
./sortmaps.py
cd parsedmarc/resources/maps
python3 sortmaps.py
echo "Checking for invalid UTF-8 bytes in base_reverse_dns_map.csv"
python3 find_bad_utf8.py base_reverse_dns_map.csv
cd ../../..
python3 tests.py
rm -rf dist/ build/
hatch build
hatch build
@@ -369,7 +369,7 @@ The full set of configuration options are:
- `mode` - str: The GELF transport type to use. Valid modes: `tcp`, `udp`, `tls`

- `maildir`
  - `reports_folder` - str: Full path of the mailbox maildir location (Default: `INBOX`)
  - `maildir_path` - str: Full path of the mailbox maildir location (Default: `INBOX`)
  - `maildir_create` - bool: Create the maildir if it is not present (Default: False)

- `webhook` - Post the individual reports to a webhook URL with the report as the JSON body
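As a rough illustration of the webhook delivery described above (this is not the parsedmarc implementation; the URL, report payload, and timeout are placeholders), a single parsed report is POSTed as a JSON body:

```python
import json

import requests

# Illustrative only: deliver one parsed report as a JSON body to a webhook.
webhook_url = "https://example.com/dmarc-reports"  # placeholder URL
report = {"report_id": "example-0001", "org_name": "example.net"}  # placeholder payload

response = requests.post(
    webhook_url,
    data=json.dumps(report),
    headers={
        "User-Agent": "parsedmarc/8.18.6",
        "Content-Type": "application/json",
    },
    timeout=60,
)
response.raise_for_status()
```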
@@ -34,12 +34,13 @@ from parsedmarc.mail import (
    MSGraphConnection,
    GmailConnection,
)

from parsedmarc.constants import __version__
from parsedmarc.utils import get_base_domain, get_ip_address_info
from parsedmarc.utils import is_outlook_msg, convert_outlook_msg
from parsedmarc.utils import parse_email
from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime

__version__ = "8.18.2"

logger.debug("parsedmarc v{0}".format(__version__))

@@ -1579,7 +1580,7 @@ def get_dmarc_reports_from_mailbox(

    if since:
        _since = 1440  # default one day
        if re.match(r"\d+[mhd]$", since):
        if re.match(r"\d+[mhdw]$", since):
            s = re.split(r"(\d+)", since)
            if s[2] == "m":
                _since = int(s[1])
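To make the widened regex concrete, here is a small hypothetical helper (not the actual parsedmarc code, which continues with `re.split` as shown above) that converts a `since` value into minutes, including the newly supported `w` unit:

```python
import re


def since_to_minutes(since: str, default: int = 1440) -> int:
    """Hypothetical helper: convert '30m', '12h', '7d' or '2w' to minutes."""
    if not re.match(r"\d+[mhdw]$", since):
        return default  # fall back to one day, as in the snippet above
    number, unit = int(since[:-1]), since[-1]
    multipliers = {"m": 1, "h": 60, "d": 60 * 24, "w": 60 * 24 * 7}
    return number * multipliers[unit]


assert since_to_minutes("2w") == 20160  # two weeks expressed in minutes
```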
2 parsedmarc/constants.py Normal file
@@ -0,0 +1,2 @@
__version__ = "8.18.6"
USER_AGENT = f"parsedmarc/{__version__}"
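The remaining diffs in this compare apply the new constant to outgoing HTTP requests; a minimal sketch of that pattern (the URL below is a stand-in, not one parsedmarc uses) looks like:

```python
import requests

from parsedmarc.constants import USER_AGENT

session = requests.Session()
# Identify the client the same way the Splunk HEC and webhook clients below do.
session.headers["User-Agent"] = USER_AGENT

# Stand-in URL for illustration only.
response = session.get("https://example.com/base_reverse_dns_map.csv", timeout=30)
response.raise_for_status()
```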
@@ -3,6 +3,8 @@
A mapping is meant to make it easier to identify who or what a sending source is. Please consider contributing
additional mappings in a GitHub Pull Request.

Do not open these CSV files in Excel. It will replace Unicode characters with question marks. Use LibreOffice Calc instead.

## base_reverse_dns_map.csv

A CSV file with three fields: `base_reverse_dns`, `name`, and `type`.
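As a quick orientation, here is a sketch of loading the map with the standard library, assuming the header row matches the field names listed above:

```python
import csv

# Sketch only: build a lookup of base reverse DNS domain -> (name, type).
reverse_dns_map = {}
with open("base_reverse_dns_map.csv", newline="", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        reverse_dns_map[row["base_reverse_dns"]] = (row["name"], row["type"])
```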
@@ -25,6 +27,7 @@ The `service_type` is based on the following rule precedence:
- Agriculture
- Automotive
- Beauty
- Conglomerate
- Construction
- Consulting
- Defense
@@ -41,6 +44,7 @@ The `service_type` is based on the following rule precedence:
- IaaS
- Industrial
- ISP
- Legal
- Logistics
- Manufacturing
- Marketing
@@ -50,6 +54,7 @@ The `service_type` is based on the following rule precedence:
- Nonprofit
- PaaS
- Photography
- Physical Security
- Print
- Publishing
- Real Estate
@@ -72,12 +77,16 @@ A list of reverse DNS base domains that could not be identified as belonging to

## base_reverse_dns.csv

A CSV with the fields `source_name` and optionally `message_countcount`. This CSV can be generated byy exporting the base DNS data from the Kibana on Splunk dashboards provided by parsedmarc. This file is not tracked by Git.
A CSV with the fields `source_name` and optionally `message_count`. This CSV can be generated by exporting the base DNS data from the Kibana or Splunk dashboards provided by parsedmarc. This file is not tracked by Git.

## unknown_base_reverse_dns.csv

A CSV file with the fields `source_name` and `message_count`. This file is not tracked by Git.

## find_bad_utf8.py

Locates invalid UTF-8 bytes in files and optionally tries to correct them. Generated by GPT-5. Helped me find where I had introduced invalid bytes in `base_reverse_dns_map.csv`.

## find_unknown_base_reverse_dns.py

This is a Python script that reads the domains in `base_reverse_dns.csv` and writes the domains that are not in `base_reverse_dns_map.csv` or `known_unknown_base_reverse_dns.txt` to `unknown_base_reverse_dns.csv`. This is useful for identifying potential additional domains to contribute to `base_reverse_dns_map.csv` and `known_unknown_base_reverse_dns.txt`.
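A simplified sketch of that filtering step follows; the real script, shown in a diff further down, also applies `psl_overrides.txt` and logs duplicates, so treat this as an outline rather than the actual implementation:

```python
import csv

# Sketch only; field names follow the descriptions above.
with open("known_unknown_base_reverse_dns.txt") as f:
    known_unknown = {line.strip().lower() for line in f if line.strip()}

with open("base_reverse_dns_map.csv") as f:
    known = {row["base_reverse_dns"].lower() for row in csv.DictReader(f)}

unknown_rows = []
with open("base_reverse_dns.csv") as f:
    for row in csv.DictReader(f):
        domain = row["source_name"].lower().strip()
        if domain and domain not in known and domain not in known_unknown:
            unknown_rows.append(
                {"source_name": domain, "message_count": row.get("message_count", "")}
            )

with open("unknown_base_reverse_dns.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["source_name", "message_count"])
    writer.writeheader()
    writer.writerows(unknown_rows)
```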
File diff suppressed because it is too large.
488 parsedmarc/resources/maps/find_bad_utf8.py Executable file
@@ -0,0 +1,488 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
import argparse
|
||||
import codecs
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
from typing import List, Tuple
|
||||
|
||||
"""
|
||||
Locates and optionally corrects bad UTF-8 bytes in a file.
|
||||
Generated by GPT-5. Use at your own risk.
|
||||
"""
|
||||
|
||||
# -------------------------
|
||||
# UTF-8 scanning
|
||||
# -------------------------
|
||||
|
||||
|
||||
def scan_line_for_utf8_errors(
|
||||
line_bytes: bytes, line_no: int, base_offset: int, context: int
|
||||
):
|
||||
"""
|
||||
Scan one line of raw bytes for UTF-8 decoding errors.
|
||||
Returns a list of dicts describing each error.
|
||||
"""
|
||||
pos = 0
|
||||
results = []
|
||||
while pos < len(line_bytes):
|
||||
dec = codecs.getincrementaldecoder("utf-8")("strict")
|
||||
try:
|
||||
dec.decode(line_bytes[pos:], final=True)
|
||||
break
|
||||
except UnicodeDecodeError as e:
|
||||
rel_index = e.start
|
||||
abs_index_in_line = pos + rel_index
|
||||
abs_offset = base_offset + abs_index_in_line
|
||||
|
||||
start_ctx = max(0, abs_index_in_line - context)
|
||||
end_ctx = min(len(line_bytes), abs_index_in_line + 1 + context)
|
||||
ctx_bytes = line_bytes[start_ctx:end_ctx]
|
||||
bad_byte = line_bytes[abs_index_in_line : abs_index_in_line + 1]
|
||||
col = abs_index_in_line + 1 # 1-based byte column
|
||||
|
||||
results.append(
|
||||
{
|
||||
"line": line_no,
|
||||
"column": col,
|
||||
"abs_offset": abs_offset,
|
||||
"bad_byte_hex": bad_byte.hex(),
|
||||
"context_hex": ctx_bytes.hex(),
|
||||
"context_preview": ctx_bytes.decode("utf-8", errors="replace"),
|
||||
}
|
||||
)
|
||||
# Move past the offending byte and continue
|
||||
pos = abs_index_in_line + 1
|
||||
return results
|
||||
|
||||
|
||||
def scan_file_for_utf8_errors(path: str, context: int, limit: int):
|
||||
errors_found = 0
|
||||
limit_val = limit if limit != 0 else float("inf")
|
||||
|
||||
with open(path, "rb") as f:
|
||||
total_offset = 0
|
||||
line_no = 0
|
||||
while True:
|
||||
line = f.readline()
|
||||
if not line:
|
||||
break
|
||||
line_no += 1
|
||||
results = scan_line_for_utf8_errors(line, line_no, total_offset, context)
|
||||
for r in results:
|
||||
errors_found += 1
|
||||
print(
|
||||
f"[ERROR {errors_found}] Line {r['line']}, Column {r['column']}, "
|
||||
f"Absolute byte offset {r['abs_offset']}"
|
||||
)
|
||||
print(f" Bad byte: 0x{r['bad_byte_hex']}")
|
||||
print(f" Context (hex): {r['context_hex']}")
|
||||
print(f" Context (preview): {r['context_preview']}")
|
||||
print()
|
||||
if errors_found >= limit_val:
|
||||
print(f"Reached limit of {limit} errors. Stopping.")
|
||||
return errors_found
|
||||
total_offset += len(line)
|
||||
|
||||
if errors_found == 0:
|
||||
print("No invalid UTF-8 bytes found. 🎉")
|
||||
else:
|
||||
print(f"Found {errors_found} invalid UTF-8 byte(s).")
|
||||
return errors_found
|
||||
|
||||
|
||||
# -------------------------
|
||||
# Whole-file conversion
|
||||
# -------------------------
|
||||
|
||||
|
||||
def detect_encoding_text(path: str) -> Tuple[str, str]:
|
||||
"""
|
||||
Use charset-normalizer to detect file encoding.
|
||||
Return (encoding_name, decoded_text). Falls back to cp1252 if needed.
|
||||
"""
|
||||
try:
|
||||
from charset_normalizer import from_path
|
||||
except ImportError:
|
||||
print(
|
||||
"Please install charset-normalizer: pip install charset-normalizer",
|
||||
file=sys.stderr,
|
||||
)
|
||||
sys.exit(4)
|
||||
|
||||
matches = from_path(path)
|
||||
match = matches.best()
|
||||
if match is None or match.encoding is None:
|
||||
# Fallback heuristic for Western single-byte text
|
||||
with open(path, "rb") as fb:
|
||||
data = fb.read()
|
||||
try:
|
||||
return "cp1252", data.decode("cp1252", errors="strict")
|
||||
except UnicodeDecodeError:
|
||||
print("Unable to detect encoding reliably.", file=sys.stderr)
|
||||
sys.exit(5)
|
||||
|
||||
return match.encoding, str(match)
|
||||
|
||||
|
||||
def convert_to_utf8(src_path: str, out_path: str, src_encoding: str = None) -> str:
|
||||
"""
|
||||
Convert an entire file to UTF-8 (re-decoding everything).
|
||||
If src_encoding is provided, use it; else auto-detect.
|
||||
Returns the encoding actually used.
|
||||
"""
|
||||
if src_encoding:
|
||||
with open(src_path, "rb") as fb:
|
||||
data = fb.read()
|
||||
try:
|
||||
text = data.decode(src_encoding, errors="strict")
|
||||
except LookupError:
|
||||
print(f"Unknown encoding: {src_encoding}", file=sys.stderr)
|
||||
sys.exit(6)
|
||||
except UnicodeDecodeError as e:
|
||||
print(f"Decoding failed with {src_encoding}: {e}", file=sys.stderr)
|
||||
sys.exit(7)
|
||||
used = src_encoding
|
||||
else:
|
||||
used, text = detect_encoding_text(src_path)
|
||||
|
||||
with open(out_path, "w", encoding="utf-8", newline="") as fw:
|
||||
fw.write(text)
|
||||
return used
|
||||
|
||||
|
||||
def verify_utf8_file(path: str) -> Tuple[bool, str]:
|
||||
try:
|
||||
with open(path, "rb") as fb:
|
||||
fb.read().decode("utf-8", errors="strict")
|
||||
return True, ""
|
||||
except UnicodeDecodeError as e:
|
||||
return False, str(e)
|
||||
|
||||
|
||||
# -------------------------
|
||||
# Targeted single-byte fixer
|
||||
# -------------------------
|
||||
|
||||
|
||||
def iter_lines_with_offsets(b: bytes):
|
||||
"""
|
||||
Yield (line_bytes, line_start_abs_offset). Preserves LF/CRLF/CR in bytes.
|
||||
"""
|
||||
start = 0
|
||||
for i, byte in enumerate(b):
|
||||
if byte == 0x0A: # LF
|
||||
yield b[start : i + 1], start
|
||||
start = i + 1
|
||||
if start < len(b):
|
||||
yield b[start:], start
|
||||
|
||||
|
||||
def detect_probable_fallbacks() -> List[str]:
|
||||
# Good defaults for Western/Portuguese text
|
||||
return ["cp1252", "iso-8859-1", "iso-8859-15"]
|
||||
|
||||
|
||||
def repair_mixed_utf8_line(line: bytes, base_offset: int, fallback_chain: List[str]):
|
||||
"""
|
||||
Strictly validate UTF-8 and fix *only* the exact offending byte when an error occurs.
|
||||
This avoids touching adjacent valid UTF-8 (prevents mojibake like 'é').
|
||||
"""
|
||||
out_fragments: List[str] = []
|
||||
fixes = []
|
||||
pos = 0
|
||||
n = len(line)
|
||||
|
||||
while pos < n:
|
||||
dec = codecs.getincrementaldecoder("utf-8")("strict")
|
||||
try:
|
||||
s = dec.decode(line[pos:], final=True)
|
||||
out_fragments.append(s)
|
||||
break
|
||||
except UnicodeDecodeError as e:
|
||||
# Append the valid prefix before the error
|
||||
if e.start > 0:
|
||||
out_fragments.append(
|
||||
line[pos : pos + e.start].decode("utf-8", errors="strict")
|
||||
)
|
||||
|
||||
bad_index = pos + e.start # absolute index in 'line'
|
||||
bad_slice = line[bad_index : bad_index + 1] # FIX EXACTLY ONE BYTE
|
||||
|
||||
# Decode that single byte using the first working fallback
|
||||
decoded = None
|
||||
used_enc = None
|
||||
for enc in fallback_chain:
|
||||
try:
|
||||
decoded = bad_slice.decode(enc, errors="strict")
|
||||
used_enc = enc
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
if decoded is None:
|
||||
# latin-1 always succeeds (byte->same code point)
|
||||
decoded = bad_slice.decode("latin-1")
|
||||
used_enc = "latin-1 (fallback)"
|
||||
|
||||
out_fragments.append(decoded)
|
||||
|
||||
# Log the fix
|
||||
col_1based = bad_index + 1 # byte-based column
|
||||
fixes.append(
|
||||
{
|
||||
"line_base_offset": base_offset,
|
||||
"line": None, # caller fills line number
|
||||
"column": col_1based,
|
||||
"abs_offset": base_offset + bad_index,
|
||||
"bad_bytes_hex": bad_slice.hex(),
|
||||
"used_encoding": used_enc,
|
||||
"replacement_preview": decoded,
|
||||
}
|
||||
)
|
||||
|
||||
# Advance exactly one byte past the offending byte and continue
|
||||
pos = bad_index + 1
|
||||
|
||||
return "".join(out_fragments), fixes
|
||||
|
||||
|
||||
def targeted_fix_to_utf8(
|
||||
src_path: str,
|
||||
out_path: str,
|
||||
fallback_chain: List[str],
|
||||
dry_run: bool,
|
||||
max_fixes: int,
|
||||
):
|
||||
with open(src_path, "rb") as fb:
|
||||
data = fb.read()
|
||||
|
||||
total_fixes = 0
|
||||
repaired_lines: List[str] = []
|
||||
line_no = 0
|
||||
max_val = max_fixes if max_fixes != 0 else float("inf")
|
||||
|
||||
for line_bytes, base_offset in iter_lines_with_offsets(data):
|
||||
line_no += 1
|
||||
# Fast path: keep lines that are already valid UTF-8
|
||||
try:
|
||||
repaired_lines.append(line_bytes.decode("utf-8", errors="strict"))
|
||||
continue
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
|
||||
fixed_text, fixes = repair_mixed_utf8_line(
|
||||
line_bytes, base_offset, fallback_chain=fallback_chain
|
||||
)
|
||||
for f in fixes:
|
||||
f["line"] = line_no
|
||||
|
||||
repaired_lines.append(fixed_text)
|
||||
|
||||
# Log fixes
|
||||
for f in fixes:
|
||||
total_fixes += 1
|
||||
print(
|
||||
f"[FIX {total_fixes}] Line {f['line']}, Column {f['column']}, Abs offset {f['abs_offset']}"
|
||||
)
|
||||
print(f" Bad bytes: 0x{f['bad_bytes_hex']}")
|
||||
print(f" Used encoding: {f['used_encoding']}")
|
||||
preview = f["replacement_preview"].replace("\r", "\\r").replace("\n", "\\n")
|
||||
if len(preview) > 40:
|
||||
preview = preview[:40] + "…"
|
||||
print(f" Replacement preview: {preview}")
|
||||
print()
|
||||
if total_fixes >= max_val:
|
||||
print(f"Reached max fixes limit ({max_fixes}). Stopping scan.")
|
||||
break
|
||||
if total_fixes >= max_val:
|
||||
break
|
||||
|
||||
if dry_run:
|
||||
print(f"Dry run complete. Detected {total_fixes} fix(es). No file written.")
|
||||
return total_fixes
|
||||
|
||||
# Join and verify result can be encoded to UTF-8
|
||||
repaired_text = "".join(repaired_lines)
|
||||
try:
|
||||
repaired_text.encode("utf-8", errors="strict")
|
||||
except UnicodeEncodeError as e:
|
||||
print(f"Internal error: repaired text not valid UTF-8: {e}", file=sys.stderr)
|
||||
sys.exit(3)
|
||||
|
||||
with open(out_path, "w", encoding="utf-8", newline="") as fw:
|
||||
fw.write(repaired_text)
|
||||
|
||||
print(f"Fixed file written to: {out_path}")
|
||||
print(f"Total fixes applied: {total_fixes}")
|
||||
return total_fixes
|
||||
|
||||
|
||||
# -------------------------
|
||||
# CLI
|
||||
# -------------------------
|
||||
|
||||
|
||||
def main():
|
||||
ap = argparse.ArgumentParser(
|
||||
description=(
|
||||
"Scan for invalid UTF-8; optionally convert whole file or fix only invalid bytes.\n\n"
|
||||
"By default, --convert and --fix **edit the input file in place** and create a backup "
|
||||
"named '<input>.bak' before writing. If you pass --output, the original file is left "
|
||||
"unchanged and no backup is created. Use --dry-run to preview fixes without writing."
|
||||
),
|
||||
formatter_class=argparse.RawTextHelpFormatter,
|
||||
)
|
||||
ap.add_argument("path", help="Path to the CSV/text file")
|
||||
ap.add_argument(
|
||||
"--context",
|
||||
type=int,
|
||||
default=20,
|
||||
help="Bytes of context to show around errors (default: 20)",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--limit",
|
||||
type=int,
|
||||
default=100,
|
||||
help="Max errors to report during scan (0 = unlimited)",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--skip-scan", action="store_true", help="Skip initial scan for speed"
|
||||
)
|
||||
|
||||
# Whole-file convert
|
||||
ap.add_argument(
|
||||
"--convert",
|
||||
action="store_true",
|
||||
help="Convert entire file to UTF-8 using auto/forced encoding "
|
||||
"(in-place by default; creates '<input>.bak').",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--encoding",
|
||||
help="Force source encoding for --convert or first fallback for --fix",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--output",
|
||||
help="Write to this path instead of in-place (no .bak is created in that case)",
|
||||
)
|
||||
|
||||
# Targeted fix
|
||||
ap.add_argument(
|
||||
"--fix",
|
||||
action="store_true",
|
||||
help="Fix only invalid byte(s) via fallback encodings "
|
||||
"(in-place by default; creates '<input>.bak').",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--fallbacks",
|
||||
help="Comma-separated fallback encodings (default: cp1252,iso-8859-1,iso-8859-15)",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
help="(fix) Print fixes but do not write or create a .bak",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--max-fixes",
|
||||
type=int,
|
||||
default=0,
|
||||
help="(fix) Stop after N fixes (0 = unlimited)",
|
||||
)
|
||||
|
||||
args = ap.parse_args()
|
||||
path = args.path
|
||||
|
||||
if not os.path.isfile(path):
|
||||
print(f"File not found: {path}", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
# Optional scan first
|
||||
if not args.skip_scan:
|
||||
scan_file_for_utf8_errors(path, context=args.context, limit=args.limit)
|
||||
|
||||
# Mode selection guards
|
||||
if args.convert and args.fix:
|
||||
print("Choose either --convert or --fix (not both).", file=sys.stderr)
|
||||
sys.exit(9)
|
||||
if not args.convert and not args.fix and args.skip_scan:
|
||||
print("No action selected (use --convert or --fix).")
|
||||
return
|
||||
if not args.convert and not args.fix:
|
||||
# User only wanted a scan
|
||||
return
|
||||
|
||||
# Determine output path and backup behavior
|
||||
# In-place by default: create '<input>.bak' before overwriting.
|
||||
if args.output:
|
||||
out_path = args.output
|
||||
in_place = False
|
||||
else:
|
||||
out_path = path
|
||||
in_place = True
|
||||
|
||||
# CONVERT mode
|
||||
if args.convert:
|
||||
print("\n[CONVERT MODE] Converting file to UTF-8...")
|
||||
if in_place:
|
||||
# Create backup before overwriting original
|
||||
backup_path = path + ".bak"
|
||||
shutil.copy2(path, backup_path)
|
||||
print(f"Backup created: {backup_path}")
|
||||
used = convert_to_utf8(path, out_path, src_encoding=args.encoding)
|
||||
print(f"Source encoding used: {used}")
|
||||
print(f"Saved UTF-8 file as: {out_path}")
|
||||
ok, err = verify_utf8_file(out_path)
|
||||
if ok:
|
||||
print("Verification: output is valid UTF-8 ✅")
|
||||
else:
|
||||
print(f"Verification failed: {err}")
|
||||
sys.exit(8)
|
||||
return
|
||||
|
||||
# FIX mode (targeted, single-byte)
|
||||
if args.fix:
|
||||
print("\n[FIX MODE] Fixing only invalid bytes to UTF-8...")
|
||||
if args.dry_run:
|
||||
# Dry-run: never write or create backup
|
||||
out_path_effective = os.devnull
|
||||
in_place_effective = False
|
||||
else:
|
||||
out_path_effective = out_path
|
||||
in_place_effective = in_place
|
||||
|
||||
# Build fallback chain (if --encoding provided, try it first)
|
||||
if args.fallbacks:
|
||||
fallback_chain = [e.strip() for e in args.fallbacks.split(",") if e.strip()]
|
||||
else:
|
||||
fallback_chain = detect_probable_fallbacks()
|
||||
if args.encoding and args.encoding not in fallback_chain:
|
||||
fallback_chain = [args.encoding] + fallback_chain
|
||||
|
||||
if in_place_effective:
|
||||
# Create backup before overwriting original (only when actually writing)
|
||||
backup_path = path + ".bak"
|
||||
shutil.copy2(path, backup_path)
|
||||
print(f"Backup created: {backup_path}")
|
||||
|
||||
fix_count = targeted_fix_to_utf8(
|
||||
path,
|
||||
out_path_effective,
|
||||
fallback_chain=fallback_chain,
|
||||
dry_run=args.dry_run,
|
||||
max_fixes=args.max_fixes,
|
||||
)
|
||||
|
||||
if not args.dry_run:
|
||||
ok, err = verify_utf8_file(out_path_effective)
|
||||
if ok:
|
||||
print("Verification: output is valid UTF-8 ✅")
|
||||
print(f"Fix mode completed — {fix_count} byte(s) corrected.")
|
||||
else:
|
||||
print(f"Verification failed: {err}")
|
||||
sys.exit(8)
|
||||
return
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -9,6 +9,7 @@ def _main():
|
||||
input_csv_file_path = "base_reverse_dns.csv"
|
||||
base_reverse_dns_map_file_path = "base_reverse_dns_map.csv"
|
||||
known_unknown_list_file_path = "known_unknown_base_reverse_dns.txt"
|
||||
psl_overrides_file_path = "psl_overrides.txt"
|
||||
output_csv_file_path = "unknown_base_reverse_dns.csv"
|
||||
|
||||
csv_headers = ["source_name", "message_count"]
|
||||
@@ -23,6 +24,7 @@ def _main():
|
||||
input_csv_file_path,
|
||||
base_reverse_dns_map_file_path,
|
||||
known_unknown_list_file_path,
|
||||
psl_overrides_file_path,
|
||||
]:
|
||||
if not os.path.exists(p):
|
||||
logger.error(f"{p} does not exist")
|
||||
@@ -38,6 +40,17 @@ def _main():
|
||||
)
|
||||
else:
|
||||
known_unknown_domains.append(domain)
|
||||
logger.info(f"Loading {psl_overrides_file_path}")
|
||||
psl_overrides = []
|
||||
with open(psl_overrides_file_path) as f:
|
||||
for line in f.readlines():
|
||||
domain = line.lower().strip()
|
||||
if domain in psl_overrides:
|
||||
logger.warning(
|
||||
f"{domain} is in {psl_overrides_file_path} multiple times"
|
||||
)
|
||||
else:
|
||||
psl_overrides.append(domain)
|
||||
logger.info(f"Loading {base_reverse_dns_map_file_path}")
|
||||
known_domains = []
|
||||
with open(base_reverse_dns_map_file_path) as f:
|
||||
@@ -52,13 +65,20 @@ def _main():
|
||||
if domain in known_unknown_domains and known_domains:
|
||||
pass
|
||||
logger.warning(
|
||||
f"{domain} is in {known_unknown_list_file_path} and {base_reverse_dns_map_file_path}"
|
||||
f"{domain} is in {known_unknown_list_file_path} and \
|
||||
{base_reverse_dns_map_file_path}"
|
||||
)
|
||||
|
||||
logger.info(f"Checking domains against {base_reverse_dns_map_file_path}")
|
||||
with open(input_csv_file_path) as f:
|
||||
for row in csv.DictReader(f):
|
||||
domain = row["source_name"].lower().strip()
|
||||
if domain == "":
|
||||
continue
|
||||
for psl_domain in psl_overrides:
|
||||
if domain.endswith(psl_domain):
|
||||
domain = psl_domain
|
||||
break
|
||||
if domain not in known_domains and domain not in known_unknown_domains:
|
||||
logger.info(f"New unknown domain found: {domain}")
|
||||
output_rows.append(row)
|
||||
|
||||
@@ -1,125 +1,309 @@
|
||||
185.in-addr.arpa
|
||||
190.in-addr.arpa
|
||||
200.in-addr.arpa
|
||||
9services.com
|
||||
a7e.ru
|
||||
a94434500-blog.com
|
||||
abv-10.top
|
||||
adlucrumnewsletter.com
|
||||
admin.corpivensa.gob.ve
|
||||
advantageiq.com
|
||||
advrider.ro
|
||||
aerospacevitro.us.com
|
||||
agenturserver.de
|
||||
aghories.com
|
||||
ai270.net
|
||||
albagroup-eg.com
|
||||
alchemy.net
|
||||
anchorfundhub.com
|
||||
anglishment.com
|
||||
anteldata.net.uy
|
||||
antis.edu
|
||||
antonaoll.com
|
||||
anviklass.org
|
||||
anwrgrp.lat
|
||||
aosau.net
|
||||
arandomserver.com
|
||||
aransk.ru
|
||||
ardcs.cn
|
||||
as29550.net
|
||||
asmecam.it
|
||||
aurelienvos.com
|
||||
automatech.lat
|
||||
avistaadvantage.com
|
||||
b8sales.com
|
||||
bearandbullmarketnews.com
|
||||
bestinvestingtime.com
|
||||
biocorp.com
|
||||
bisno1.co.jp
|
||||
bitter-echo.com
|
||||
blguss.com
|
||||
bluenet.ch
|
||||
bluhosting.com
|
||||
bodiax.pp.ua
|
||||
bost-law.com
|
||||
brainity.com
|
||||
brnonet.cz
|
||||
brushinglegal.de
|
||||
brw.net
|
||||
budgeteasehub.com
|
||||
buoytoys.com
|
||||
cashflowmasterypro.com
|
||||
cavabeen.com
|
||||
cbti.net
|
||||
checkpox.fun
|
||||
chegouseuvlache.org
|
||||
christus.mx
|
||||
ckaik.cn
|
||||
cloud-edm.com
|
||||
cloudaccess.net
|
||||
cloudflare-email.org
|
||||
cloudhosting.rs
|
||||
cloudlogin.co
|
||||
cnode.io
|
||||
code-it.net
|
||||
colombiaceropapel.org
|
||||
commerceinsurance.com
|
||||
coolblaze.com
|
||||
coowo.com
|
||||
corpemail.net
|
||||
cp2-myorderbox.com
|
||||
cps.com.ar
|
||||
ctla.co.kr
|
||||
dastans.ru
|
||||
datahost36.de
|
||||
descarca-counter-strike.net
|
||||
detrot.xyz
|
||||
digi.net.my
|
||||
dinofelis.cn
|
||||
diwkyncbi.top
|
||||
dkginternet.com
|
||||
dns-oid.com
|
||||
domconfig.com
|
||||
doorsrv.com
|
||||
dreampox.fun
|
||||
dreamtechmedia.com
|
||||
ds.network
|
||||
dvj.theworkpc.com
|
||||
dwlcka.com
|
||||
dyntcorp.com
|
||||
economiceagles.com
|
||||
egosimail.com
|
||||
emailgids.net
|
||||
emailperegrine.com
|
||||
entretothom.net
|
||||
epsilon-group.com
|
||||
erestaff.com
|
||||
example.com
|
||||
exposervers.com-new
|
||||
eyecandyhosting.xyz
|
||||
fetscorp.shop
|
||||
fin-crime.com
|
||||
financeaimpoint.com
|
||||
financeupward.com
|
||||
flex-video.bnr.la
|
||||
formicidaehunt.net
|
||||
fosterheap.com
|
||||
frontiernet.net
|
||||
gendns.com
|
||||
getthatroi.com
|
||||
gigidea.net
|
||||
giize.com
|
||||
ginous.eu.com
|
||||
gist-th.com
|
||||
gophermedia.com
|
||||
gqlists.us.com
|
||||
gratzl.de
|
||||
greatestworldnews.com
|
||||
greennutritioncare.com
|
||||
h-serv.co.uk
|
||||
hgnbroken.us.com
|
||||
hosting1337.com
|
||||
hostinglotus.cloud
|
||||
hostingmichigan.com
|
||||
hostiran.name
|
||||
hostmnl.com
|
||||
hostname.localhost
|
||||
hostnetwork.com
|
||||
hosts.net.nz
|
||||
hostwhitelabel.com
|
||||
hpms1.jp
|
||||
hypericine.com
|
||||
i-mecca.net
|
||||
iaasdns.com
|
||||
iam.net.ma
|
||||
iconmarketingguy.com
|
||||
idcfcloud.net
|
||||
idealconcept.live
|
||||
igppevents.org.uk
|
||||
imjtmn.cn
|
||||
immenzaces.com
|
||||
inshaaegypt.com
|
||||
ip-147-135-108.us
|
||||
ip-178-33-109.eu
|
||||
ip-ptr.tech
|
||||
itsidc.com
|
||||
itwebs.com
|
||||
ivol.co
|
||||
jalanet.co.id
|
||||
jimishare.com
|
||||
jumanra.org
|
||||
kahlaa.com
|
||||
kbronet.com.tw
|
||||
kdnursing.org
|
||||
kihy.theworkpc.com
|
||||
kitchenaildbd.com
|
||||
layerdns.cloud
|
||||
legenditds.com
|
||||
lighthouse-media.com
|
||||
listertermoformadoa.com
|
||||
llsend.com
|
||||
lohkal.com
|
||||
lonestarmm.net
|
||||
longwoodmgmt.com
|
||||
lwl-puehringer.at
|
||||
lynx.net.lb
|
||||
magnetmail.net
|
||||
mail-fire.com
|
||||
manhattanbulletpoint.com
|
||||
manpowerservices.com
|
||||
marketmysterycode.com
|
||||
marketwizardspro.com
|
||||
masterclassjournal.com
|
||||
matroguel.cam
|
||||
maximpactipo.com
|
||||
mechanicalwalk.store
|
||||
mediavobis.com
|
||||
misorpresa.com
|
||||
moderntradingnews.com
|
||||
moonjaws.com
|
||||
morningnewscatcher.com
|
||||
motion4ever.net
|
||||
mschosting.com
|
||||
msdp1.com
|
||||
mspnet.pro
|
||||
mts-nn.ru
|
||||
mxserver.ro
|
||||
mxthunder.net
|
||||
myrewards.net
|
||||
mysagestore.com
|
||||
mysecurewebserver.com
|
||||
myvps.jp
|
||||
name.tools
|
||||
nanshenqfurniture.com
|
||||
nask.pl
|
||||
ncport.ru
|
||||
ncsdi.ws
|
||||
nebdig.com
|
||||
neovet-base.ru
|
||||
netbri.com
|
||||
netkl.org
|
||||
newinvestingguide.com
|
||||
newwallstreetcode.com
|
||||
ngvcv.cn
|
||||
nic.name
|
||||
nidix.net
|
||||
nlscanme.com
|
||||
nmeuh.cn
|
||||
noisndametal.com
|
||||
nwo.giize.com
|
||||
offerslatedeals.com
|
||||
office365.us
|
||||
ogicom.net
|
||||
omegabrasil.inf.br
|
||||
onnet21.com
|
||||
oppt-ac.fit
|
||||
orbitel.net.co
|
||||
ovaltinalization.co
|
||||
overta.ru
|
||||
panaltyspot.space
|
||||
passionatesmiles.com
|
||||
perimetercenter.net
|
||||
permanentscreen.com
|
||||
phdns3.es
|
||||
planethoster.net
|
||||
plesk.page
|
||||
pmnhost.net
|
||||
pokupki5.ru
|
||||
popiup.com
|
||||
ports.net
|
||||
prima.com.ar
|
||||
prima.net.ar
|
||||
prohealthmotion.com
|
||||
proudserver.com
|
||||
psnm.ru
|
||||
pvcwindowsprices.live
|
||||
qontenciplc.autos
|
||||
quatthonggiotico.com
|
||||
rapidns.com
|
||||
raxa.host
|
||||
reliablepanel.com
|
||||
rgb365.eu
|
||||
riddlecamera.net
|
||||
rwdhosting.ca
|
||||
s500host.com
|
||||
sahacker-2020.com
|
||||
samsales.site
|
||||
saransk.ru
|
||||
satirogluet.com
|
||||
securednshost.com
|
||||
seaspraymta3.net
|
||||
secorp.mx
|
||||
securen.net
|
||||
securerelay.in
|
||||
securev.net
|
||||
servershost.biz
|
||||
silvestrejaguar.sbs
|
||||
silvestreonca.sbs
|
||||
siriuscloud.jp
|
||||
sisglobalresearch.com
|
||||
smallvillages.com
|
||||
smartape-vps.com
|
||||
solusoftware.com
|
||||
spiritualtechnologies.io
|
||||
sprout.org
|
||||
stableserver.net
|
||||
stockepictigers.com
|
||||
stockexchangejournal.com
|
||||
suksangroup.com
|
||||
sysop4.com
|
||||
system.eu.com
|
||||
szhongbing.com
|
||||
t-jon.com
|
||||
tecnoxia.net
|
||||
tel-xyz.fit
|
||||
tenkids.net
|
||||
terminavalley.com
|
||||
thaicloudsolutions.com
|
||||
thaimonster.com
|
||||
thepushcase.com
|
||||
totaal.net
|
||||
traderlearningcenter.com
|
||||
tullostrucking.com
|
||||
ultragate.com
|
||||
unite.services
|
||||
urawasl.com
|
||||
us.servername.us
|
||||
varvia.de
|
||||
vendimetry.com
|
||||
vibrantwellnesscorp.com
|
||||
viviotech.us
|
||||
vlflgl.com
|
||||
volganet.ru
|
||||
wallstreetsgossip.com
|
||||
wealthexpertisepro.com
|
||||
web-login.eu
|
||||
weblinkinternational.com
|
||||
webnox.io
|
||||
welllivinghive.com
|
||||
wisdomhard.com
|
||||
wisewealthcircle.com
|
||||
wsiph2.com
|
||||
xnt.mx
|
||||
xpnuf.cn
|
||||
xsfati.us.com
|
||||
xspmail.jp
|
||||
yourciviccompass.com
|
||||
yourinvestworkbook.com
|
||||
yoursitesecure.net
|
||||
zerowebhosting.net
|
||||
znlc.jp
|
||||
ztomy.com
|
||||
|
||||
6 parsedmarc/resources/maps/psl_overrides.txt Normal file
@@ -0,0 +1,6 @@
akura.ne.jp
amazonaws.com
cloudaccess.net
h-serv.co.uk
linode.com
plesk.page
59 parsedmarc/resources/maps/sortmaps.py Executable file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import csv
|
||||
|
||||
maps_dir = os.path.join(".")
|
||||
map_files = ["base_reverse_dns_map.csv"]
|
||||
list_files = ["known_unknown_base_reverse_dns.txt", "psl_overrides.txt"]
|
||||
|
||||
|
||||
def sort_csv(filepath, column=0):
|
||||
with open(filepath, mode="r", newline="") as infile:
|
||||
reader = csv.reader(infile)
|
||||
header = next(reader)
|
||||
sorted_rows = sorted(reader, key=lambda row: row[column])
|
||||
existing_values = []
|
||||
for row in sorted_rows:
|
||||
if row[column] in existing_values:
|
||||
print(f"Warning: {row[column]} is in {filepath} multiple times")
|
||||
|
||||
with open(filepath, mode="w", newline="\n") as outfile:
|
||||
writer = csv.writer(outfile)
|
||||
writer.writerow(header)
|
||||
writer.writerows(sorted_rows)
|
||||
|
||||
|
||||
def sort_list_file(
|
||||
filepath,
|
||||
lowercase=True,
|
||||
strip=True,
|
||||
deduplicate=True,
|
||||
remove_blank_lines=True,
|
||||
ending_newline=True,
|
||||
newline="\n",
|
||||
):
|
||||
with open(filepath, mode="r", newline=newline) as infile:
|
||||
lines = infile.readlines()
|
||||
for i in range(len(lines)):
|
||||
if lowercase:
|
||||
lines[i] = lines[i].lower()
|
||||
if strip:
|
||||
lines[i] = lines[i].strip()
|
||||
if deduplicate:
|
||||
lines = list(set(lines))
|
||||
if remove_blank_lines:
|
||||
while "" in lines:
|
||||
lines.remove("")
|
||||
lines = sorted(lines)
|
||||
if ending_newline:
|
||||
if lines[-1] != "":
|
||||
lines.append("")
|
||||
with open(filepath, mode="w", newline=newline) as outfile:
|
||||
outfile.write("\n".join(lines))
|
||||
|
||||
|
||||
for csv_file in map_files:
|
||||
sort_csv(os.path.join(maps_dir, csv_file))
|
||||
for list_file in list_files:
|
||||
sort_list_file(os.path.join(maps_dir, list_file))
|
||||
@@ -5,7 +5,7 @@ import json
import urllib3
import requests

from parsedmarc import __version__
from parsedmarc.constants import USER_AGENT
from parsedmarc.log import logger
from parsedmarc.utils import human_timestamp_to_unix_timestamp

@@ -51,7 +51,7 @@ class HECClient(object):

        self._common_data = dict(host=self.host, source=self.source, index=self.index)

        self.session.headers = {
            "User-Agent": "parsedmarc/{0}".format(__version__),
            "User-Agent": USER_AGENT,
            "Authorization": "Splunk {0}".format(self.access_token),
        }
@@ -37,7 +37,7 @@ import requests
from parsedmarc.log import logger
import parsedmarc.resources.dbip
import parsedmarc.resources.maps

from parsedmarc.constants import USER_AGENT

parenthesis_regex = re.compile(r"\s*\(.*\)\s*")

@@ -345,7 +345,8 @@ def get_service_from_reverse_dns_base_domain(
    if not (offline or always_use_local_file) and len(reverse_dns_map) == 0:
        try:
            logger.debug(f"Trying to fetch reverse DNS map from {url}...")
            response = requests.get(url)
            headers = {"User-Agent": USER_AGENT}
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            csv_file.write(response.text)
            csv_file.seek(0)
@@ -355,6 +356,7 @@ def get_service_from_reverse_dns_base_domain(
        except Exception:
            logger.warning("Not a valid CSV file")
            csv_file.seek(0)
            logging.debug("Response body:")
            logger.debug(csv_file.read())

    if len(reverse_dns_map) == 0:
@@ -1,6 +1,7 @@
import requests

from parsedmarc import logger
from parsedmarc.constants import USER_AGENT


class WebhookClient(object):
@@ -21,7 +22,7 @@ class WebhookClient(object):
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers = {
            "User-Agent": "parsedmarc",
            "User-Agent": USER_AGENT,
            "Content-Type": "application/json",
        }
@@ -76,9 +76,20 @@ parsedmarc = "parsedmarc.cli:_main"
Homepage = "https://domainaware.github.io/parsedmarc"

[tool.hatch.version]
path = "parsedmarc/__init__.py"
path = "parsedmarc/constants.py"

[tool.hatch.build.targets.sdist]
include = [
    "/parsedmarc",
]

[tool.hatch.build]
exclude = [
    "base_reverse_dns.csv",
    "find_bad_utf8.py",
    "find_unknown_base_reverse_dns.py",
    "unknown_base_reverse_dns.csv",
    "sortmaps.py",
    "README.md",
    "*.bak"
]
25 sortmaps.py
@@ -1,25 +0,0 @@
#!/usr/bin/env python3

import os
import glob
import csv


maps_dir = os.path.join("parsedmarc", "resources", "maps")
csv_files = glob.glob(os.path.join(maps_dir, "*.csv"))


def sort_csv(filepath, column=0):
    with open(filepath, mode="r", newline="") as infile:
        reader = csv.reader(infile)
        header = next(reader)
        sorted_rows = sorted(reader, key=lambda row: row[column])

    with open(filepath, mode="w", newline="\n") as outfile:
        writer = csv.writer(outfile)
        writer.writerow(header)
        writer.writerows(sorted_rows)


for csv_file in csv_files:
    sort_csv(csv_file)
107 splunk/smtp_tls_dashboard.xml Normal file
@@ -0,0 +1,107 @@
|
||||
<form version="1.1" theme="dark">
|
||||
<label>SMTP TLS Reporting</label>
|
||||
<fieldset submitButton="false" autoRun="true">
|
||||
<input type="time" token="time">
|
||||
<label></label>
|
||||
<default>
|
||||
<earliest>-7d@h</earliest>
|
||||
<latest>now</latest>
|
||||
</default>
|
||||
</input>
|
||||
<input type="text" token="organization_name" searchWhenChanged="true">
|
||||
<label>Organization name</label>
|
||||
<default>*</default>
|
||||
<initialValue>*</initialValue>
|
||||
</input>
|
||||
<input type="text" token="policy_domain">
|
||||
<label>Policy domain</label>
|
||||
<default>*</default>
|
||||
<initialValue>*</initialValue>
|
||||
</input>
|
||||
<input type="dropdown" token="policy_type" searchWhenChanged="true">
|
||||
<label>Policy type</label>
|
||||
<choice value="*">Any</choice>
|
||||
<choice value="tlsa">tlsa</choice>
|
||||
<choice value="sts">sts</choice>
|
||||
<choice value="no-policy-found">no-policy-found</choice>
|
||||
<default>*</default>
|
||||
<initialValue>*</initialValue>
|
||||
</input>
|
||||
</fieldset>
|
||||
<row>
|
||||
<panel>
|
||||
<title>Reporting organizations</title>
|
||||
<table>
|
||||
<search>
|
||||
<query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$
|
||||
| rename policies{}.policy_domain as policy_domain
|
||||
| rename policies{}.policy_type as policy_type
|
||||
| rename policies{}.failed_session_count as failed_sessions
|
||||
| rename policies{}.failure_details{}.failed_session_count as failed_sessions
|
||||
| rename policies{}.successful_session_count as successful_sessions
|
||||
| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
|
||||
| rename policies{}.failure_details{}.receiving_ip as receiving_ip
|
||||
| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
|
||||
| rename policies{}.failure_details{}.result_type as failure_type
|
||||
| fillnull value=0 failed_sessions
|
||||
| stats sum(failed_sessions) as failed_sessions sum(successful_sessions) as successful_sessions by organization_name
|
||||
| sort -successful_sessions 0</query>
|
||||
<earliest>$time.earliest$</earliest>
|
||||
<latest>$time.latest$</latest>
|
||||
</search>
|
||||
<option name="drilldown">none</option>
|
||||
<option name="refresh.display">progressbar</option>
|
||||
</table>
|
||||
</panel>
|
||||
<panel>
|
||||
<title>Domains</title>
|
||||
<table>
|
||||
<search>
|
||||
<query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$
|
||||
| rename policies{}.policy_domain as policy_domain
|
||||
| rename policies{}.policy_type as policy_type
|
||||
| rename policies{}.failed_session_count as failed_sessions
|
||||
| rename policies{}.failure_details{}.failed_session_count as failed_sessions
|
||||
| rename policies{}.successful_session_count as successful_sessions
|
||||
| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
|
||||
| rename policies{}.failure_details{}.receiving_ip as receiving_ip
|
||||
| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
|
||||
| rename policies{}.failure_details{}.result_type as failure_type
|
||||
| fillnull value=0 failed_sessions
|
||||
| stats sum(failed_sessions) as failed_sessions sum(successful_sessions) as successful_sessions by policy_domain
|
||||
| sort -successful_sessions 0</query>
|
||||
<earliest>$time.earliest$</earliest>
|
||||
<latest>$time.latest$</latest>
|
||||
</search>
|
||||
<option name="drilldown">none</option>
|
||||
<option name="refresh.display">progressbar</option>
|
||||
</table>
|
||||
</panel>
|
||||
</row>
|
||||
<row>
|
||||
<panel>
|
||||
<title>Failure details</title>
|
||||
<table>
|
||||
<search>
|
||||
<query>index=email sourcetype=smtp:tls organization_name=$organization_name$ policies{}.policy_domain=$policy_domain$ policies{}.failure_details{}.result_type=*
|
||||
| rename policies{}.policy_domain as policy_domain
|
||||
| rename policies{}.policy_type as policy_type
|
||||
| rename policies{}.failed_session_count as failed_sessions
|
||||
| rename policies{}.failure_details{}.failed_session_count as failed_sessions
|
||||
| rename policies{}.successful_session_count as successful_sessions
|
||||
| rename policies{}.failure_details{}.sending_mta_ip as sending_mta_ip
|
||||
| rename policies{}.failure_details{}.receiving_ip as receiving_ip
|
||||
| rename policies{}.failure_details{}.receiving_mx_hostname as receiving_mx_hostname
|
||||
| fillnull value=0 failed_sessions
|
||||
| rename policies{}.failure_details{}.result_type as failure_type
|
||||
| table _time organization_name policy_domain policy_type failed_sessions successful_sessions sending_mta_ip receiving_ip receiving_mx_hostname failure_type
|
||||
| sort by -_time 0</query>
|
||||
<earliest>$time.earliest$</earliest>
|
||||
<latest>$time.latest$</latest>
|
||||
</search>
|
||||
<option name="drilldown">none</option>
|
||||
<option name="refresh.display">progressbar</option>
|
||||
</table>
|
||||
</panel>
|
||||
</row>
|
||||
</form>
|
||||