Files
parsedmarc/pyproject.toml
T
Fabio Scaccabarozzi 327fcff2b9 Add optional PostgreSQL storage backend (#667)
Adds a PostgreSQL output backend as a lighter-weight alternative to
Elasticsearch/OpenSearch, configured via a [postgresql] section
(host/port/user/password/database or a libpq connection_string). Tables
are created automatically on first run; a Grafana dashboard is included.

- psycopg is an optional extra (pip install parsedmarc[postgresql]); the
  import is guarded so `import parsedmarc` works without it, and
  PostgreSQLClient raises a clear install hint when constructed without
  the driver. Binary wheels aren't available for every platform.
- Schema captures the RFC 9990 / DMARCbis aggregate fields: np, testing,
  discovery_method, generator, xml_namespace, and per-result human_result
  on the DKIM/SPF auth-result tables.
- forensic -> failure naming throughout (table dmarc_failure_report,
  save_failure_report_to_postgresql, dashboard, docs) to match #659.
- Failure-report de-duplication mirrors the Elasticsearch backend exactly:
  arrival date + From + To + Subject (NULL-safe via IS NOT DISTINCT FROM;
  semantic JSONB equality). Aggregate and SMTP-TLS use ON CONFLICT.
- PostgreSQLClient.close() for clean CLI shutdown; comment documents why
  the two timestamp helpers must stay distinct (report dates are local,
  record/SMTP-TLS dates are UTC).
- CLI: config parse raises ConfigurationError on missing
  host/connection_string; wired into _init_output_clients + save loops.
- Tests in tests/test_postgres.py (helpers, mocked-DB save assertions,
  create_tables, connect/error wrapping, dedup, real-sample round trip)
  and tests/test_cli.py (config parse + end-to-end save wiring incl.
  AlreadySaved/PostgreSQLError handling). postgres.py at 99% line
  coverage; only _main's output-client-init retry path is left.

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-21 09:17:49 -04:00

128 lines
3.8 KiB
TOML

# PEP 517/518 build configuration: the package is built with hatchling.
# Only `requires`, `build-backend` and `backend-path` are defined for this
# table, so no Python-version key belongs here. The supported interpreter
# range is declared by `requires-python` under [project]; if an upper bound
# (e.g. <3.15) is intended, set it there.
[build-system]
requires = [
    "hatchling>=1.27.0",
]
build-backend = "hatchling.build"

[project]
name = "parsedmarc"
description = "A Python package and CLI for parsing aggregate and forensic DMARC reports"
readme = "README.md"
requires-python = ">=3.10"
# SPDX license expression.
license = "Apache-2.0"
authors = [{ name = "Sean Whalen", email = "whalenster@gmail.com" }]
keywords = ["DMARC", "parser", "reporting"]
# `version` is not hard-coded in this file; the build backend resolves it
# from the path configured under [tool.hatch.version].
dynamic = ["version"]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    # NOTE(review): PEP 639 deprecates `License ::` classifiers when
    # `license` is an SPDX expression; consider dropping this entry.
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
]
# Runtime requirements installed with every `pip install parsedmarc`.
# The PostgreSQL driver is intentionally absent: it is provided by the
# `postgresql` extra under [project.optional-dependencies].
dependencies = [
    "azure-identity>=1.8.0",
    "azure-monitor-ingestion>=1.0.0",
    "boto3>=1.16.63",
    "dateparser>=1.1.1",
    "dnspython>=2.0.0",
    # elasticsearch-dsl is pinned exactly and elasticsearch is capped
    # below 7.14.0; keep the two specifiers in step when changing either.
    "elasticsearch-dsl==7.4.0",
    "elasticsearch<7.14.0",
    "expiringdict>=1.1.4",
    "kafka-python-ng>=2.2.2",
    "lxml>=4.4.0",
    "mailsuite[gmail,msgraph]>=2.0.2",
    "maxminddb>=2.0.0",
    "opensearch-py>=2.4.2,<=4.0.0",
    "publicsuffixlist>=0.10.0",
    "pygelf>=0.4.2",
    "requests>=2.22.0",
    "tqdm>=4.31.1",
    "urllib3>=1.25.7",
    "xmltodict>=0.12.0",
    "PyYAML>=6.0.3",
]
[project.optional-dependencies]
# PostgreSQL output backend (`pip install parsedmarc[postgresql]`).
# It is opt-in rather than a core dependency because the prebuilt wheels
# pulled in by psycopg's [binary] extra are not published for every
# platform/architecture.
postgresql = [
    "psycopg[binary]>=3.1.0",
]
# Development tooling: hatch, Sphinx documentation, ruff and the test stack.
build = [
    # ddgs is needed only by maintainer tooling under
    # parsedmarc/resources/maps/. `collect_domain_info.py
    # --use-search-fallback` uses it to run a DuckDuckGo search when the
    # homepage fetch returns a bot-block, parked or empty page. The import
    # is optional, so the script still runs without ddgs as long as that
    # flag is not passed.
    "ddgs>=9.0.0",
    "hatch>=1.14.0",
    "myst-parser[linkify]",
    # NOTE(review): nose is unmaintained; confirm it is still required now
    # that the suite is collected by pytest.
    "nose",
    "pytest",
    "pytest-cov",
    "ruff",
    "sphinx",
    "sphinx_rtd_theme",
]

# Console entry point: the `parsedmarc` command invokes `_main` from the
# `parsedmarc.cli` module.
[project.scripts]
parsedmarc = "parsedmarc.cli:_main"

# Links published with the package metadata.
[project.urls]
Homepage = "https://domainaware.github.io/parsedmarc"

# Source file for the dynamic `version` field declared under [project].
[tool.hatch.version]
path = "parsedmarc/constants.py"

# Patterns left out of built distributions.
[tool.hatch.build]
exclude = [
    "base_reverse_dns.csv",
    "unknown_base_reverse_dns.csv",
    "README.md",
    "*.bak",
    # Maintenance scripts: every Python file in parsedmarc/resources/maps/
    # whose name does not begin with `_`. __init__.py is therefore kept,
    # which `importlib.resources.files()` lookups rely on.
    "parsedmarc/resources/maps/[!_]*.py",
]

# The sdist is limited to the package directory.
[tool.hatch.build.targets.sdist]
include = [
    "/parsedmarc",
]

[tool.pytest.ini_options]
# Tests follow a per-module layout under tests/. Add new tests to
# tests/test_<module>.py, named after the file they exercise, rather than
# bringing back a single monolithic tests.py.
testpaths = [
    "tests",
]

[tool.coverage.run]
# Measure shipped code only. The ≈66.9% that master reported on Codecov
# was an artefact: the old monolithic tests.py had no [tool.coverage.run]
# block, so coverage's default behaviour measured every file imported
# during the run, including the test file itself at ~99% "covered". That
# inflated the headline by ~8 percentage points with no real testing
# signal. Limiting measurement to the parsedmarc package yields a number
# that tracks how much of the shipped library the suite actually exercises.
source = [
    "parsedmarc",
]
# The maintainer-only batch scripts under parsedmarc/resources/maps/ are
# excluded from the wheel (see the [tool.hatch.build] exclude list), so
# they are omitted here as well to keep the headline figure about
# installed library code.
omit = [
    "*/parsedmarc/resources/maps/*.py",
]