mirror of
https://github.com/domainaware/parsedmarc.git
synced 2026-06-18 08:14:17 +00:00
0c456d44ed
* Declare backward-compatible method aliases inside class bodies

  Assigning the legacy save_forensic_* aliases onto the classes after the class body (KafkaClient.save_forensic_reports_to_kafka = ...) is invisible to static type checkers, so Pylance/Pyright flagged every assignment and every use with reportAttributeAccessIssue. Declaring the alias inside the class body is statically visible — the IDE errors disappear and the aliases get autocomplete and proper typing. Runtime behavior is identical (same function object bound as a method), guarded by the existing assertIs alias tests, whose type-ignore comments are now unnecessary.

  Also add a pyright ignore on the NoBrokersAvailable import in kafkaclient.py: the import is guarded by try/except ImportError for kafka-python 2.x, but Pyright resolves against the installed 3.x where the name no longer exists.

  Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* Bump version to 10.1.0

  10.0.4 is tagged and released; CHANGELOG.md already documents the in-progress 10.1.0 section that this release will ship.

  Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
221 lines
9.0 KiB
Python
221 lines
9.0 KiB
Python
"""Tests for parsedmarc.s3"""
|
|
|
|
import json
|
|
import unittest
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
from parsedmarc.s3 import S3Client
|
|
|
|
|
|
def _sample_aggregate_report():
    """Return a minimal aggregate report shape used by S3Client.save_*_to_s3.

    A new dict (including the nested ``report_metadata`` dict) is built on
    every call, so individual tests can mutate the result without affecting
    one another.
    """
    return {
        "report_metadata": {
            "org_name": "example.com",
            "org_email": "dmarc@example.com",
            "report_id": "agg-123",
            "begin_date": "2024-01-15 00:00:00",
            "end_date": "2024-01-16 00:00:00",
            # not in S3Client.metadata_keys; should NOT appear on the S3 object
            "errors": [],
        },
        "policy_published": {"domain": "example.com", "p": "none"},
        "records": [],
    }
|
|
|
|
|
|
def _sample_smtp_tls_report():
    """Return a minimal SMTP TLS report as parse_smtp_tls_report_json produces it.

    The report is flat (no ``report_metadata`` sub-object), with ISO-string
    ``begin_date`` / ``end_date`` pulled directly from the report JSON. A new
    dict is built on every call.
    """
    return {
        "organization_name": "example.com",
        "begin_date": "2024-02-03T00:00:00Z",
        "end_date": "2024-02-04T00:00:00Z",
        "report_id": "tls-456",
        "contact_info": "tls-admin@example.com",
        "policies": [],
    }
|
|
|
|
|
|
class TestS3ClientInit(unittest.TestCase):
    """S3Client.__init__ delegates to boto3.resource() with the supplied
    credentials and endpoint. A regression in argument names or order
    would silently send reports to the wrong bucket or auth as the wrong
    principal."""

    def test_init_forwards_credentials_to_boto3(self):
        """The constructor's region, endpoint and key arguments reach
        boto3.resource("s3", ...) under boto3's own keyword names."""
        with patch("parsedmarc.s3.boto3.resource") as mock_resource:
            S3Client(
                bucket_name="my-bucket",
                bucket_path="dmarc",
                region_name="us-east-1",
                endpoint_url="https://s3.example.com",
                access_key_id="AKIA-test",
                secret_access_key="secret-test",
            )
            mock_resource.assert_called_once_with(
                "s3",
                region_name="us-east-1",
                endpoint_url="https://s3.example.com",
                aws_access_key_id="AKIA-test",
                aws_secret_access_key="secret-test",
            )

    def test_init_caches_bucket_handle(self):
        """self.bucket is the Bucket(bucket_name) on the boto3 resource,
        so subsequent save_* calls go to the right bucket."""
        with patch("parsedmarc.s3.boto3.resource") as mock_resource:
            # A plain string sentinel is enough to prove the handle returned
            # by Bucket() is stored unchanged on the client.
            mock_resource.return_value.Bucket.return_value = "bucket-handle"
            client = S3Client(
                bucket_name="my-bucket",
                bucket_path="dmarc",
                region_name="us-east-1",
                endpoint_url="https://s3.example.com",
                access_key_id="k",
                secret_access_key="s",
            )
            mock_resource.return_value.Bucket.assert_called_once_with("my-bucket")
            self.assertEqual(client.bucket, "bucket-handle")
|
|
|
|
|
|
class TestS3ClientSavePathsAndMetadata(unittest.TestCase):
    """The S3 key is built from the report's begin_date and report_id.
    Wrong format = unfindable reports; wrong metadata filtering = secret
    leakage onto the S3 object."""

    def _client_with_mock_bucket(self):
        """Build an S3Client with boto3 patched out and ``bucket`` replaced
        by a MagicMock so put_object calls can be inspected."""
        with patch("parsedmarc.s3.boto3.resource"):
            client = S3Client(
                bucket_name="b",
                bucket_path="dmarc",
                region_name="us-east-1",
                endpoint_url="https://s3.example.com",
                access_key_id="k",
                secret_access_key="s",
            )
        client.bucket = MagicMock()
        return client

    def test_aggregate_dispatches_with_aggregate_in_key_path(self):
        """save_aggregate_report_to_s3 puts the object under
        <bucket_path>/aggregate/year=YYYY/month=MM/day=DD/<report_id>.json."""
        client = self._client_with_mock_bucket()
        client.save_aggregate_report_to_s3(_sample_aggregate_report())
        client.bucket.put_object.assert_called_once()
        call = client.bucket.put_object.call_args
        self.assertEqual(
            call.kwargs["Key"],
            "dmarc/aggregate/year=2024/month=01/day=15/agg-123.json",
        )

    def test_failure_dispatches_with_failure_in_key_path(self):
        """save_failure_report_to_s3 puts the object under
        <bucket_path>/failure/year=YYYY/month=MM/day=DD/<report_id>.json."""
        client = self._client_with_mock_bucket()
        # The aggregate sample is reused here with only the report_id changed.
        report = _sample_aggregate_report()
        report["report_metadata"]["report_id"] = "fail-789"
        client.save_failure_report_to_s3(report)
        key = client.bucket.put_object.call_args.kwargs["Key"]
        self.assertEqual(key, "dmarc/failure/year=2024/month=01/day=15/fail-789.json")

    def test_smtp_tls_uses_report_begin_date(self):
        """SMTP TLS reports are flat — no report_metadata — and
        begin_date is the ISO string produced by parse_smtp_tls_report_json.
        The S3 path-builder parses that string into a datetime for the
        year=/month=/day= key segments.

        Regression: an earlier version assumed ALL reports carried a
        report_metadata sub-object, which crashed with KeyError on every
        SMTP TLS save. The CLI swallowed the error and only logged it,
        so the bug was invisible in production."""
        client = self._client_with_mock_bucket()
        client.save_smtp_tls_report_to_s3(_sample_smtp_tls_report())
        key = client.bucket.put_object.call_args.kwargs["Key"]
        self.assertEqual(key, "dmarc/smtp_tls/year=2024/month=02/day=03/tls-456.json")

    def test_smtp_tls_metadata_comes_from_flat_report_fields(self):
        """SMTP TLS object metadata is built from the flat report
        instead of report_metadata. organization_name is renamed to
        org_name (the S3 metadata key) for consistency with DMARC."""
        client = self._client_with_mock_bucket()
        client.save_smtp_tls_report_to_s3(_sample_smtp_tls_report())
        meta = client.bucket.put_object.call_args.kwargs["Metadata"]
        self.assertEqual(meta["org_name"], "example.com")
        self.assertEqual(meta["report_id"], "tls-456")
        self.assertEqual(meta["begin_date"], "2024-02-03T00:00:00Z")
        self.assertEqual(meta["end_date"], "2024-02-04T00:00:00Z")

    def test_object_body_is_json_serialized_report(self):
        """The Body passed to put_object is JSON that decodes back to the
        report that was saved."""
        client = self._client_with_mock_bucket()
        report = _sample_aggregate_report()
        client.save_aggregate_report_to_s3(report)
        body = client.bucket.put_object.call_args.kwargs["Body"]
        # Round-trip the JSON to make sure it actually deserializes and
        # carries every top-level key the source report had.
        self.assertEqual(json.loads(body), report)

    def test_metadata_filtered_to_documented_keys_only(self):
        """report_metadata fields outside `metadata_keys` must not be
        attached to the S3 object — they could leak large or sensitive
        payloads (errors lists, internal IDs) into object metadata."""
        client = self._client_with_mock_bucket()
        report = _sample_aggregate_report()
        report["report_metadata"]["errors"] = ["a", "b"]
        report["report_metadata"]["internal_diag"] = "secret"
        client.save_aggregate_report_to_s3(report)
        meta = client.bucket.put_object.call_args.kwargs["Metadata"]
        self.assertEqual(
            set(meta.keys()),
            {"org_name", "org_email", "report_id", "begin_date", "end_date"},
        )
        # Implied by the set comparison above; kept so a failure names the
        # leaked key directly.
        self.assertNotIn("errors", meta)
        self.assertNotIn("internal_diag", meta)
|
|
|
|
|
|
class TestS3ClientClose(unittest.TestCase):
    """close() must release the underlying boto3 client; a slow leak
    here matters for long-running watch-mode processes."""

    def test_close_calls_underlying_client_close(self):
        """close() invokes close() on the boto3 resource's meta.client
        exactly once."""
        with patch("parsedmarc.s3.boto3.resource") as mock_resource:
            client = S3Client(
                bucket_name="b",
                bucket_path="p",
                region_name="r",
                endpoint_url="https://s3.example.com",
                access_key_id="k",
                secret_access_key="s",
            )
            client.close()
            mock_resource.return_value.meta.client.close.assert_called_once()

    def test_close_swallows_exceptions_from_underlying_client(self):
        """close() is called during shutdown/reload; if boto3 raises
        from the close path, we don't want it to propagate and prevent
        clean exit. The except is defensive but deliberate."""
        with patch("parsedmarc.s3.boto3.resource") as mock_resource:
            underlying_close = mock_resource.return_value.meta.client.close
            underlying_close.side_effect = RuntimeError("boom")
            client = S3Client(
                bucket_name="b",
                bucket_path="p",
                region_name="r",
                endpoint_url="https://s3.example.com",
                access_key_id="k",
                secret_access_key="s",
            )
            # Should not raise.
            client.close()
            # Without this check the test would also pass if close() never
            # reached the underlying client at all.
            underlying_close.assert_called_once()
|
|
|
|
|
|
class TestS3ClientBackwardCompatAlias(unittest.TestCase):
    """The legacy ``save_forensic_report_to_s3`` name must stay available
    on S3Client as an alias of ``save_failure_report_to_s3``."""

    def test_forensic_alias_points_to_failure_method(self):
        """Both names resolve to the very same function object, not merely
        to equivalent implementations."""
        self.assertIs(
            S3Client.save_forensic_report_to_s3,
            S3Client.save_failure_report_to_s3,
        )
|
|
|
|
|
|
# Allow running this test module directly; verbosity=2 prints one line per test.
if __name__ == "__main__":
    unittest.main(verbosity=2)
|