Files
parsedmarc/tests/test_s3.py
T
Sean Whalen 0c456d44ed Declare backward-compatible method aliases inside class bodies (#797)
* Declare backward-compatible method aliases inside class bodies

Assigning the legacy save_forensic_* aliases onto the classes after the
class body (KafkaClient.save_forensic_reports_to_kafka = ...) is invisible
to static type checkers, so Pylance/Pyright flagged every assignment and
every use with reportAttributeAccessIssue. Declaring the alias inside the
class body is statically visible — the IDE errors disappear and the
aliases get autocomplete and proper typing. Runtime behavior is identical
(same function object bound as a method), guarded by the existing
assertIs alias tests, whose type-ignore comments are now unnecessary.

Also add a pyright ignore on the NoBrokersAvailable import in
kafkaclient.py: the import is guarded by try/except ImportError for
kafka-python 2.x, but Pyright resolves against the installed 3.x where
the name no longer exists.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* Bump version to 10.1.0

10.0.4 is tagged and released; CHANGELOG.md already documents the
in-progress 10.1.0 section that this release will ship.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 20:50:47 -04:00

221 lines
9.0 KiB
Python

"""Tests for parsedmarc.s3"""
import json
import unittest
from unittest.mock import MagicMock, patch
from parsedmarc.s3 import S3Client
def _sample_aggregate_report():
"""Minimal aggregate report shape used by S3Client.save_*_to_s3."""
return {
"report_metadata": {
"org_name": "example.com",
"org_email": "dmarc@example.com",
"report_id": "agg-123",
"begin_date": "2024-01-15 00:00:00",
"end_date": "2024-01-16 00:00:00",
# not in S3Client.metadata_keys; should NOT appear on the S3 object
"errors": [],
},
"policy_published": {"domain": "example.com", "p": "none"},
"records": [],
}
def _sample_smtp_tls_report():
"""Minimal SMTP TLS report shape as parse_smtp_tls_report_json
produces it — flat, with ISO-string begin_date / end_date pulled
directly from the report JSON."""
return {
"organization_name": "example.com",
"begin_date": "2024-02-03T00:00:00Z",
"end_date": "2024-02-04T00:00:00Z",
"report_id": "tls-456",
"contact_info": "tls-admin@example.com",
"policies": [],
}
class TestS3ClientInit(unittest.TestCase):
    """Constructor wiring between S3Client and boto3.resource().

    S3Client.__init__ hands the supplied credentials and endpoint to
    boto3.resource(). If an argument name or position regressed, reports
    would silently go to the wrong bucket or authenticate as the wrong
    principal, so both the forwarded kwargs and the cached bucket handle
    are pinned here.
    """

    def test_init_forwards_credentials_to_boto3(self):
        """Every constructor argument reaches boto3.resource() under
        the keyword name boto3 expects."""
        with patch("parsedmarc.s3.boto3.resource") as mock_resource:
            S3Client(
                bucket_name="my-bucket",
                bucket_path="dmarc",
                region_name="us-east-1",
                endpoint_url="https://s3.example.com",
                access_key_id="AKIA-test",
                secret_access_key="secret-test",
            )

        # The mock keeps its call record after the patch is undone.
        expected_kwargs = {
            "region_name": "us-east-1",
            "endpoint_url": "https://s3.example.com",
            "aws_access_key_id": "AKIA-test",
            "aws_secret_access_key": "secret-test",
        }
        mock_resource.assert_called_once_with("s3", **expected_kwargs)

    def test_init_caches_bucket_handle(self):
        """self.bucket is the Bucket(bucket_name) on the boto3 resource,
        so subsequent save_* calls go to the right bucket."""
        with patch("parsedmarc.s3.boto3.resource") as mock_resource:
            bucket_factory = mock_resource.return_value.Bucket
            bucket_factory.return_value = "bucket-handle"
            client = S3Client(
                bucket_name="my-bucket",
                bucket_path="dmarc",
                region_name="us-east-1",
                endpoint_url="https://s3.example.com",
                access_key_id="k",
                secret_access_key="s",
            )

        bucket_factory.assert_called_once_with("my-bucket")
        self.assertEqual(client.bucket, "bucket-handle")
class TestS3ClientSavePathsAndMetadata(unittest.TestCase):
    """The S3 key is built from the report's begin_date and report_id.
    Wrong format = unfindable reports; wrong metadata filtering = secret
    leakage onto the S3 object."""

    def _client_with_mock_bucket(self):
        """Return an S3Client whose ``bucket`` is a MagicMock.

        boto3.resource is patched only while the client is constructed;
        the bucket is then replaced so put_object calls can be inspected.
        """
        with patch("parsedmarc.s3.boto3.resource"):
            client = S3Client(
                bucket_name="b",
                bucket_path="dmarc",
                region_name="us-east-1",
                endpoint_url="https://s3.example.com",
                access_key_id="k",
                secret_access_key="s",
            )
        client.bucket = MagicMock()
        return client

    @staticmethod
    def _put_object_kwargs(client):
        """Return the keyword arguments of the single put_object call.

        Asserting the call count first turns "nothing was uploaded" into
        a clear assertion failure (instead of an AttributeError on a
        ``None`` call_args) and also catches accidental double uploads.
        """
        client.bucket.put_object.assert_called_once()
        return client.bucket.put_object.call_args.kwargs

    def test_aggregate_dispatches_with_aggregate_in_key_path(self):
        """save_aggregate_report_to_s3 puts the object under
        <bucket_path>/aggregate/year=YYYY/month=MM/day=DD/<report_id>.json."""
        client = self._client_with_mock_bucket()
        client.save_aggregate_report_to_s3(_sample_aggregate_report())
        self.assertEqual(
            self._put_object_kwargs(client)["Key"],
            "dmarc/aggregate/year=2024/month=01/day=15/agg-123.json",
        )

    def test_failure_dispatches_with_failure_in_key_path(self):
        """save_failure_report_to_s3 puts the object under the
        ``failure`` segment of the key path."""
        client = self._client_with_mock_bucket()
        report = _sample_aggregate_report()
        report["report_metadata"]["report_id"] = "fail-789"
        client.save_failure_report_to_s3(report)
        key = self._put_object_kwargs(client)["Key"]
        self.assertEqual(key, "dmarc/failure/year=2024/month=01/day=15/fail-789.json")

    def test_smtp_tls_uses_report_begin_date(self):
        """SMTP TLS reports are flat — no report_metadata — and
        begin_date is the ISO string produced by parse_smtp_tls_report_json.
        The S3 path-builder parses that string into a datetime for the
        year=/month=/day= key segments.

        Regression: an earlier version assumed ALL reports carried a
        report_metadata sub-object, which crashed with KeyError on every
        SMTP TLS save. The CLI swallowed the error and only logged it,
        so the bug was invisible in production."""
        client = self._client_with_mock_bucket()
        client.save_smtp_tls_report_to_s3(_sample_smtp_tls_report())
        key = self._put_object_kwargs(client)["Key"]
        self.assertEqual(key, "dmarc/smtp_tls/year=2024/month=02/day=03/tls-456.json")

    def test_smtp_tls_metadata_comes_from_flat_report_fields(self):
        """SMTP TLS object metadata is built from the flat report
        instead of report_metadata. organization_name is renamed to
        org_name (the S3 metadata key) for consistency with DMARC."""
        client = self._client_with_mock_bucket()
        client.save_smtp_tls_report_to_s3(_sample_smtp_tls_report())
        meta = self._put_object_kwargs(client)["Metadata"]
        self.assertEqual(meta["org_name"], "example.com")
        self.assertEqual(meta["report_id"], "tls-456")
        self.assertEqual(meta["begin_date"], "2024-02-03T00:00:00Z")
        self.assertEqual(meta["end_date"], "2024-02-04T00:00:00Z")

    def test_object_body_is_json_serialized_report(self):
        """The uploaded Body is the report serialized as JSON."""
        client = self._client_with_mock_bucket()
        report = _sample_aggregate_report()
        client.save_aggregate_report_to_s3(report)
        body = self._put_object_kwargs(client)["Body"]
        # Round-trip the JSON to make sure it actually deserializes and
        # carries every top-level key the source report had.
        self.assertEqual(json.loads(body), report)

    def test_metadata_filtered_to_documented_keys_only(self):
        """report_metadata fields outside `metadata_keys` must not be
        attached to the S3 object — they could leak large or sensitive
        payloads (errors lists, internal IDs) into object metadata."""
        client = self._client_with_mock_bucket()
        report = _sample_aggregate_report()
        report["report_metadata"]["errors"] = ["a", "b"]
        report["report_metadata"]["internal_diag"] = "secret"
        client.save_aggregate_report_to_s3(report)
        meta = self._put_object_kwargs(client)["Metadata"]
        self.assertEqual(
            set(meta.keys()),
            {"org_name", "org_email", "report_id", "begin_date", "end_date"},
        )
        self.assertNotIn("errors", meta)
        self.assertNotIn("internal_diag", meta)
class TestS3ClientClose(unittest.TestCase):
    """close() must release the underlying boto3 client; a slow leak
    here matters for long-running watch-mode processes."""

    def test_close_calls_underlying_client_close(self):
        """close() invokes close() on the boto3 resource's meta.client
        exactly once."""
        with patch("parsedmarc.s3.boto3.resource") as mock_resource:
            client = S3Client(
                bucket_name="b",
                bucket_path="p",
                region_name="r",
                endpoint_url="https://s3.example.com",
                access_key_id="k",
                secret_access_key="s",
            )
            client.close()
            mock_resource.return_value.meta.client.close.assert_called_once()

    def test_close_swallows_exceptions_from_underlying_client(self):
        """close() is called during shutdown/reload; if boto3 raises
        from the close path, we don't want it to propagate and prevent
        clean exit. The except is defensive but deliberate."""
        with patch("parsedmarc.s3.boto3.resource") as mock_resource:
            underlying_close = mock_resource.return_value.meta.client.close
            underlying_close.side_effect = RuntimeError("boom")
            client = S3Client(
                bucket_name="b",
                bucket_path="p",
                region_name="r",
                endpoint_url="https://s3.example.com",
                access_key_id="k",
                secret_access_key="s",
            )
            # Should not raise.
            client.close()
            # Prove the raising close() was actually reached; without
            # this the test would also pass if S3Client.close() silently
            # stopped calling the underlying client at all.
            underlying_close.assert_called_once()
class TestS3ClientBackwardCompatAlias(unittest.TestCase):
    """The legacy ``save_forensic_report_to_s3`` name must stay bound to
    the same function object as ``save_failure_report_to_s3``."""

    def test_forensic_alias_points_to_failure_method(self):
        """Identity, not mere equality: both class attributes resolve to
        one and the same function."""
        legacy_name = S3Client.save_forensic_report_to_s3
        current_name = S3Client.save_failure_report_to_s3
        self.assertIs(legacy_name, current_name)
# Allow running this file directly as a script. verbosity=2 makes
# unittest report each test individually instead of printing dots.
if __name__ == "__main__":
    unittest.main(verbosity=2)