test: cover parsedmarc's mailbox processing loop end-to-end on a real Maildir (#777)

AGENTS.md notes get_dmarc_reports_from_mailbox was halted at low coverage because honest testing needed a live IMAP server or mocks so deep they test the mock. mailsuite's MaildirConnection is a real on-disk backend with no network or credentials, so the fetch -> parse/classify -> route loop can now be exercised for real in CI. TestGetDmarcReportsFromMailboxMaildir delivers real sample reports (one aggregate, failure, and SMTP-TLS email) plus an unparseable message into a Maildir INBOX, runs get_dmarc_reports_from_mailbox offline, and asserts on observable results — parsed report counts and which archive subfolder each message physically lands in: - each report type routed to Archive/{Aggregate,Failure,SMTP-TLS}, the junk message to Archive/Invalid, INBOX drained - delete=True removes processed messages instead of archiving them - test=True parses and returns reports but moves nothing and creates no folders setUp resets the module-global SEEN_AGGREGATE_REPORT_IDS dedup cache so test order can't drop an already-"seen" aggregate report, and the maildir lives at a fresh subpath so mailbox.Maildir(create=True) actually builds cur/new/tmp. Lifts parsedmarc/__init__.py from 76% to 82%, honestly. Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-07-29 03:44:54 +00:00 · 2026-05-21 12:37:24 -04:00
parent a6778707d7
commit ef2fb84cc0
1 changed files with 86 additions and 0 deletions
@@ -2433,6 +2433,92 @@ class TestMigrateForensicArchiveFolderMaildir(unittest.TestCase):
        self.assertEqual(result["failure_reports"], [])


+class TestGetDmarcReportsFromMailboxMaildir(unittest.TestCase):
+    """parsedmarc's real mailbox processing loop, end to end on an on-disk
+    Maildir (mailsuite MaildirConnection, no mocks, offline parsing): fetch
+    from INBOX, parse and classify each message, then route it to the matching
+    archive subfolder — or delete it / leave it, per mode. This path was
+    previously untestable without a live IMAP server (see AGENTS.md), so it sat
+    uncovered; the Maildir backend lets it run in CI with no network or
+    credentials, asserting on the observable result (parsed counts + where each
+    message physically ended up), not on mock call records."""
+
+    AGGREGATE = "samples/aggregate/twilight.eml"
+    FAILURE = "samples/failure/dmarc_ruf_report_linkedin.eml"
+    SMTP_TLS = "samples/smtp_tls/google.com_smtp_tls_report.eml"
+    JUNK = b"From: noise@example.com\nSubject: not a report\n\nplain text\n"
+
+    def setUp(self):
+        self._tmp = mkdtemp()
+        self.addCleanup(rmtree, self._tmp, ignore_errors=True)
+        # Aggregate dedup is a module-global ExpiringDict; reset it so an
+        # aggregate report "seen" by an earlier test isn't silently dropped
+        # from this test's results.
+        parsedmarc.SEEN_AGGREGATE_REPORT_IDS.clear()
+        # Use a not-yet-existing subpath so mailbox.Maildir(create=True) builds
+        # cur/new/tmp (it skips creation if the directory already exists, which
+        # mkdtemp's would). Deliver straight to disk; the connection is built
+        # afterwards (in _run) so its first read sees every delivered message.
+        self._maildir = os.path.join(self._tmp, "Maildir")
+        self._inbox = mailbox.Maildir(self._maildir, create=True)
+
+    def _deliver(self, source):
+        raw = open(source, "rb").read() if isinstance(source, str) else source
+        self._inbox.add(mailbox.MaildirMessage(raw))
+        self._inbox.flush()
+
+    def _run(self, **kwargs):
+        conn = MaildirConnection(self._maildir, maildir_create=True)
+        result = parsedmarc.get_dmarc_reports_from_mailbox(
+            connection=conn, offline=True, **kwargs
+        )
+        return conn, result
+
+    def test_each_report_type_routed_to_its_archive_subfolder(self):
+        """One report of each type plus an unparseable message: each is filed
+        under the correct subfolder (Aggregate / Failure / SMTP-TLS / Invalid)
+        and the INBOX is drained."""
+        self._deliver(self.AGGREGATE)
+        self._deliver(self.FAILURE)
+        self._deliver(self.SMTP_TLS)
+        self._deliver(self.JUNK)
+
+        conn, result = self._run()
+
+        self.assertEqual(len(result["aggregate_reports"]), 1)
+        self.assertEqual(len(result["failure_reports"]), 1)
+        self.assertEqual(len(result["smtp_tls_reports"]), 1)
+
+        self.assertEqual(conn.fetch_messages("INBOX"), [])
+        self.assertEqual(len(conn.fetch_messages("Archive/Aggregate")), 1)
+        self.assertEqual(len(conn.fetch_messages("Archive/Failure")), 1)
+        self.assertEqual(len(conn.fetch_messages("Archive/SMTP-TLS")), 1)
+        self.assertEqual(len(conn.fetch_messages("Archive/Invalid")), 1)
+
+    def test_delete_mode_removes_processed_messages(self):
+        """delete=True: a parsed message is removed from the INBOX rather than
+        archived."""
+        self._deliver(self.FAILURE)
+
+        conn, result = self._run(delete=True)
+
+        self.assertEqual(len(result["failure_reports"]), 1)
+        self.assertEqual(conn.fetch_messages("INBOX"), [])
+        # The Failure folder is created but nothing is filed there — deleted.
+        self.assertEqual(conn.fetch_messages("Archive/Failure"), [])
+
+    def test_test_mode_parses_without_moving_or_creating_folders(self):
+        """test=True: the report is parsed and returned, but the message stays
+        in the INBOX and no archive folders are created/touched."""
+        self._deliver(self.FAILURE)
+
+        conn, result = self._run(test=True)
+
+        self.assertEqual(len(result["failure_reports"]), 1)
+        self.assertEqual(len(conn.fetch_messages("INBOX")), 1)
+        self.assertFalse(conn.folder_exists("Archive/Failure"))
+
+
 class TestEmailResultsErrorBranches(unittest.TestCase):
    """email_results requires mail_to to be a list — this is enforced
    by an assert. A regression that dropped the assert would mean the