Merge branch 'master' into copilot/support-dmarcbis-reports

2026-06-11 13:09:44 +00:00 · 2026-03-10 12:21:07 -04:00
parent a12fe811d0 7929919223
commit 42ae29eaae
12 changed files with 734 additions and 32 deletions
@@ -0,0 +1,72 @@
+# Issue form for reporting a reproducible parsedmarc bug.
+# The title is pre-filled with "[Bug]: " and the `bug` label is listed.
+name: Bug report
+description: Report a reproducible parsedmarc bug
+title: "[Bug]: "
+labels:
+  - bug
+body:
+  # Required: parsedmarc version or commit in use.
+  - type: input
+    id: version
+    attributes:
+      label: parsedmarc version
+      description: Include the parsedmarc version or commit if known.
+      placeholder: 9.x.x
+    validations:
+      required: true
+  # Required: which mailbox or input path the report came through.
+  - type: dropdown
+    id: input_backend
+    attributes:
+      label: Input backend
+      description: Which input path or mailbox backend is involved?
+      options:
+        - IMAP
+        - MS Graph
+        - Gmail API
+        - Maildir
+        - mbox
+        - Local file / direct parse
+        - Other
+    validations:
+      required: true
+  # Required: free-form description of the runtime environment.
+  - type: textarea
+    id: environment
+    attributes:
+      label: Environment
+      description: Runtime, container image, OS, Python version, or deployment details.
+      placeholder: Docker on Debian, Python 3.12, parsedmarc installed from PyPI
+    validations:
+      required: true
+  # Optional: config fragment, rendered as an ini code block.
+  - type: textarea
+    id: config
+    attributes:
+      label: Sanitized config
+      description: Include the relevant config fragment with secrets removed.
+      render: ini
+  # Required: reproduction steps; the placeholder shows the expected shape.
+  - type: textarea
+    id: steps
+    attributes:
+      label: Steps to reproduce
+      description: Describe the smallest reproducible sequence you can.
+      placeholder: |
+        1. Configure parsedmarc with ...
+        2. Run ...
+        3. Observe ...
+    validations:
+      required: true
+  # Required: expected behavior contrasted with what actually happened.
+  - type: textarea
+    id: expected_actual
+    attributes:
+      label: Expected vs actual behavior
+      description: What did you expect, and what happened instead?
+    validations:
+      required: true
+  # Optional: logs or traceback, rendered as a plain-text code block.
+  - type: textarea
+    id: logs
+    attributes:
+      label: Logs or traceback
+      description: Paste sanitized logs or a traceback if available.
+      render: text
+  # Optional: whether a sanitized sample report or message can be shared.
+  - type: textarea
+    id: samples
+    attributes:
+      label: Sample report availability
+      description: If you can share a sanitized sample report or message, note that here.
@@ -0,0 +1,5 @@
+# Blank issues remain allowed. The contact link below points security
+# reports at the security policy instead of the public issue tracker.
+blank_issues_enabled: true
+contact_links:
+  - name: Security issue
+    url: https://github.com/domainaware/parsedmarc/security/policy
+    about: Please use the security policy and avoid filing public issues for undisclosed vulnerabilities.
@@ -0,0 +1,30 @@
+# Issue form for suggesting a new feature or behavior change.
+# The title is pre-filled with "[Feature]: " and the `enhancement` label
+# is listed.
+name: Feature request
+description: Suggest a new feature or behavior change
+title: "[Feature]: "
+labels:
+  - enhancement
+body:
+  # Required: the workflow or limitation motivating the request.
+  - type: textarea
+    id: problem
+    attributes:
+      label: Problem statement
+      description: What workflow or limitation are you trying to solve?
+    validations:
+      required: true
+  # Required: the behavior the requester wants.
+  - type: textarea
+    id: proposal
+    attributes:
+      label: Proposed behavior
+      description: Describe the feature or behavior you want.
+    validations:
+      required: true
+  # Optional: workarounds or other approaches already considered.
+  - type: textarea
+    id: alternatives
+    attributes:
+      label: Alternatives considered
+      description: Describe workarounds or alternative approaches you considered.
+  # Optional: config, output, performance, or deployment implications.
+  - type: textarea
+    id: impact
+    attributes:
+      label: Compatibility or operational impact
+      description: Note config, output, performance, or deployment implications if relevant.
@@ -0,0 +1,24 @@
+## Summary
+
+- 
+
+## Why
+
+- 
+
+## Testing
+
+- 
+
+## Backward Compatibility / Risk
+
+- 
+
+## Related Issue
+
+- Closes #
+
+## Checklist
+
+- [ ] Tests added or updated if behavior changed
+- [ ] Docs updated if config or user-facing behavior changed
@@ -36,6 +36,23 @@ Forensic reports have been renamed to failure reports throughout the project to
  - **Kibana**: Index pattern uses `dmarc_f*` to match both `dmarc_forensic*` and `dmarc_failure*`
  - **Splunk**: Base search queries `(sourcetype="dmarc:failure" OR sourcetype="dmarc:forensic")`
  - **Elasticsearch/OpenSearch**: Duplicate-check searches query across both `dmarc_failure*` and `dmarc_forensic*` index patterns
+## 9.2.0
+
+### Added
+
+- OpenSearch AWS SigV4 authentication support (PR #673)
+- IMAP move/delete compatibility fallbacks (PR #671)
+- `fail_on_output_error` CLI option for sink failures (PR #672)
+- Gmail service account auth mode for non-interactive runs (PR #676)
+- Microsoft Graph certificate authentication support (PRs #692 and #693)
+- Microsoft Graph well-known folder fallback for root listing failures (PRs #618 and #684 close #609)
+
+### Fixed
+
+- Pass mailbox since filter through `watch_inbox` callback (PR #670 closes issue #581)
+- `parsedmarc.mail.gmail.GmailConnection.delete_message` now properly calls the Gmail API (PR #668)
+- Avoid extra mailbox fetch in batch and test mode (PR #691 closes #533)
+
 ## 9.1.2

 ### Fixes
@@ -0,0 +1,78 @@
+# Contributing
+
+Thanks for contributing to parsedmarc.
+
+## Local setup
+
+Use a virtual environment for local development.
+
+```bash
+python3 -m venv .venv
+. .venv/bin/activate
+python -m pip install --upgrade pip
+pip install .[build]
+```
+
+## Before opening a pull request
+
+Run the checks that match your change:
+
+```bash
+ruff check .
+pytest --cov --cov-report=xml tests.py
+```
+
+If you changed documentation:
+
+```bash
+cd docs
+make html
+```
+
+If you changed CLI behavior or parsing logic, it is also useful to exercise the
+sample reports:
+
+```bash
+parsedmarc --debug -c ci.ini samples/aggregate/*
+parsedmarc --debug -c ci.ini samples/forensic/*
+```
+
+To skip DNS lookups during tests, set:
+
+```bash
+export GITHUB_ACTIONS=true
+```
+
+## Pull request guidelines
+
+- Keep pull requests small and focused. Separate bug fixes, docs updates, and
+  repo-maintenance changes where practical.
+- Add or update tests when behavior changes.
+- Update docs when configuration or user-facing behavior changes.
+- Include a short summary, the reason for the change, and the testing you ran.
+- Link the related issue when there is one.
+
+## Branch maintenance
+
+Upstream `master` may move quickly. Before asking for review or after another PR
+lands, rebase your branch onto the current upstream branch and force-push with
+lease if needed:
+
+```bash
+git fetch upstream
+git rebase upstream/master
+git push --force-with-lease
+```
+
+## CI and coverage
+
+GitHub Actions is the source of truth for linting, docs, and test status.
+
+Codecov patch coverage is usually the most relevant signal for small PRs. Project
+coverage can be noisier when the base comparison is stale, so interpret it in
+the context of the actual diff.
+
+## Questions
+
+Use GitHub issues for bugs and feature requests. If you are not sure whether a
+change is wanted, opening an issue first is usually the safest path.
@@ -0,0 +1,29 @@
+# Security Policy
+
+## Reporting a vulnerability
+
+Please do not open a public GitHub issue for an undisclosed security
+vulnerability. Use GitHub private vulnerability reporting in the Security tab of this project instead.
+
+When reporting a vulnerability, include:
+
+- the affected parsedmarc version or commit
+- the component or integration involved
+- clear reproduction details if available
+- potential impact
+- any suggested mitigation or workaround
+
+## Supported versions
+
+Security fixes will be applied to the latest released version and
+the current `master` branch.
+
+Older versions will not receive backported fixes.
+
+## Disclosure process
+
+After a report is received, maintainers can validate the issue, assess impact,
+and coordinate a fix before public disclosure.
+
+Please avoid publishing proof-of-concept details until maintainers have had a
+reasonable opportunity to investigate and release a fix or mitigation.
@@ -203,7 +203,7 @@ The full set of configuration options are:
  - `password` - str: The IMAP password
 - `msgraph`
  - `auth_method` - str: Authentication method, valid types are
-      `UsernamePassword`, `DeviceCode`, or `ClientSecret`
+      `UsernamePassword`, `DeviceCode`, `ClientSecret`, or `Certificate`
      (Default: `UsernamePassword`).
  - `user` - str: The M365 user, required when the auth method is
      UsernamePassword
@@ -211,6 +211,11 @@ The full set of configuration options are:
      method is UsernamePassword
  - `client_id` - str: The app registration's client ID
  - `client_secret` - str: The app registration's secret
+  - `certificate_path` - str: Path to a PEM or PKCS12 certificate
+      including the private key. Required when the auth method is
+      `Certificate`
+  - `certificate_password` - str: Optional password for the
+      certificate file when using `Certificate` auth
  - `tenant_id` - str: The Azure AD tenant ID. This is required
      for all auth methods except UsernamePassword.
  - `mailbox` - str: The mailbox name. This defaults to the
@@ -248,6 +253,9 @@ The full set of configuration options are:
    -Description "Restrict access to dmarc reports mailbox."
    ```

+    The same application permission and mailbox scoping guidance
+    applies to the `Certificate` auth method.
+
    :::
 - `elasticsearch`
  - `hosts` - str: A comma separated list of hostnames and ports
@@ -996,10 +996,12 @@ def extract_report(content: Union[bytes, str, BinaryIO]) -> str:
    return report


-def extract_report_from_file_path(file_path: str):
+def extract_report_from_file_path(
+    file_path: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
+) -> str:
    """Extracts report from a file at the given file_path"""
    try:
-        with open(file_path, "rb") as report_file:
+        with open(os.fspath(file_path), "rb") as report_file:
            return extract_report(report_file.read())
    except FileNotFoundError:
        raise ParserError("File was not found")
@@ -1703,7 +1705,7 @@ def parse_report_email(


 def parse_report_file(
-    input_: Union[bytes, str, BinaryIO],
+    input_: Union[bytes, str, os.PathLike[str], os.PathLike[bytes], BinaryIO],
    *,
    nameservers: Optional[list[str]] = None,
    dns_timeout: float = 2.0,
@@ -1720,7 +1722,8 @@ def parse_report_file(
    file-like object. or bytes

    Args:
-        input_ (str | bytes | BinaryIO): A path to a file, a file like object, or bytes
+        input_ (str | os.PathLike | bytes | BinaryIO): A path to a file,
+            a file-like object, or bytes
        nameservers (list): A list of one or more nameservers to use
            (Cloudflare's public DNS resolvers by default)
        dns_timeout (float): Sets the DNS timeout in seconds
@@ -1737,9 +1740,10 @@ def parse_report_file(
        dict: The parsed DMARC report
    """
    file_object: BinaryIO
-    if isinstance(input_, str):
-        logger.debug("Parsing {0}".format(input_))
-        file_object = open(input_, "rb")
+    if isinstance(input_, (str, os.PathLike)):
+        file_path = os.fspath(input_)
+        logger.debug("Parsing {0}".format(file_path))
+        file_object = open(file_path, "rb")
    elif isinstance(input_, (bytes, bytearray, memoryview)):
        file_object = BytesIO(bytes(input_))
    else:
@@ -2180,14 +2184,17 @@ def get_dmarc_reports_from_mailbox(
        "smtp_tls_reports": smtp_tls_reports,
    }

-    if current_time:
-        total_messages = len(
-            connection.fetch_messages(reports_folder, since=current_time)
-        )
+    if not test and not batch_size:
+        if current_time:
+            total_messages = len(
+                connection.fetch_messages(reports_folder, since=current_time)
+            )
+        else:
+            total_messages = len(connection.fetch_messages(reports_folder))
    else:
-        total_messages = len(connection.fetch_messages(reports_folder))
+        total_messages = 0

-    if not test and not batch_size and total_messages > 0:
+    if total_messages > 0:
        # Process emails that came in during the last run
        results = get_dmarc_reports_from_mailbox(
            connection=connection,
@@ -644,6 +644,8 @@ def _main():
        graph_password=None,
        graph_client_id=None,
        graph_client_secret=None,
+        graph_certificate_path=None,
+        graph_certificate_password=None,
        graph_tenant_id=None,
        graph_mailbox=None,
        graph_allow_unencrypted_storage=False,
@@ -1018,6 +1020,19 @@ def _main():
                    )
                    exit(-1)

+            if opts.graph_auth_method == AuthMethod.Certificate.name:
+                if "certificate_path" in graph_config:
+                    opts.graph_certificate_path = graph_config["certificate_path"]
+                else:
+                    logger.critical(
+                        "certificate_path setting missing from the msgraph config section"
+                    )
+                    exit(-1)
+                if "certificate_password" in graph_config:
+                    opts.graph_certificate_password = graph_config[
+                        "certificate_password"
+                    ]
+
            if "client_id" in graph_config:
                opts.graph_client_id = graph_config["client_id"]
            else:
@@ -1760,6 +1775,8 @@ def _main():
                tenant_id=opts.graph_tenant_id,
                client_id=opts.graph_client_id,
                client_secret=opts.graph_client_secret,
+                certificate_path=opts.graph_certificate_path,
+                certificate_password=opts.graph_certificate_password,
                username=opts.graph_user,
                password=opts.graph_password,
                token_file=opts.graph_token_file,
@@ -12,19 +12,25 @@ from azure.identity import (
    UsernamePasswordCredential,
    DeviceCodeCredential,
    ClientSecretCredential,
+    CertificateCredential,
    TokenCachePersistenceOptions,
    AuthenticationRecord,
 )
 from msgraph.core import GraphClient
+from requests.exceptions import RequestException

 from parsedmarc.log import logger
 from parsedmarc.mail.mailbox_connection import MailboxConnection

+GRAPH_REQUEST_RETRY_ATTEMPTS = 3
+GRAPH_REQUEST_RETRY_DELAY_SECONDS = 5
+

 class AuthMethod(Enum):
    DeviceCode = 1
    UsernamePassword = 2
    ClientSecret = 3
+    Certificate = 4


 def _get_cache_args(token_path: Path, allow_unencrypted_storage):
@@ -83,30 +89,55 @@ def _generate_credential(auth_method: str, token_path: Path, **kwargs):
            tenant_id=kwargs["tenant_id"],
            client_secret=kwargs["client_secret"],
        )
+    elif auth_method == AuthMethod.Certificate.name:
+        cert_path = kwargs.get("certificate_path")
+        if not cert_path:
+            raise ValueError(
+                "certificate_path is required when auth_method is 'Certificate'"
+            )
+        credential = CertificateCredential(
+            client_id=kwargs["client_id"],
+            tenant_id=kwargs["tenant_id"],
+            certificate_path=cert_path,
+            password=kwargs.get("certificate_password"),
+        )
    else:
        raise RuntimeError(f"Auth method {auth_method} not found")
    return credential


 class MSGraphConnection(MailboxConnection):
+    _WELL_KNOWN_FOLDERS = {
+        "inbox": "inbox",
+        "archive": "archive",
+        "drafts": "drafts",
+        "sentitems": "sentitems",
+        "deleteditems": "deleteditems",
+        "junkemail": "junkemail",
+    }
+
    def __init__(
        self,
        auth_method: str,
        mailbox: str,
        graph_url: str,
        client_id: str,
-        client_secret: str,
-        username: str,
-        password: str,
+        client_secret: Optional[str],
+        username: Optional[str],
+        password: Optional[str],
        tenant_id: str,
        token_file: str,
        allow_unencrypted_storage: bool,
+        certificate_path: Optional[str] = None,
+        certificate_password: Optional[Union[str, bytes]] = None,
    ):
        token_path = Path(token_file)
        credential = _generate_credential(
            auth_method,
            client_id=client_id,
            client_secret=client_secret,
+            certificate_path=certificate_path,
+            certificate_password=certificate_password,
            username=username,
            password=password,
            tenant_id=tenant_id,
@@ -117,10 +148,10 @@ class MSGraphConnection(MailboxConnection):
            "credential": credential,
            "cloud": graph_url,
        }
-        if not isinstance(credential, ClientSecretCredential):
+        if not isinstance(credential, (ClientSecretCredential, CertificateCredential)):
            scopes = ["Mail.ReadWrite"]
            # Detect if mailbox is shared
-            if mailbox and username != mailbox:
+            if mailbox and username and username != mailbox:
                scopes = ["Mail.ReadWrite.Shared"]
            auth_record = credential.authenticate(scopes=scopes)
            _cache_auth_record(auth_record, token_path)
@@ -129,6 +160,23 @@ class MSGraphConnection(MailboxConnection):
        self._client = GraphClient(**client_params)
        self.mailbox_name = mailbox

+    def _request_with_retries(self, method_name: str, *args, **kwargs):
+        """Calls a Graph client method, retrying when a RequestException
+        is raised
+
+        Args:
+            method_name (str): Name of the ``self._client`` method to call
+                (callers in this class pass "get", "post", "patch", or
+                "delete")
+            *args: Positional arguments forwarded to the client method
+            **kwargs: Keyword arguments forwarded to the client method
+
+        Returns:
+            Whatever the client method returns; the response status code
+            is not inspected here
+
+        Raises:
+            RequestException: Re-raised when the last attempt also fails
+            RuntimeError: If GRAPH_REQUEST_RETRY_ATTEMPTS is less than 1,
+                so the loop body never runs
+        """
+        for attempt in range(1, GRAPH_REQUEST_RETRY_ATTEMPTS + 1):
+            try:
+                return getattr(self._client, method_name)(*args, **kwargs)
+            except RequestException as error:
+                # Out of attempts: let the original exception propagate
+                if attempt == GRAPH_REQUEST_RETRY_ATTEMPTS:
+                    raise
+                logger.warning(
+                    "Transient MS Graph %s error on attempt %s/%s: %s",
+                    method_name.upper(),
+                    attempt,
+                    GRAPH_REQUEST_RETRY_ATTEMPTS,
+                    error,
+                )
+                # Fixed delay between attempts (no backoff)
+                sleep(GRAPH_REQUEST_RETRY_DELAY_SECONDS)
+        # Only reachable when the range above is empty
+        raise RuntimeError("no retry attempts configured")
+
    def create_folder(self, folder_name: str):
        sub_url = ""
        path_parts = folder_name.split("/")
@@ -143,7 +191,7 @@ class MSGraphConnection(MailboxConnection):

        request_body = {"displayName": folder_name}
        request_url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
-        resp = self._client.post(request_url, json=request_body)
+        resp = self._request_with_retries("post", request_url, json=request_body)
        if resp.status_code == 409:
            logger.debug(f"Folder {folder_name} already exists, skipping creation")
        elif resp.status_code == 201:
@@ -173,7 +221,7 @@ class MSGraphConnection(MailboxConnection):
            params["$top"] = batch_size
        else:
            params["$top"] = 100
-        result = self._client.get(url, params=params)
+        result = self._request_with_retries("get", url, params=params)
        if result.status_code != 200:
            raise RuntimeError(f"Failed to fetch messages {result.text}")
        messages = result.json()["value"]
@@ -181,7 +229,7 @@ class MSGraphConnection(MailboxConnection):
        while "@odata.nextLink" in result.json() and (
            since is not None or (batch_size == 0 or batch_size - len(messages) > 0)
        ):
-            result = self._client.get(result.json()["@odata.nextLink"])
+            result = self._request_with_retries("get", result.json()["@odata.nextLink"])
            if result.status_code != 200:
                raise RuntimeError(f"Failed to fetch messages {result.text}")
            messages.extend(result.json()["value"])
@@ -190,7 +238,7 @@ class MSGraphConnection(MailboxConnection):
    def mark_message_read(self, message_id: str):
        """Marks a message as read"""
        url = f"/users/{self.mailbox_name}/messages/{message_id}"
-        resp = self._client.patch(url, json={"isRead": "true"})
+        resp = self._request_with_retries("patch", url, json={"isRead": "true"})
        if resp.status_code != 200:
            raise RuntimeWarning(
                f"Failed to mark message read{resp.status_code}: {resp.json()}"
@@ -198,7 +246,7 @@ class MSGraphConnection(MailboxConnection):

    def fetch_message(self, message_id: str, **kwargs):
        url = f"/users/{self.mailbox_name}/messages/{message_id}/$value"
-        result = self._client.get(url)
+        result = self._request_with_retries("get", url)
        if result.status_code != 200:
            raise RuntimeWarning(
                f"Failed to fetch message{result.status_code}: {result.json()}"
@@ -210,7 +258,7 @@ class MSGraphConnection(MailboxConnection):

    def delete_message(self, message_id: str):
        url = f"/users/{self.mailbox_name}/messages/{message_id}"
-        resp = self._client.delete(url)
+        resp = self._request_with_retries("delete", url)
        if resp.status_code != 204:
            raise RuntimeWarning(
                f"Failed to delete message {resp.status_code}: {resp.json()}"
@@ -220,7 +268,7 @@ class MSGraphConnection(MailboxConnection):
        folder_id = self._find_folder_id_from_folder_path(folder_name)
        request_body = {"destinationId": folder_id}
        url = f"/users/{self.mailbox_name}/messages/{message_id}/move"
-        resp = self._client.post(url, json=request_body)
+        resp = self._request_with_retries("post", url, json=request_body)
        if resp.status_code != 201:
            raise RuntimeWarning(
                f"Failed to move message {resp.status_code}: {resp.json()}"
@@ -248,6 +296,19 @@ class MSGraphConnection(MailboxConnection):
        else:
            return self._find_folder_id_with_parent(folder_name, None)

+    def _get_well_known_folder_id(self, folder_name: str) -> Optional[str]:
+        """Looks up a folder ID through a well-known folder alias
+
+        Args:
+            folder_name (str): The folder name to resolve
+
+        Returns:
+            Optional[str]: The ``id`` from the response payload, or None
+            when the name has no entry in ``_WELL_KNOWN_FOLDERS`` or the
+            request does not return status 200
+        """
+        # Normalize so names like "Sent Items" or "junk-email" match the
+        # lowercase, separator-free keys of _WELL_KNOWN_FOLDERS
+        folder_key = folder_name.lower().replace(" ", "").replace("-", "")
+        alias = self._WELL_KNOWN_FOLDERS.get(folder_key)
+        if alias is None:
+            return None
+
+        # Address the folder directly by alias instead of listing folders
+        url = f"/users/{self.mailbox_name}/mailFolders/{alias}?$select=id,displayName"
+        folder_resp = self._request_with_retries("get", url)
+        if folder_resp.status_code != 200:
+            return None
+        payload = folder_resp.json()
+        return payload.get("id")
+
    def _find_folder_id_with_parent(
        self, folder_name: str, parent_folder_id: Optional[str]
    ):
@@ -256,8 +317,12 @@ class MSGraphConnection(MailboxConnection):
            sub_url = f"/{parent_folder_id}/childFolders"
        url = f"/users/{self.mailbox_name}/mailFolders{sub_url}"
        filter = f"?$filter=displayName eq '{folder_name}'"
-        folders_resp = self._client.get(url + filter)
+        folders_resp = self._request_with_retries("get", url + filter)
        if folders_resp.status_code != 200:
+            if parent_folder_id is None:
+                well_known_folder_id = self._get_well_known_folder_id(folder_name)
+                if well_known_folder_id:
+                    return well_known_folder_id
            raise RuntimeWarning(f"Failed to list folders.{folders_resp.json()}")
        folders: list = folders_resp.json()["value"]
        matched_folders = [
@@ -101,14 +101,23 @@ class Test(unittest.TestCase):
    def testExtractReportXML(self):
        """Test extract report function for XML input"""
        print()
-        file = "samples/extract_report/nice-input.xml"
-        print("Testing {0}: ".format(file), end="")
-        xmlout = parsedmarc.extract_report_from_file_path(file)
-        with open("samples/extract_report/nice-input.xml") as f:
-            xmlin = f.read()
+        report_path = "samples/extract_report/nice-input.xml"
+        print("Testing {0}: ".format(report_path), end="")
+        xmlout = parsedmarc.extract_report_from_file_path(report_path)
+        xmlin_file = open("samples/extract_report/nice-input.xml")
+        xmlin = xmlin_file.read()
+        xmlin_file.close()
        self.assertTrue(compare_xml(xmlout, xmlin))
        print("Passed!")

+    def testExtractReportXMLFromPath(self):
+        """Test extract report function for pathlib.Path input"""
+        # Pass a Path object rather than a str to exercise os.PathLike input
+        report_path = Path("samples/extract_report/nice-input.xml")
+        xmlout = parsedmarc.extract_report_from_file_path(report_path)
+        # The extracted report is compared with the sample file's contents
+        with open("samples/extract_report/nice-input.xml") as xmlin_file:
+            xmlin = xmlin_file.read()
+        self.assertTrue(compare_xml(xmlout, xmlin))
+
    def testExtractReportGZip(self):
        """Test extract report function for gzip input"""
        print()
@@ -134,6 +143,28 @@ class Test(unittest.TestCase):
        self.assertFalse(compare_xml(xmlout, xmlin))
        print("Passed!")

+    def testParseReportFileAcceptsPathForXML(self):
+        """Test parse_report_file with a pathlib.Path to an XML report"""
+        report_path = Path(
+            "samples/aggregate/protection.outlook.com!example.com!1711756800!1711843200.xml"
+        )
+        # NOTE(review): offline=True presumably avoids network lookups
+        # during parsing - confirm against parse_report_file
+        result = parsedmarc.parse_report_file(
+            report_path,
+            offline=True,
+        )
+        self.assertEqual(result["report_type"], "aggregate")
+        self.assertEqual(result["report"]["report_metadata"]["org_name"], "outlook.com")
+
+    def testParseReportFileAcceptsPathForEmail(self):
+        """Test parse_report_file with a pathlib.Path to an .eml report"""
+        report_path = Path(
+            "samples/aggregate/Report domain- borschow.com Submitter- google.com Report-ID- 949348866075514174.eml"
+        )
+        # NOTE(review): offline=True presumably avoids network lookups
+        # during parsing - confirm against parse_report_file
+        result = parsedmarc.parse_report_file(
+            report_path,
+            offline=True,
+        )
+        self.assertEqual(result["report_type"], "aggregate")
+        self.assertEqual(result["report"]["report_metadata"]["org_name"], "google.com")
+
    def testAggregateSamples(self):
        """Test sample aggregate/rua DMARC reports"""
        print()
@@ -2243,6 +2274,31 @@ class TestGraphConnection(unittest.TestCase):
        with self.assertRaises(RuntimeError):
            connection._get_all_messages("/url", batch_size=0, since=None)

+    def testGetAllMessagesRetriesTransientRequestErrors(self):
+        """Test that _get_all_messages retries after a RequestException"""
+        # __new__ bypasses __init__, so no credential or client is built
+        connection = MSGraphConnection.__new__(MSGraphConnection)
+        connection._client = MagicMock()
+        # First GET raises, second GET returns a single-message page
+        connection._client.get.side_effect = [
+            graph_module.RequestException("connection reset"),
+            _FakeGraphResponse(200, {"value": [{"id": "1"}]}),
+        ]
+        # sleep is patched so the retry delay does not slow the test
+        with patch.object(graph_module, "sleep") as mocked_sleep:
+            messages = connection._get_all_messages("/url", batch_size=0, since=None)
+        self.assertEqual([msg["id"] for msg in messages], ["1"])
+        mocked_sleep.assert_called_once_with(graph_module.GRAPH_REQUEST_RETRY_DELAY_SECONDS)
+
+    def testGetAllMessagesRaisesAfterRetryExhaustion(self):
+        """Test that the RequestException propagates once retries run out"""
+        connection = MSGraphConnection.__new__(MSGraphConnection)
+        connection._client = MagicMock()
+        # Every GET raises, so all attempts fail
+        connection._client.get.side_effect = graph_module.RequestException(
+            "connection reset"
+        )
+        with patch.object(graph_module, "sleep") as mocked_sleep:
+            with self.assertRaises(graph_module.RequestException):
+                connection._get_all_messages("/url", batch_size=0, since=None)
+        # A sleep follows every failed attempt except the last one
+        self.assertEqual(
+            mocked_sleep.call_count, graph_module.GRAPH_REQUEST_RETRY_ATTEMPTS - 1
+        )
+
    def testGetAllMessagesNextPageFailure(self):
        connection = MSGraphConnection.__new__(MSGraphConnection)
        first_response = _FakeGraphResponse(
@@ -2376,6 +2432,49 @@ class TestGraphConnection(unittest.TestCase):
            client_id="cid", tenant_id="tenant", client_secret="secret"
        )

+    def testGenerateCredentialCertificate(self):
+        """Test that Certificate auth builds a CertificateCredential"""
+        fake_credential = object()
+        # Replace the credential class so no certificate file is read
+        with patch.object(
+            graph_module, "CertificateCredential", return_value=fake_credential
+        ) as mocked:
+            result = _generate_credential(
+                graph_module.AuthMethod.Certificate.name,
+                Path("/tmp/token"),
+                client_id="cid",
+                client_secret="secret",
+                certificate_path="/tmp/cert.pem",
+                certificate_password="secret-pass",
+                username="user",
+                password="pass",
+                tenant_id="tenant",
+                allow_unencrypted_storage=False,
+            )
+        self.assertIs(result, fake_credential)
+        # Only the certificate-related arguments are forwarded; the
+        # certificate password is passed under the name "password"
+        mocked.assert_called_once_with(
+            client_id="cid",
+            tenant_id="tenant",
+            certificate_path="/tmp/cert.pem",
+            password="secret-pass",
+        )
+
+    def testGenerateCredentialCertificateRequiresPath(self):
+        """Test that Certificate auth without certificate_path raises
+        ValueError"""
+        with self.assertRaisesRegex(
+            ValueError,
+            "certificate_path is required when auth_method is 'Certificate'",
+        ):
+            _generate_credential(
+                graph_module.AuthMethod.Certificate.name,
+                Path("/tmp/token"),
+                client_id="cid",
+                client_secret=None,
+                certificate_path=None,
+                certificate_password="secret-pass",
+                username=None,
+                password=None,
+                tenant_id="tenant",
+                allow_unencrypted_storage=False,
+            )
+
    def testInitUsesSharedMailboxScopes(self):
        class FakeCredential:
            def __init__(self):
@@ -2408,6 +2507,63 @@ class TestGraphConnection(unittest.TestCase):
            graph_client.call_args.kwargs.get("scopes"), ["Mail.ReadWrite.Shared"]
        )

+    def testInitWithoutUsernameUsesDefaultMailReadWriteScope(self):
+        """Test that a missing username keeps the Mail.ReadWrite scope"""
+
+        # Stand-in credential exposing only the authenticate() method
+        class FakeCredential:
+            def __init__(self):
+                self.authenticate = MagicMock(return_value="auth-record")
+
+        fake_credential = FakeCredential()
+        with patch.object(
+            graph_module, "_generate_credential", return_value=fake_credential
+        ):
+            with patch.object(graph_module, "_cache_auth_record") as cache_auth:
+                with patch.object(graph_module, "GraphClient") as graph_client:
+                    # username=None with a mailbox set must not select
+                    # the shared-mailbox scope
+                    MSGraphConnection(
+                        auth_method=graph_module.AuthMethod.DeviceCode.name,
+                        mailbox="owner@example.com",
+                        graph_url="https://graph.microsoft.com",
+                        client_id="cid",
+                        client_secret="secret",
+                        username=None,
+                        password=None,
+                        tenant_id="tenant",
+                        token_file="/tmp/token-file",
+                        allow_unencrypted_storage=True,
+                    )
+        fake_credential.authenticate.assert_called_once_with(scopes=["Mail.ReadWrite"])
+        cache_auth.assert_called_once()
+        graph_client.assert_called_once()
+        self.assertEqual(graph_client.call_args.kwargs.get("scopes"), ["Mail.ReadWrite"])
+
+    def testInitCertificateAuthSkipsInteractiveAuthenticate(self):
+        """Test that Certificate auth skips authenticate() and scopes"""
+
+        # Has no authenticate(); calling it would raise AttributeError
+        class DummyCertificateCredential:
+            pass
+
+        fake_credential = DummyCertificateCredential()
+        # Patch CertificateCredential with the dummy class so the
+        # isinstance check in __init__ matches fake_credential
+        with patch.object(graph_module, "CertificateCredential", DummyCertificateCredential):
+            with patch.object(
+                graph_module, "_generate_credential", return_value=fake_credential
+            ):
+                with patch.object(graph_module, "_cache_auth_record") as cache_auth:
+                    with patch.object(graph_module, "GraphClient") as graph_client:
+                        MSGraphConnection(
+                            auth_method=graph_module.AuthMethod.Certificate.name,
+                            mailbox="shared@example.com",
+                            graph_url="https://graph.microsoft.com",
+                            client_id="cid",
+                            client_secret=None,
+                            certificate_path="/tmp/cert.pem",
+                            certificate_password="secret-pass",
+                            username=None,
+                            password=None,
+                            tenant_id="tenant",
+                            token_file="/tmp/token-file",
+                            allow_unencrypted_storage=False,
+                        )
+        cache_auth.assert_not_called()
+        graph_client.assert_called_once()
+        self.assertNotIn("scopes", graph_client.call_args.kwargs)
+
    def testCreateFolderAndMoveErrors(self):
        connection = MSGraphConnection.__new__(MSGraphConnection)
        connection.mailbox_name = "mailbox@example.com"
@@ -2783,5 +2939,199 @@ since = 2d
        self.assertEqual(mock_watch_inbox.call_args.kwargs.get("since"), "2d")


+class _DummyMailboxConnection:
+    # In-memory stand-in for a mailbox connection: every operation is a no-op,
+    # except that fetch_messages() records the arguments of each call in
+    # self.fetch_calls so tests can count and inspect the fetches.
+
+    def __init__(self):
+        # One dict per fetch_messages() call, in call order.
+        self.fetch_calls = []
+
+    def create_folder(self, folder_name):
+        pass
+
+    def fetch_messages(self, reports_folder, **kwargs):
+        # Record the folder first, then any extra keyword arguments, and
+        # report an empty mailbox.
+        recorded_call = {"reports_folder": reports_folder}
+        recorded_call.update(kwargs)
+        self.fetch_calls.append(recorded_call)
+        return []
+
+    def fetch_message(self, message_id, **kwargs):
+        # Always yields an empty message body.
+        return ""
+
+    def delete_message(self, message_id):
+        pass
+
+    def move_message(self, message_id, folder_name):
+        pass
+
+    def keepalive(self):
+        pass
+
+    def watch(self, check_callback, check_timeout):
+        pass
+
+
+class TestMailboxPerformance(unittest.TestCase):
+    # NOTE(review): besides the mailbox batching test, this class also holds
+    # two MS Graph certificate CLI tests; consider moving them to a dedicated
+    # TestCase.
+
+    def _write_temp_config(self, config_text):
+        # Writes config_text to a temporary .ini file, registers removal of
+        # that file as a test cleanup, and returns the file's path.
+        with tempfile.NamedTemporaryFile("w", suffix=".ini", delete=False) as cfg:
+            cfg.write(config_text)
+            cfg_path = cfg.name
+        self.addCleanup(lambda: os.path.exists(cfg_path) and os.remove(cfg_path))
+        return cfg_path
+
+    def testBatchModeAvoidsExtraFullFetch(self):
+        # With batch_size set, the mailbox must be fetched exactly once.
+        connection = _DummyMailboxConnection()
+        parsedmarc.get_dmarc_reports_from_mailbox(
+            connection=connection,
+            reports_folder="INBOX",
+            test=True,
+            batch_size=10,
+            create_folders=False,
+        )
+        self.assertEqual(len(connection.fetch_calls), 1)
+
+    @patch("parsedmarc.cli.get_dmarc_reports_from_mailbox")
+    @patch("parsedmarc.cli.MSGraphConnection")
+    def testCliPassesMsGraphCertificateAuthSettings(
+        self, mock_graph_connection, mock_get_mailbox_reports
+    ):
+        # The certificate settings from the [msgraph] config section must be
+        # passed to MSGraphConnection as keyword arguments.
+        mock_graph_connection.return_value = object()
+        mock_get_mailbox_reports.return_value = {
+            "aggregate_reports": [],
+            "forensic_reports": [],
+            "smtp_tls_reports": [],
+        }
+
+        config_text = """[general]
+silent = true
+
+[msgraph]
+auth_method = Certificate
+client_id = client-id
+tenant_id = tenant-id
+mailbox = shared@example.com
+certificate_path = /tmp/msgraph-cert.pem
+certificate_password = cert-pass
+"""
+
+        cfg_path = self._write_temp_config(config_text)
+
+        with patch.object(sys, "argv", ["parsedmarc", "-c", cfg_path]):
+            parsedmarc.cli._main()
+
+        # Fail with a clear assertion (not an AttributeError on call_args being
+        # None) if the connection was never constructed.
+        mock_graph_connection.assert_called_once()
+        connection_kwargs = mock_graph_connection.call_args.kwargs
+        self.assertEqual(connection_kwargs.get("auth_method"), "Certificate")
+        self.assertEqual(
+            connection_kwargs.get("certificate_path"),
+            "/tmp/msgraph-cert.pem",
+        )
+        self.assertEqual(
+            connection_kwargs.get("certificate_password"),
+            "cert-pass",
+        )
+
+    @patch("parsedmarc.cli.get_dmarc_reports_from_mailbox")
+    @patch("parsedmarc.cli.MSGraphConnection")
+    @patch("parsedmarc.cli.logger")
+    def testCliRequiresMsGraphCertificatePath(
+        self, mock_logger, mock_graph_connection, mock_get_mailbox_reports
+    ):
+        # Certificate auth without certificate_path must log a critical error
+        # and exit with code -1 before any connection or mailbox fetch is made.
+        config_text = """[general]
+silent = true
+
+[msgraph]
+auth_method = Certificate
+client_id = client-id
+tenant_id = tenant-id
+mailbox = shared@example.com
+"""
+
+        cfg_path = self._write_temp_config(config_text)
+
+        with patch.object(sys, "argv", ["parsedmarc", "-c", cfg_path]):
+            with self.assertRaises(SystemExit) as system_exit:
+                parsedmarc.cli._main()
+
+        self.assertEqual(system_exit.exception.code, -1)
+        mock_logger.critical.assert_called_once_with(
+            "certificate_path setting missing from the msgraph config section"
+        )
+        mock_graph_connection.assert_not_called()
+        mock_get_mailbox_reports.assert_not_called()
+
+
+class _FakeGraphClient:
+    # Canned Graph client: the well-known "inbox" lookup succeeds, the
+    # displayName-filtered listings for 'Inbox' and 'Custom' fail with
+    # ErrorItemNotFound, and every other URL yields a generic 404.
+
+    def get(self, url, params=None):
+        if "/mailFolders/inbox?$select=id,displayName" in url:
+            return _FakeGraphResponse(200, {"id": "inbox-id", "displayName": "Inbox"})
+
+        failing_listings = (
+            "/mailFolders?$filter=displayName eq 'Inbox'",
+            "/mailFolders?$filter=displayName eq 'Custom'",
+        )
+        if any(fragment in url for fragment in failing_listings):
+            not_found_payload = {
+                "error": {
+                    "code": "ErrorItemNotFound",
+                    "message": "Default folder Root not found.",
+                }
+            }
+            return _FakeGraphResponse(404, not_found_payload)
+
+        return _FakeGraphResponse(404, {"error": {"code": "NotFound"}})
+
+
+class TestMSGraphFolderFallback(unittest.TestCase):
+    # Exercises MSGraphConnection folder-ID resolution against a fake Graph
+    # client whose displayName-filtered folder listings fail.
+
+    def _connection_with_fake_client(self):
+        # Builds an MSGraphConnection without running __init__, backed by
+        # _FakeGraphClient, with _request_with_retries replaced by a MagicMock
+        # that forwards each call to the named method of the fake client.
+        connection = MSGraphConnection.__new__(MSGraphConnection)
+        connection.mailbox_name = "shared@example.com"
+        connection._client = _FakeGraphClient()
+
+        def forward_to_client(method_name, *args, **kwargs):
+            client_method = getattr(connection._client, method_name)
+            return client_method(*args, **kwargs)
+
+        connection._request_with_retries = MagicMock(side_effect=forward_to_client)
+        return connection
+
+    def testWellKnownFolderFallback(self):
+        # The filtered listing for 'Inbox' fails, so the ID must come from the
+        # well-known "inbox" endpoint; both requests must have been issued.
+        connection = self._connection_with_fake_client()
+
+        resolved_id = connection._find_folder_id_with_parent("Inbox", None)
+
+        self.assertEqual(resolved_id, "inbox-id")
+        connection._request_with_retries.assert_any_call(
+            "get", "/users/shared@example.com/mailFolders?$filter=displayName eq 'Inbox'"
+        )
+        connection._request_with_retries.assert_any_call(
+            "get", "/users/shared@example.com/mailFolders/inbox?$select=id,displayName"
+        )
+
+    def testUnknownFolderStillFails(self):
+        # 'Custom' has no successful lookup in the fake client, so resolving
+        # it must raise RuntimeWarning.
+        connection = self._connection_with_fake_client()
+
+        with self.assertRaises(RuntimeWarning):
+            connection._find_folder_id_from_folder_path("Custom")
+
+    def testSingleSegmentPathAvoidsExtraWellKnownLookupWhenListingSucceeds(self):
+        # When the parent-based lookup already returns an ID, the well-known
+        # folder lookup must not be consulted at all.
+        connection = MSGraphConnection.__new__(MSGraphConnection)
+        connection.mailbox_name = "shared@example.com"
+        connection._find_folder_id_with_parent = MagicMock(return_value="custom-id")
+        connection._get_well_known_folder_id = MagicMock(return_value="inbox-id")
+
+        resolved_id = connection._find_folder_id_from_folder_path("Inbox")
+
+        self.assertEqual(resolved_id, "custom-id")
+        connection._find_folder_id_with_parent.assert_called_once_with("Inbox", None)
+        connection._get_well_known_folder_id.assert_not_called()
+
+
 if __name__ == "__main__":
    unittest.main(verbosity=2)