diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..53044de --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,72 @@ +name: Bug report +description: Report a reproducible parsedmarc bug +title: "[Bug]: " +labels: + - bug +body: + - type: input + id: version + attributes: + label: parsedmarc version + description: Include the parsedmarc version or commit if known. + placeholder: 9.x.x + validations: + required: true + - type: dropdown + id: input_backend + attributes: + label: Input backend + description: Which input path or mailbox backend is involved? + options: + - IMAP + - MS Graph + - Gmail API + - Maildir + - mbox + - Local file / direct parse + - Other + validations: + required: true + - type: textarea + id: environment + attributes: + label: Environment + description: Runtime, container image, OS, Python version, or deployment details. + placeholder: Docker on Debian, Python 3.12, parsedmarc installed from PyPI + validations: + required: true + - type: textarea + id: config + attributes: + label: Sanitized config + description: Include the relevant config fragment with secrets removed. + render: ini + - type: textarea + id: steps + attributes: + label: Steps to reproduce + description: Describe the smallest reproducible sequence you can. + placeholder: | + 1. Configure parsedmarc with ... + 2. Run ... + 3. Observe ... + validations: + required: true + - type: textarea + id: expected_actual + attributes: + label: Expected vs actual behavior + description: What did you expect, and what happened instead? + validations: + required: true + - type: textarea + id: logs + attributes: + label: Logs or traceback + description: Paste sanitized logs or a traceback if available. + render: text + - type: textarea + id: samples + attributes: + label: Sample report availability + description: If you can share a sanitized sample report or message, note that here. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..2b5cc15 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: true +contact_links: + - name: Security issue + url: https://github.com/domainaware/parsedmarc/security/policy + about: Please use the security policy and avoid filing public issues for undisclosed vulnerabilities. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..1057a1a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,30 @@ +name: Feature request +description: Suggest a new feature or behavior change +title: "[Feature]: " +labels: + - enhancement +body: + - type: textarea + id: problem + attributes: + label: Problem statement + description: What workflow or limitation are you trying to solve? + validations: + required: true + - type: textarea + id: proposal + attributes: + label: Proposed behavior + description: Describe the feature or behavior you want. + validations: + required: true + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: Describe workarounds or alternative approaches you considered. + - type: textarea + id: impact + attributes: + label: Compatibility or operational impact + description: Note config, output, performance, or deployment implications if relevant. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..fa819d3 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,24 @@ +## Summary + +- + +## Why + +- + +## Testing + +- + +## Backward Compatibility / Risk + +- + +## Related Issue + +- Closes # + +## Checklist + +- [ ] Tests added or updated if behavior changed +- [ ] Docs updated if config or user-facing behavior changed diff --git a/CHANGELOG.md b/CHANGELOG.md index fc64082..a945820 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,23 @@ Forensic reports have been renamed to failure reports throughout the project to - **Kibana**: Index pattern uses `dmarc_f*` to match both `dmarc_forensic*` and `dmarc_failure*` - **Splunk**: Base search queries `(sourcetype="dmarc:failure" OR sourcetype="dmarc:forensic")` - **Elasticsearch/OpenSearch**: Duplicate-check searches query across both `dmarc_failure*` and `dmarc_forensic*` index patterns +## 9.2.0 + +### Added + +- OpenSearch AWS SigV4 authentication support (PR #673) +- IMAP move/delete compatibility fallbacks (PR #671) +- `fail_on_output_error` CLI option for sink failures (PR #672) +- Gmail service account auth mode for non-interactive runs (PR #676) +- Microsoft Graph certificate authentication support (PRs #692 and #693) +- Microsoft Graph well-known folder fallback for root listing failures (PR #618 and #684 close #609) + +### Fixed + +- Pass mailbox since filter through `watch_inbox` callback (PR #670 closes issue #581) +- `parsedmarc.mail.gmail.GmailConnection.delete_message` now properly calls the Gmail API (PR #668) +- Avoid extra mailbox fetch in batch and test mode (PR #691 closes #533) + ## 9.1.2 ### Fixes diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e5db8cc --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,78 @@ +# Contributing + +Thanks for contributing to parsedmarc. + +## Local setup + +Use a virtual environment for local development. + +```bash +python3 -m venv .venv +. .venv/bin/activate +python -m pip install --upgrade pip +pip install .[build] +``` + +## Before opening a pull request + +Run the checks that match your change: + +```bash +ruff check . +pytest --cov --cov-report=xml tests.py +``` + +If you changed documentation: + +```bash +cd docs +make html +``` + +If you changed CLI behavior or parsing logic, it is also useful to exercise the +sample reports: + +```bash +parsedmarc --debug -c ci.ini samples/aggregate/* +parsedmarc --debug -c ci.ini samples/forensic/* +``` + +To skip DNS lookups during tests, set: + +```bash +GITHUB_ACTIONS=true +``` + +## Pull request guidelines + +- Keep pull requests small and focused. Separate bug fixes, docs updates, and + repo-maintenance changes where practical. +- Add or update tests when behavior changes. +- Update docs when configuration or user-facing behavior changes. +- Include a short summary, the reason for the change, and the testing you ran. +- Link the related issue when there is one. + +## Branch maintenance + +Upstream `master` may move quickly. Before asking for review or after another PR +lands, rebase your branch onto the current upstream branch and force-push with +lease if needed: + +```bash +git fetch upstream +git rebase upstream/master +git push --force-with-lease +``` + +## CI and coverage + +GitHub Actions is the source of truth for linting, docs, and test status. + +Codecov patch coverage is usually the most relevant signal for small PRs. Project +coverage can be noisier when the base comparison is stale, so interpret it in +the context of the actual diff. + +## Questions + +Use GitHub issues for bugs and feature requests. If you are not sure whether a +change is wanted, opening an issue first is usually the safest path. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..a38f850 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,29 @@ +# Security Policy + +## Reporting a vulnerability + +Please do not open a public GitHub issue for an undisclosed security +vulnerability. Use GitHub private vulnerability reporting in the Security tab of this project instead. + +When reporting a vulnerability, include: + +- the affected parsedmarc version or commit +- the component or integration involved +- clear reproduction details if available +- potential impact +- any suggested mitigation or workaround + +## Supported versions + +Security fixes will be applied to the latest released version and +the current `master` branch. + +Older versions will not receive backported fixes. + +## Disclosure process + +After a report is received, maintainers can validate the issue, assess impact, +and coordinate a fix before public disclosure. + +Please avoid publishing proof-of-concept details until maintainers have had a +reasonable opportunity to investigate and release a fix or mitigation. diff --git a/docs/source/usage.md b/docs/source/usage.md index 426d98f..174d765 100644 --- a/docs/source/usage.md +++ b/docs/source/usage.md @@ -203,7 +203,7 @@ The full set of configuration options are: - `password` - str: The IMAP password - `msgraph` - `auth_method` - str: Authentication method, valid types are - `UsernamePassword`, `DeviceCode`, or `ClientSecret` + `UsernamePassword`, `DeviceCode`, `ClientSecret`, or `Certificate` (Default: `UsernamePassword`). - `user` - str: The M365 user, required when the auth method is UsernamePassword @@ -211,6 +211,11 @@ The full set of configuration options are: method is UsernamePassword - `client_id` - str: The app registration's client ID - `client_secret` - str: The app registration's secret + - `certificate_path` - str: Path to a PEM or PKCS12 certificate + including the private key. Required when the auth method is + `Certificate` + - `certificate_password` - str: Optional password for the + certificate file when using `Certificate` auth - `tenant_id` - str: The Azure AD tenant ID. This is required for all auth methods except UsernamePassword. - `mailbox` - str: The mailbox name. This defaults to the @@ -248,6 +253,9 @@ The full set of configuration options are: -Description "Restrict access to dmarc reports mailbox." ``` + The same application permission and mailbox scoping guidance + applies to the `Certificate` auth method. + ::: - `elasticsearch` - `hosts` - str: A comma separated list of hostnames and ports diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index afbefca..c2af098 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -996,10 +996,12 @@ def extract_report(content: Union[bytes, str, BinaryIO]) -> str: return report -def extract_report_from_file_path(file_path: str): +def extract_report_from_file_path( + file_path: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]], +) -> str: """Extracts report from a file at the given file_path""" try: - with open(file_path, "rb") as report_file: + with open(os.fspath(file_path), "rb") as report_file: return extract_report(report_file.read()) except FileNotFoundError: raise ParserError("File was not found") @@ -1703,7 +1705,7 @@ def parse_report_email( def parse_report_file( - input_: Union[bytes, str, BinaryIO], + input_: Union[bytes, str, os.PathLike[str], os.PathLike[bytes], BinaryIO], *, nameservers: Optional[list[str]] = None, dns_timeout: float = 2.0, @@ -1720,7 +1722,8 @@ def parse_report_file( file-like object. or bytes Args: - input_ (str | bytes | BinaryIO): A path to a file, a file like object, or bytes + input_ (str | os.PathLike | bytes | BinaryIO): A path to a file, + a file-like object, or bytes nameservers (list): A list of one or more nameservers to use (Cloudflare's public DNS resolvers by default) dns_timeout (float): Sets the DNS timeout in seconds @@ -1737,9 +1740,10 @@ def parse_report_file( dict: The parsed DMARC report """ file_object: BinaryIO - if isinstance(input_, str): - logger.debug("Parsing {0}".format(input_)) - file_object = open(input_, "rb") + if isinstance(input_, (str, os.PathLike)): + file_path = os.fspath(input_) + logger.debug("Parsing {0}".format(file_path)) + file_object = open(file_path, "rb") elif isinstance(input_, (bytes, bytearray, memoryview)): file_object = BytesIO(bytes(input_)) else: @@ -2180,14 +2184,17 @@ def get_dmarc_reports_from_mailbox( "smtp_tls_reports": smtp_tls_reports, } - if current_time: - total_messages = len( - connection.fetch_messages(reports_folder, since=current_time) - ) + if not test and not batch_size: + if current_time: + total_messages = len( + connection.fetch_messages(reports_folder, since=current_time) + ) + else: + total_messages = len(connection.fetch_messages(reports_folder)) else: - total_messages = len(connection.fetch_messages(reports_folder)) + total_messages = 0 - if not test and not batch_size and total_messages > 0: + if total_messages > 0: # Process emails that came in during the last run results = get_dmarc_reports_from_mailbox( connection=connection, diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 77a8a3b..e2f2a9b 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -644,6 +644,8 @@ def _main(): graph_password=None, graph_client_id=None, graph_client_secret=None, + graph_certificate_path=None, + graph_certificate_password=None, graph_tenant_id=None, graph_mailbox=None, graph_allow_unencrypted_storage=False, @@ -1018,6 +1020,19 @@ def _main(): ) exit(-1) + if opts.graph_auth_method == AuthMethod.Certificate.name: + if "certificate_path" in graph_config: + opts.graph_certificate_path = graph_config["certificate_path"] + else: + logger.critical( + "certificate_path setting missing from the msgraph config section" + ) + exit(-1) + if "certificate_password" in graph_config: + opts.graph_certificate_password = graph_config[ + "certificate_password" + ] + if "client_id" in graph_config: opts.graph_client_id = graph_config["client_id"] else: @@ -1760,6 +1775,8 @@ def _main(): tenant_id=opts.graph_tenant_id, client_id=opts.graph_client_id, client_secret=opts.graph_client_secret, + certificate_path=opts.graph_certificate_path, + certificate_password=opts.graph_certificate_password, username=opts.graph_user, password=opts.graph_password, token_file=opts.graph_token_file, diff --git a/parsedmarc/mail/graph.py b/parsedmarc/mail/graph.py index e87ac7a..05154f7 100644 --- a/parsedmarc/mail/graph.py +++ b/parsedmarc/mail/graph.py @@ -12,19 +12,25 @@ from azure.identity import ( UsernamePasswordCredential, DeviceCodeCredential, ClientSecretCredential, + CertificateCredential, TokenCachePersistenceOptions, AuthenticationRecord, ) from msgraph.core import GraphClient +from requests.exceptions import RequestException from parsedmarc.log import logger from parsedmarc.mail.mailbox_connection import MailboxConnection +GRAPH_REQUEST_RETRY_ATTEMPTS = 3 +GRAPH_REQUEST_RETRY_DELAY_SECONDS = 5 + class AuthMethod(Enum): DeviceCode = 1 UsernamePassword = 2 ClientSecret = 3 + Certificate = 4 def _get_cache_args(token_path: Path, allow_unencrypted_storage): @@ -83,30 +89,55 @@ def _generate_credential(auth_method: str, token_path: Path, **kwargs): tenant_id=kwargs["tenant_id"], client_secret=kwargs["client_secret"], ) + elif auth_method == AuthMethod.Certificate.name: + cert_path = kwargs.get("certificate_path") + if not cert_path: + raise ValueError( + "certificate_path is required when auth_method is 'Certificate'" + ) + credential = CertificateCredential( + client_id=kwargs["client_id"], + tenant_id=kwargs["tenant_id"], + certificate_path=cert_path, + password=kwargs.get("certificate_password"), + ) else: raise RuntimeError(f"Auth method {auth_method} not found") return credential class MSGraphConnection(MailboxConnection): + _WELL_KNOWN_FOLDERS = { + "inbox": "inbox", + "archive": "archive", + "drafts": "drafts", + "sentitems": "sentitems", + "deleteditems": "deleteditems", + "junkemail": "junkemail", + } + def __init__( self, auth_method: str, mailbox: str, graph_url: str, client_id: str, - client_secret: str, - username: str, - password: str, + client_secret: Optional[str], + username: Optional[str], + password: Optional[str], tenant_id: str, token_file: str, allow_unencrypted_storage: bool, + certificate_path: Optional[str] = None, + certificate_password: Optional[Union[str, bytes]] = None, ): token_path = Path(token_file) credential = _generate_credential( auth_method, client_id=client_id, client_secret=client_secret, + certificate_path=certificate_path, + certificate_password=certificate_password, username=username, password=password, tenant_id=tenant_id, @@ -117,10 +148,10 @@ class MSGraphConnection(MailboxConnection): "credential": credential, "cloud": graph_url, } - if not isinstance(credential, ClientSecretCredential): + if not isinstance(credential, (ClientSecretCredential, CertificateCredential)): scopes = ["Mail.ReadWrite"] # Detect if mailbox is shared - if mailbox and username != mailbox: + if mailbox and username and username != mailbox: scopes = ["Mail.ReadWrite.Shared"] auth_record = credential.authenticate(scopes=scopes) _cache_auth_record(auth_record, token_path) @@ -129,6 +160,23 @@ class MSGraphConnection(MailboxConnection): self._client = GraphClient(**client_params) self.mailbox_name = mailbox + def _request_with_retries(self, method_name: str, *args, **kwargs): + for attempt in range(1, GRAPH_REQUEST_RETRY_ATTEMPTS + 1): + try: + return getattr(self._client, method_name)(*args, **kwargs) + except RequestException as error: + if attempt == GRAPH_REQUEST_RETRY_ATTEMPTS: + raise + logger.warning( + "Transient MS Graph %s error on attempt %s/%s: %s", + method_name.upper(), + attempt, + GRAPH_REQUEST_RETRY_ATTEMPTS, + error, + ) + sleep(GRAPH_REQUEST_RETRY_DELAY_SECONDS) + raise RuntimeError("no retry attempts configured") + def create_folder(self, folder_name: str): sub_url = "" path_parts = folder_name.split("/") @@ -143,7 +191,7 @@ class MSGraphConnection(MailboxConnection): request_body = {"displayName": folder_name} request_url = f"/users/{self.mailbox_name}/mailFolders{sub_url}" - resp = self._client.post(request_url, json=request_body) + resp = self._request_with_retries("post", request_url, json=request_body) if resp.status_code == 409: logger.debug(f"Folder {folder_name} already exists, skipping creation") elif resp.status_code == 201: @@ -173,7 +221,7 @@ class MSGraphConnection(MailboxConnection): params["$top"] = batch_size else: params["$top"] = 100 - result = self._client.get(url, params=params) + result = self._request_with_retries("get", url, params=params) if result.status_code != 200: raise RuntimeError(f"Failed to fetch messages {result.text}") messages = result.json()["value"] @@ -181,7 +229,7 @@ class MSGraphConnection(MailboxConnection): while "@odata.nextLink" in result.json() and ( since is not None or (batch_size == 0 or batch_size - len(messages) > 0) ): - result = self._client.get(result.json()["@odata.nextLink"]) + result = self._request_with_retries("get", result.json()["@odata.nextLink"]) if result.status_code != 200: raise RuntimeError(f"Failed to fetch messages {result.text}") messages.extend(result.json()["value"]) @@ -190,7 +238,7 @@ class MSGraphConnection(MailboxConnection): def mark_message_read(self, message_id: str): """Marks a message as read""" url = f"/users/{self.mailbox_name}/messages/{message_id}" - resp = self._client.patch(url, json={"isRead": "true"}) + resp = self._request_with_retries("patch", url, json={"isRead": "true"}) if resp.status_code != 200: raise RuntimeWarning( f"Failed to mark message read{resp.status_code}: {resp.json()}" @@ -198,7 +246,7 @@ class MSGraphConnection(MailboxConnection): def fetch_message(self, message_id: str, **kwargs): url = f"/users/{self.mailbox_name}/messages/{message_id}/$value" - result = self._client.get(url) + result = self._request_with_retries("get", url) if result.status_code != 200: raise RuntimeWarning( f"Failed to fetch message{result.status_code}: {result.json()}" @@ -210,7 +258,7 @@ class MSGraphConnection(MailboxConnection): def delete_message(self, message_id: str): url = f"/users/{self.mailbox_name}/messages/{message_id}" - resp = self._client.delete(url) + resp = self._request_with_retries("delete", url) if resp.status_code != 204: raise RuntimeWarning( f"Failed to delete message {resp.status_code}: {resp.json()}" @@ -220,7 +268,7 @@ class MSGraphConnection(MailboxConnection): folder_id = self._find_folder_id_from_folder_path(folder_name) request_body = {"destinationId": folder_id} url = f"/users/{self.mailbox_name}/messages/{message_id}/move" - resp = self._client.post(url, json=request_body) + resp = self._request_with_retries("post", url, json=request_body) if resp.status_code != 201: raise RuntimeWarning( f"Failed to move message {resp.status_code}: {resp.json()}" @@ -248,6 +296,19 @@ class MSGraphConnection(MailboxConnection): else: return self._find_folder_id_with_parent(folder_name, None) + def _get_well_known_folder_id(self, folder_name: str) -> Optional[str]: + folder_key = folder_name.lower().replace(" ", "").replace("-", "") + alias = self._WELL_KNOWN_FOLDERS.get(folder_key) + if alias is None: + return None + + url = f"/users/{self.mailbox_name}/mailFolders/{alias}?$select=id,displayName" + folder_resp = self._request_with_retries("get", url) + if folder_resp.status_code != 200: + return None + payload = folder_resp.json() + return payload.get("id") + def _find_folder_id_with_parent( self, folder_name: str, parent_folder_id: Optional[str] ): @@ -256,8 +317,12 @@ class MSGraphConnection(MailboxConnection): sub_url = f"/{parent_folder_id}/childFolders" url = f"/users/{self.mailbox_name}/mailFolders{sub_url}" filter = f"?$filter=displayName eq '{folder_name}'" - folders_resp = self._client.get(url + filter) + folders_resp = self._request_with_retries("get", url + filter) if folders_resp.status_code != 200: + if parent_folder_id is None: + well_known_folder_id = self._get_well_known_folder_id(folder_name) + if well_known_folder_id: + return well_known_folder_id raise RuntimeWarning(f"Failed to list folders.{folders_resp.json()}") folders: list = folders_resp.json()["value"] matched_folders = [ diff --git a/tests.py b/tests.py index a5d8b59..5d32644 100755 --- a/tests.py +++ b/tests.py @@ -101,14 +101,23 @@ class Test(unittest.TestCase): def testExtractReportXML(self): """Test extract report function for XML input""" print() - file = "samples/extract_report/nice-input.xml" - print("Testing {0}: ".format(file), end="") - xmlout = parsedmarc.extract_report_from_file_path(file) - with open("samples/extract_report/nice-input.xml") as f: - xmlin = f.read() + report_path = "samples/extract_report/nice-input.xml" + print("Testing {0}: ".format(report_path), end="") + xmlout = parsedmarc.extract_report_from_file_path(report_path) + xmlin_file = open("samples/extract_report/nice-input.xml") + xmlin = xmlin_file.read() + xmlin_file.close() self.assertTrue(compare_xml(xmlout, xmlin)) print("Passed!") + def testExtractReportXMLFromPath(self): + """Test extract report function for pathlib.Path input""" + report_path = Path("samples/extract_report/nice-input.xml") + xmlout = parsedmarc.extract_report_from_file_path(report_path) + with open("samples/extract_report/nice-input.xml") as xmlin_file: + xmlin = xmlin_file.read() + self.assertTrue(compare_xml(xmlout, xmlin)) + def testExtractReportGZip(self): """Test extract report function for gzip input""" print() @@ -134,6 +143,28 @@ class Test(unittest.TestCase): self.assertFalse(compare_xml(xmlout, xmlin)) print("Passed!") + def testParseReportFileAcceptsPathForXML(self): + report_path = Path( + "samples/aggregate/protection.outlook.com!example.com!1711756800!1711843200.xml" + ) + result = parsedmarc.parse_report_file( + report_path, + offline=True, + ) + self.assertEqual(result["report_type"], "aggregate") + self.assertEqual(result["report"]["report_metadata"]["org_name"], "outlook.com") + + def testParseReportFileAcceptsPathForEmail(self): + report_path = Path( + "samples/aggregate/Report domain- borschow.com Submitter- google.com Report-ID- 949348866075514174.eml" + ) + result = parsedmarc.parse_report_file( + report_path, + offline=True, + ) + self.assertEqual(result["report_type"], "aggregate") + self.assertEqual(result["report"]["report_metadata"]["org_name"], "google.com") + def testAggregateSamples(self): """Test sample aggregate/rua DMARC reports""" print() @@ -2243,6 +2274,31 @@ class TestGraphConnection(unittest.TestCase): with self.assertRaises(RuntimeError): connection._get_all_messages("/url", batch_size=0, since=None) + def testGetAllMessagesRetriesTransientRequestErrors(self): + connection = MSGraphConnection.__new__(MSGraphConnection) + connection._client = MagicMock() + connection._client.get.side_effect = [ + graph_module.RequestException("connection reset"), + _FakeGraphResponse(200, {"value": [{"id": "1"}]}), + ] + with patch.object(graph_module, "sleep") as mocked_sleep: + messages = connection._get_all_messages("/url", batch_size=0, since=None) + self.assertEqual([msg["id"] for msg in messages], ["1"]) + mocked_sleep.assert_called_once_with(graph_module.GRAPH_REQUEST_RETRY_DELAY_SECONDS) + + def testGetAllMessagesRaisesAfterRetryExhaustion(self): + connection = MSGraphConnection.__new__(MSGraphConnection) + connection._client = MagicMock() + connection._client.get.side_effect = graph_module.RequestException( + "connection reset" + ) + with patch.object(graph_module, "sleep") as mocked_sleep: + with self.assertRaises(graph_module.RequestException): + connection._get_all_messages("/url", batch_size=0, since=None) + self.assertEqual( + mocked_sleep.call_count, graph_module.GRAPH_REQUEST_RETRY_ATTEMPTS - 1 + ) + def testGetAllMessagesNextPageFailure(self): connection = MSGraphConnection.__new__(MSGraphConnection) first_response = _FakeGraphResponse( @@ -2376,6 +2432,49 @@ class TestGraphConnection(unittest.TestCase): client_id="cid", tenant_id="tenant", client_secret="secret" ) + def testGenerateCredentialCertificate(self): + fake_credential = object() + with patch.object( + graph_module, "CertificateCredential", return_value=fake_credential + ) as mocked: + result = _generate_credential( + graph_module.AuthMethod.Certificate.name, + Path("/tmp/token"), + client_id="cid", + client_secret="secret", + certificate_path="/tmp/cert.pem", + certificate_password="secret-pass", + username="user", + password="pass", + tenant_id="tenant", + allow_unencrypted_storage=False, + ) + self.assertIs(result, fake_credential) + mocked.assert_called_once_with( + client_id="cid", + tenant_id="tenant", + certificate_path="/tmp/cert.pem", + password="secret-pass", + ) + + def testGenerateCredentialCertificateRequiresPath(self): + with self.assertRaisesRegex( + ValueError, + "certificate_path is required when auth_method is 'Certificate'", + ): + _generate_credential( + graph_module.AuthMethod.Certificate.name, + Path("/tmp/token"), + client_id="cid", + client_secret=None, + certificate_path=None, + certificate_password="secret-pass", + username=None, + password=None, + tenant_id="tenant", + allow_unencrypted_storage=False, + ) + def testInitUsesSharedMailboxScopes(self): class FakeCredential: def __init__(self): @@ -2408,6 +2507,63 @@ class TestGraphConnection(unittest.TestCase): graph_client.call_args.kwargs.get("scopes"), ["Mail.ReadWrite.Shared"] ) + def testInitWithoutUsernameUsesDefaultMailReadWriteScope(self): + class FakeCredential: + def __init__(self): + self.authenticate = MagicMock(return_value="auth-record") + + fake_credential = FakeCredential() + with patch.object( + graph_module, "_generate_credential", return_value=fake_credential + ): + with patch.object(graph_module, "_cache_auth_record") as cache_auth: + with patch.object(graph_module, "GraphClient") as graph_client: + MSGraphConnection( + auth_method=graph_module.AuthMethod.DeviceCode.name, + mailbox="owner@example.com", + graph_url="https://graph.microsoft.com", + client_id="cid", + client_secret="secret", + username=None, + password=None, + tenant_id="tenant", + token_file="/tmp/token-file", + allow_unencrypted_storage=True, + ) + fake_credential.authenticate.assert_called_once_with(scopes=["Mail.ReadWrite"]) + cache_auth.assert_called_once() + graph_client.assert_called_once() + self.assertEqual(graph_client.call_args.kwargs.get("scopes"), ["Mail.ReadWrite"]) + + def testInitCertificateAuthSkipsInteractiveAuthenticate(self): + class DummyCertificateCredential: + pass + + fake_credential = DummyCertificateCredential() + with patch.object(graph_module, "CertificateCredential", DummyCertificateCredential): + with patch.object( + graph_module, "_generate_credential", return_value=fake_credential + ): + with patch.object(graph_module, "_cache_auth_record") as cache_auth: + with patch.object(graph_module, "GraphClient") as graph_client: + MSGraphConnection( + auth_method=graph_module.AuthMethod.Certificate.name, + mailbox="shared@example.com", + graph_url="https://graph.microsoft.com", + client_id="cid", + client_secret=None, + certificate_path="/tmp/cert.pem", + certificate_password="secret-pass", + username=None, + password=None, + tenant_id="tenant", + token_file="/tmp/token-file", + allow_unencrypted_storage=False, + ) + cache_auth.assert_not_called() + graph_client.assert_called_once() + self.assertNotIn("scopes", graph_client.call_args.kwargs) + def testCreateFolderAndMoveErrors(self): connection = MSGraphConnection.__new__(MSGraphConnection) connection.mailbox_name = "mailbox@example.com" @@ -2783,5 +2939,199 @@ since = 2d self.assertEqual(mock_watch_inbox.call_args.kwargs.get("since"), "2d") +class _DummyMailboxConnection: + def __init__(self): + self.fetch_calls = [] + + def create_folder(self, folder_name): + return None + + def fetch_messages(self, reports_folder, **kwargs): + self.fetch_calls.append({"reports_folder": reports_folder, **kwargs}) + return [] + + def fetch_message(self, message_id, **kwargs): + return "" + + def delete_message(self, message_id): + return None + + def move_message(self, message_id, folder_name): + return None + + def keepalive(self): + return None + + def watch(self, check_callback, check_timeout): + return None + + +class TestMailboxPerformance(unittest.TestCase): + def testBatchModeAvoidsExtraFullFetch(self): + connection = _DummyMailboxConnection() + parsedmarc.get_dmarc_reports_from_mailbox( + connection=connection, + reports_folder="INBOX", + test=True, + batch_size=10, + create_folders=False, + ) + self.assertEqual(len(connection.fetch_calls), 1) + @patch("parsedmarc.cli.get_dmarc_reports_from_mailbox") + @patch("parsedmarc.cli.MSGraphConnection") + def testCliPassesMsGraphCertificateAuthSettings( + self, mock_graph_connection, mock_get_mailbox_reports + ): + mock_graph_connection.return_value = object() + mock_get_mailbox_reports.return_value = { + "aggregate_reports": [], + "forensic_reports": [], + "smtp_tls_reports": [], + } + + config_text = """[general] +silent = true + +[msgraph] +auth_method = Certificate +client_id = client-id +tenant_id = tenant-id +mailbox = shared@example.com +certificate_path = /tmp/msgraph-cert.pem +certificate_password = cert-pass +""" + + with tempfile.NamedTemporaryFile("w", suffix=".ini", delete=False) as cfg: + cfg.write(config_text) + cfg_path = cfg.name + self.addCleanup(lambda: os.path.exists(cfg_path) and os.remove(cfg_path)) + + with patch.object(sys, "argv", ["parsedmarc", "-c", cfg_path]): + parsedmarc.cli._main() + + self.assertEqual( + mock_graph_connection.call_args.kwargs.get("auth_method"), "Certificate" + ) + self.assertEqual( + mock_graph_connection.call_args.kwargs.get("certificate_path"), + "/tmp/msgraph-cert.pem", + ) + self.assertEqual( + mock_graph_connection.call_args.kwargs.get("certificate_password"), + "cert-pass", + ) + + @patch("parsedmarc.cli.get_dmarc_reports_from_mailbox") + @patch("parsedmarc.cli.MSGraphConnection") + @patch("parsedmarc.cli.logger") + def testCliRequiresMsGraphCertificatePath( + self, mock_logger, mock_graph_connection, mock_get_mailbox_reports + ): + config_text = """[general] +silent = true + +[msgraph] +auth_method = Certificate +client_id = client-id +tenant_id = tenant-id +mailbox = shared@example.com +""" + + with tempfile.NamedTemporaryFile("w", suffix=".ini", delete=False) as cfg: + cfg.write(config_text) + cfg_path = cfg.name + self.addCleanup(lambda: os.path.exists(cfg_path) and os.remove(cfg_path)) + + with patch.object(sys, "argv", ["parsedmarc", "-c", cfg_path]): + with self.assertRaises(SystemExit) as system_exit: + parsedmarc.cli._main() + + self.assertEqual(system_exit.exception.code, -1) + mock_logger.critical.assert_called_once_with( + "certificate_path setting missing from the msgraph config section" + ) + mock_graph_connection.assert_not_called() + mock_get_mailbox_reports.assert_not_called() + +class _FakeGraphClient: + def get(self, url, params=None): + if "/mailFolders/inbox?$select=id,displayName" in url: + return _FakeGraphResponse(200, {"id": "inbox-id", "displayName": "Inbox"}) + + if "/mailFolders?$filter=displayName eq 'Inbox'" in url: + return _FakeGraphResponse( + 404, + { + "error": { + "code": "ErrorItemNotFound", + "message": "Default folder Root not found.", + } + }, + ) + + if "/mailFolders?$filter=displayName eq 'Custom'" in url: + return _FakeGraphResponse( + 404, + { + "error": { + "code": "ErrorItemNotFound", + "message": "Default folder Root not found.", + } + }, + ) + + return _FakeGraphResponse(404, {"error": {"code": "NotFound"}}) + + +class TestMSGraphFolderFallback(unittest.TestCase): + def testWellKnownFolderFallback(self): + connection = MSGraphConnection.__new__(MSGraphConnection) + connection.mailbox_name = "shared@example.com" + connection._client = _FakeGraphClient() + connection._request_with_retries = MagicMock( + side_effect=lambda method_name, *args, **kwargs: getattr( + connection._client, method_name + )( + *args, **kwargs + ) + ) + + folder_id = connection._find_folder_id_with_parent("Inbox", None) + self.assertEqual(folder_id, "inbox-id") + connection._request_with_retries.assert_any_call( + "get", "/users/shared@example.com/mailFolders?$filter=displayName eq 'Inbox'" + ) + connection._request_with_retries.assert_any_call( + "get", "/users/shared@example.com/mailFolders/inbox?$select=id,displayName" + ) + + def testUnknownFolderStillFails(self): + connection = MSGraphConnection.__new__(MSGraphConnection) + connection.mailbox_name = "shared@example.com" + connection._client = _FakeGraphClient() + connection._request_with_retries = MagicMock( + side_effect=lambda method_name, *args, **kwargs: getattr( + connection._client, method_name + )( + *args, **kwargs + ) + ) + + with self.assertRaises(RuntimeWarning): + connection._find_folder_id_from_folder_path("Custom") + + def testSingleSegmentPathAvoidsExtraWellKnownLookupWhenListingSucceeds(self): + connection = MSGraphConnection.__new__(MSGraphConnection) + connection.mailbox_name = "shared@example.com" + connection._find_folder_id_with_parent = MagicMock(return_value="custom-id") + connection._get_well_known_folder_id = MagicMock(return_value="inbox-id") + + folder_id = connection._find_folder_id_from_folder_path("Inbox") + + self.assertEqual(folder_id, "custom-id") + connection._find_folder_id_with_parent.assert_called_once_with("Inbox", None) + connection._get_well_known_folder_id.assert_not_called() + + if __name__ == "__main__": unittest.main(verbosity=2)