From e721f5701e00ba443afce1cb8a9f39bb56bde222 Mon Sep 17 00:00:00 2001 From: Panos Gkikakis Date: Mon, 17 Jan 2022 23:43:33 +0200 Subject: [PATCH 1/5] Add GMail API support --- README.rst | 19 +++- parsedmarc/__init__.py | 212 +++++++++++++++++++++++++++++++++++++++++ parsedmarc/cli.py | 44 ++++++++- 3 files changed, 271 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index ab7b96f..ef9d448 100644 --- a/README.rst +++ b/README.rst @@ -166,6 +166,15 @@ For example server = localhost port = 514 + [gmail_api] + credentials_file = /path/to/credentials.json # Get this file from console.cloud.google.com. See https://developers.google.com/identity/protocols/oauth2 + token_file = /path/to/token.json # This file will be generated automatically + delete = False # Delete reports after successful processing + scopes = https://mail.google.com/ + include_spam_trash=True + reports_label=DMARC + + The full set of configuration options are: - ``general`` @@ -248,7 +257,15 @@ The full set of configuration options are: - ``syslog`` - ``server`` - str: The Syslog server name or IP address - ``port`` - int: The UDP port to use (Default: 514) - +- ``gmail_api`` + - ``gmail_api_credentials_file`` - str: Path to file containing the credentials, None to disable (Default: None) + - ``gmail_api_token_file`` - str: Path to save the token file (Default: .token) + - ``gmail_api_reports_label`` - str: Label to use when searching for reports to parse (Default: INBOX) + - ``gmail_api_archive_label`` - str: Label to apply to processed reports (Default: DMARC Archive) + - ``gmail_api_include_spam_trash`` - bool: Include messages in Spam and Trash when searching reports (Default: False) + - ``gmail_api_scopes`` - str: Comma separated list of scopes to use when acquiring credentials (Default: https://www.googleapis.com/auth/gmail.modify) + - ``gmail_api_delete`` - bool: Delete messages after processing them, instead of archiving them (Default: False) + - ``gmail_api_test`` - bool: Do not 
move or delete messages (Default: False) .. warning:: diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 75fc02a..6516006 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -31,6 +31,13 @@ from mailsuite.imap import IMAPClient from mailsuite.smtp import send_email from imapclient.exceptions import IMAPClientError +from google.auth.transport.requests import Request +from google.oauth2.credentials import Credentials +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError +from base64 import urlsafe_b64decode + from parsedmarc.utils import get_base_domain, get_ip_address_info from parsedmarc.utils import is_outlook_msg, convert_outlook_msg from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime @@ -1237,6 +1244,211 @@ def get_dmarc_reports_from_inbox(connection=None, return results +def get_gmail_api_creds(token_file="token.json",credentials_file="credentials.json",scopes=['https://www.googleapis.com/auth/gmail.modify']): + + creds = None + + if os.path.exists(token_file): + creds = Credentials.from_authorized_user_file(token_file, scopes) + # If there are no (valid) credentials available, let the user log in. 
+ if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file(credentials_file, scopes) + creds = flow.run_console() + # Save the credentials for the next run + with open(token_file, 'w') as token: + token.write(creds.to_json()) + return creds + +def get_dmarc_reports_from_gmail_api(credentials_file=".credentials",token_file=".token", + reports_label="INBOX", archive_label = "DMARC Archive", + offline=False, ip_db_path=None, + scopes = ['https://mail.google.com/'], include_spam_trash=False, + nameservers=None, dns_timeout=2.0, + strip_attachment_payloads=False,delete=False, + test=False,parallel=False): + + logger = logging.getLogger("parsedmarc::gmail_api") + + aggregate_reports = [] + forensic_reports = [] + aggregate_report_msg_uids = [] + forensic_report_msg_uids = [] + + + creds = get_gmail_api_creds(token_file,credentials_file,scopes) + service = build('gmail', 'v1', credentials=creds) + + results = service.users().labels().list(userId='me').execute() + labels = results.get('labels',[]) + + reports_label_id = None + archive_label_id = None + forensic_label_id = None + aggregate_label_id = None + invalid_label_id = None + + invalid_label = "Invalid" + forensic_label = "Forensic" + aggregate_label = "Aggregate" + + for label in labels: + if reports_label == label['id']: + reports_label_id = label['id'] + reports_label = label['name'] + elif reports_label == label['name']: + reports_label_id = label['id'] + + if archive_label == label['id']: + archive_label_id = label['id'] + archive_label == label['name'] + elif archive_label == label['name']: + archive_label_id = label['id'] + + if invalid_label == label['name']: + invalid_label_id = label['id'] + if forensic_label == label['name']: + forensic_label_id = label['id'] + if aggregate_label == label['name']: + aggregate_label_id = label['id'] + if reports_label_id is None: + logger.debug("Creating label 
{0} for reports".format(reports_label)) + label = service.users().labels().create(userId='me',body={'name': reports_label, 'messageListVisibility': 'show'}).execute() + reports_label_id = label['id'] + + if archive_label_id is None: + logger.debug("Creating label {0} for archive".format(archive_label)) + label = service.users().labels().create(userId='me',body={'name': archive_label, 'messageListVisibility': 'show'}).execute() + archive_label_id = label['id'] + + if forensic_label_id is None: + logger.debug("Creating label {0} for forensic reports".format(forensic_label)) + label = service.users().labels().create(userId='me',body={'name': forensic_label, 'messageListVisibility': 'show'}).execute() + forensic_label_id = label['id'] + + if aggregate_label_id is None: + logger.debug("Creating label {0} for aggregate reports".format(aggregate_label)) + label = service.users().labels().create(userId='me',body={'name': aggregate_label, 'messageListVisibility': 'show'}).execute() + aggregate_label_id = label['id'] + + if invalid_label_id is None: + logger.debug("Creating label {0} for invalid reports".format(invalid_label)) + label = service.users().labels().create(userId='me',body={'name': invalid_label, 'messageListVisibility': 'show'}).execute() + invalid_label_id = label['id'] + + + results = service.users().messages().list(userId='me',includeSpamTrash=include_spam_trash,labelIds=[reports_label_id]).execute() + messages = results.get('messages',[]) + total_messages = results['resultSizeEstimate'] + + while(messages): + for message in messages: + msg_uid = message['id'] + msg = service.users().messages().get(userId='me',id=msg_uid,format="raw").execute() + + try: + parsed_email = parse_report_email(urlsafe_b64decode(msg['raw']),offline, + ip_db_path,nameservers, + dns_timeout,strip_attachment_payloads, + parallel) + + if parsed_email["report_type"] == "aggregate": + aggregate_reports.append(parsed_email["report"]) + aggregate_report_msg_uids.append(msg_uid) + elif 
parsed_email["report_type"] == "forensic": + forensic_reports.append(parsed_email["report"]) + forensic_report_msg_uids.append(msg_uid) + + except InvalidDMARCReport as error: + logger.warning(error.__str__()) + if not test: + logger.debug("Moving message UID {0} to {1}".format(msg_uid, invalid_label)) + service.users().messages().modify(userId='me',id=msg_uid, + body={'addLabelIds': [invalid_label_id], "removeLabelIds":[reports_label]}).execute() + + if 'nextPageToken' in results: + results = service.users().messages().list(userId='me',includeSpamTrash=include_spam_trash, + labelIds=[reports_label],nextToken=results['nextPageToken']).execute() + messages = results.get('messages',[]) + total_messages = results['resultSizeEstimate'] + else: + break + + if not test: + if delete: + processed_messages = aggregate_report_msg_uids + \ + forensic_report_msg_uids + + number_of_processed_msgs = len(processed_messages) + for i in range(number_of_processed_msgs): + msg_uid = processed_messages[i] + logger.debug( + "Deleting message {0} of {1}: UID {2}".format( + i + 1, number_of_processed_msgs, msg_uid)) + try: + r = service.users().messages().delete(userId='me',id=msg_uid) + if(r): + raise Exception(r) + except Exception as e: + message = "Error deleting message UID" + e = "{0} {1}: " "{2}".format(message, msg_uid, e) + logger.error("GMail error: {0}".format(e)) + else: + if len(aggregate_report_msg_uids) > 0: + log_message = "Moving aggregate report messages from" + logger.debug( + "{0} {1} to {2}".format( + log_message, reports_label, + aggregate_label)) + number_of_agg_report_msgs = len(aggregate_report_msg_uids) + for i in range(number_of_agg_report_msgs): + msg_uid = aggregate_report_msg_uids[i] + logger.debug( + "Moving message {0} of {1}: UID {2}".format( + i+1, number_of_agg_report_msgs, msg_uid)) + try: + r = service.users().messages().modify(userId="me",id=msg_uid, + body={"addLabelIds": [aggregate_label_id, archive_label_id], + 
"removeLabelIds":[reports_label_id,'INBOX']}).execute() + if(r): + raise Exception(r) + + except Exception as e: + message = "Error moving message UID" + e = "{0} {1}: {2}".format(message, msg_uid, e) + logger.error("Gmail error: {0}".format(e)) + if len(forensic_report_msg_uids) > 0: + message = "Moving forensic report messages from" + logger.debug( + "{0} {1} to {2}".format(message, + reports_label, + forensic_label)) + number_of_forensic_msgs = len(forensic_report_msg_uids) + for i in range(number_of_forensic_msgs): + msg_uid = forensic_report_msg_uids[i] + message = "Moving message" + logger.debug("{0} {1} of {2}: UID {3}".format( + message, + i + 1, number_of_forensic_msgs, msg_uid)) + try: + r = service.users().messages().modify(userId="me",id=msg_uid, + body={"addLabelIds": [forensic_label_id, archive_label_id], + "removeLabelIds":[reports_label_id,'INBOX']}).execute() + if(r): + raise Exception(r) + except Exception as e: + e = "Error moving message UID {0}: {1}".format( + msg_uid, e) + logger.error("GMail error: {0}".format(e)) + + + results = OrderedDict([("aggregate_reports", aggregate_reports), + ("forensic_reports", forensic_reports)]) + + return results + def watch_inbox(host, username, password, callback, port=None, ssl=True, verify=True, reports_folder="INBOX", diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 1418e80..517cd6a 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -20,7 +20,7 @@ from tqdm import tqdm from parsedmarc import get_dmarc_reports_from_inbox, watch_inbox, \ parse_report_file, get_dmarc_reports_from_mbox, elastic, kafkaclient, \ splunk, save_output, email_results, ParserError, __version__, \ - InvalidDMARCReport, s3, syslog + InvalidDMARCReport, s3, syslog, get_dmarc_reports_from_gmail_api from parsedmarc.utils import is_mbox logger = logging.getLogger("parsedmarc") @@ -301,6 +301,14 @@ def _main(): s3_path=None, syslog_server=None, syslog_port=None, + gmail_api_credentials_file = None, + gmail_api_token_file = 
None, + gmail_api_reports_label = 'INBOX', + gmail_api_archive_label = 'DMARC Archive', + gmail_api_include_spam_trash = False, + gmail_api_scopes = ['https://www.googleapis.com/auth/gmail.modify'], + gmail_api_delete = False, + gmail_api_test = False, log_file=args.log_file, n_procs=1, chunk_size=1 @@ -578,6 +586,18 @@ def _main(): else: opts.syslog_port = 514 + if "gmail_api" in config.sections(): + gmail_api_config = config["gmail_api"] + opts.gmail_api_credentials_file = gmail_api_config.get("credentials_file",None) + opts.gmail_api_token_file = gmail_api_config.get("token_file",".token") + opts.gmail_api_reports_label = gmail_api_config.get("reports_label","INBOX") + opts.gmail_api_archive_label = gmail_api_config.get("archive_label","DMARC Archive") + opts.gmail_api_include_spam_trash = gmail_api_config.getboolean("include_spam_trash",False) + opts.gmail_api_scopes = str.split(gmail_api_config.get("scopes","https://www.googleapis.com/auth/gmail.modify"),",") + opts.gmail_api_delete = gmail_api_config.getboolean("delete",None) + opts.gmail_api_test = gmail_api_config.getboolean("test",False) + + logging.basicConfig(level=logging.WARNING) logger.setLevel(logging.WARNING) @@ -594,8 +614,8 @@ def _main(): '%(levelname)s - [%(filename)s:%(lineno)d] - %(message)s') fh.setFormatter(formatter) logger.addHandler(fh) - if opts.imap_host is None and len(opts.file_path) == 0: - logger.error("You must supply input files, or an IMAP configuration") + if opts.imap_host is None and len(opts.file_path) == 0 and opts.gmail_api_credentials_file is None: + logger.error("You must supply input files, or an IMAP or Gmail configuration") exit(1) logger.info("Starting dmarcparse") @@ -744,6 +764,24 @@ def _main(): logger.error("IMAP Error: {0}".format(error.__str__())) exit(1) + if opts.gmail_api_credentials_file: + if opts.gmail_api_delete: + if 'https://mail.google.com/' not in opts.gmail_api_scopes: + logger.error("Message deletion requires scope 'https://mail.google.com/'. 
Add the scope and remove token file to acquire proper access.") + opts.gmail_api_delete = False + + reports = get_dmarc_reports_from_gmail_api(credentials_file=opts.gmail_api_credentials_file,token_file=opts.gmail_api_token_file, + reports_label=opts.gmail_api_reports_label, archive_label=opts.gmail_api_archive_label, + offline=opts.offline, ip_db_path=opts.ip_db_path, + scopes = opts.gmail_api_scopes, include_spam_trash= opts.gmail_api_include_spam_trash, + nameservers=opts.nameservers, dns_timeout=opts.dns_timeout, + strip_attachment_payloads=opts.strip_attachment_payloads, + delete=opts.gmail_api_delete, test = opts.gmail_api_test) + + aggregate_reports += reports["aggregate_reports"] + forensic_reports += reports["forensic_reports"] + + results = OrderedDict([("aggregate_reports", aggregate_reports), ("forensic_reports", forensic_reports)]) From 8441f8badd621aa75009c192777f0ffef4d5e577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dominik=20Berm=C3=BChler?= Date: Tue, 18 Jan 2022 18:23:23 +0100 Subject: [PATCH 2/5] Removed usage of logging.basicConfig logging.basicConfig will change the configuration of the root logger and not the configuration of your own library logger. Since parsedmarc is a library, it should keep its logging configuration to its own logger, such that the logging configuration of applications using this library are not affected. 
--- parsedmarc/__init__.py | 8 +++++--- parsedmarc/cli.py | 3 --- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 75fc02a..5f8352e 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -38,12 +38,14 @@ from parsedmarc.utils import parse_email __version__ = "7.1.1" -logging.basicConfig( - format='%(levelname)8s:%(filename)s:%(lineno)d:' - '%(message)s', +formatter = logging.Formatter( + fmt='%(levelname)8s:%(filename)s:%(lineno)d:%(message)s', datefmt='%Y-%m-%d:%H:%M:%S') +handler = logging.StreamHandler() +handler.setFormatter(formatter) logger = logging.getLogger("parsedmarc") +logger.addHandler(handler) logger.debug("parsedmarc v{0}".format(__version__)) feedback_report_regex = re.compile(r"^([\w\-]+): (.+)$", re.MULTILINE) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 1418e80..441789f 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -578,14 +578,11 @@ def _main(): else: opts.syslog_port = 514 - logging.basicConfig(level=logging.WARNING) logger.setLevel(logging.WARNING) if opts.verbose: - logging.basicConfig(level=logging.INFO) logger.setLevel(logging.INFO) if opts.debug: - logging.basicConfig(level=logging.DEBUG) logger.setLevel(logging.DEBUG) if opts.log_file: fh = logging.FileHandler(opts.log_file) From 5be36e431c34883bda61a18cad4d39d0b2467f74 Mon Sep 17 00:00:00 2001 From: Panos Gkikakis Date: Mon, 24 Jan 2022 23:45:59 +0200 Subject: [PATCH 3/5] Added dependencies for GMail api --- requirements.txt | 6 ++++++ setup.py | 7 ++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d098312..f51a89e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,3 +29,9 @@ sphinx_rtd_theme>=0.4.3 codecov>=2.0.15 lxml>=4.4.0 boto3>=1.16.63 +google-api-core>=2.4.0 +google-api-python-client>=2.35.0 +google-auth>=2.3.3 +google-auth-httplib2>=0.1.0 +google-auth-oauthlib>=0.4.6 + diff --git a/setup.py b/setup.py index 
785ba97..5b2325e 100644 --- a/setup.py +++ b/setup.py @@ -102,7 +102,12 @@ setup( 'kafka-python>=1.4.4', 'tqdm>=4.31.1', 'lxml>=4.4.0', - 'boto3>=1.16.63' + 'boto3>=1.16.63', + 'google-api-core>=2.4.0', + 'google-api-python-client>=2.35.0', + 'google-auth>=2.3.3', + 'google-auth-httplib2>=0.1.0', + 'google-auth-oauthlib>=0.4.6', ], entry_points={ From e79dbd702e2e956f28da6e48eef4479fd257aec1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matthias=20H=C3=B6rmann?= Date: Wed, 2 Mar 2022 11:47:39 +0100 Subject: [PATCH 4/5] add ip_db_path initialization --- parsedmarc/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 1418e80..bbbccea 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -303,7 +303,8 @@ def _main(): syslog_port=None, log_file=args.log_file, n_procs=1, - chunk_size=1 + chunk_size=1, + ip_db_path = None ) args = arg_parser.parse_args() From 30539dc1110748b51d6026b3be198d42ebf65031 Mon Sep 17 00:00:00 2001 From: robertomoutinho Date: Tue, 15 Mar 2022 20:52:50 -0300 Subject: [PATCH 5/5] ISSUE-296 - Elastic DSL lib pinned to under 7.14 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d098312..cda6c1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ xmltodict>=0.12.0 geoip2>=3.0.0 imapclient>=2.1.0 dateparser>=0.7.2 -elasticsearch-dsl>=7.2.0,<8.0.0 +elasticsearch-dsl>=7.2.0,<7.14.0 kafka-python>=1.4.4 mailsuite>=1.6.1 nose>=1.3.7