Gmail API pagination (#469)

* Use pagination to fetch more than 100 Gmail messages at once

* Provide `paginate_messages` option to allow use of previous behavior
This commit is contained in:
mkupferman
2024-02-19 18:54:41 -05:00
committed by GitHub
parent d2145b71ba
commit efe74091f3
3 changed files with 31 additions and 9 deletions
+2
View File
@@ -295,6 +295,8 @@ The full set of configuration options are:
(Default: `https://www.googleapis.com/auth/gmail.modify`)
- `oauth2_port` - int: The TCP port for the local server to
listen on for the OAuth2 response (Default: `8080`)
- `paginate_messages` - bool: When `True`, fetch all applicable Gmail messages.
When `False`, only fetch up to 100 new messages per run (Default: `True`)
- `log_analytics`
- `client_id` - str: The app registration's client ID
- `client_secret` - str: The app registration's client secret
+4
View File
@@ -395,6 +395,7 @@ def _main():
gmail_api_credentials_file=None,
gmail_api_token_file=None,
gmail_api_include_spam_trash=False,
gmail_api_paginate_messages=True,
gmail_api_scopes=[],
gmail_api_oauth2_port=8080,
log_file=args.log_file,
@@ -829,6 +830,8 @@ def _main():
gmail_api_config.get("token_file", ".token")
opts.gmail_api_include_spam_trash = \
gmail_api_config.getboolean("include_spam_trash", False)
opts.gmail_api_paginate_messages = \
gmail_api_config.getboolean("paginate_messages", True)
opts.gmail_api_scopes = \
gmail_api_config.get("scopes",
default_gmail_api_scope)
@@ -1098,6 +1101,7 @@ def _main():
token_file=opts.gmail_api_token_file,
scopes=opts.gmail_api_scopes,
include_spam_trash=opts.gmail_api_include_spam_trash,
paginate_messages=opts.gmail_api_paginate_messages,
reports_folder=opts.mailbox_reports_folder,
oauth2_port=opts.gmail_api_oauth2_port
)
+25 -9
View File
@@ -42,11 +42,13 @@ class GmailConnection(MailboxConnection):
scopes: List[str],
include_spam_trash: bool,
reports_folder: str,
oauth2_port: int):
oauth2_port: int,
paginate_messages: bool):
creds = _get_creds(token_file, credentials_file, scopes, oauth2_port)
self.service = build('gmail', 'v1', credentials=creds)
self.include_spam_trash = include_spam_trash
self.reports_label_id = self._find_label_id_for_label(reports_folder)
self.paginate_messages = paginate_messages
def create_folder(self, folder_name: str):
# Gmail doesn't support the name Archive
@@ -65,16 +67,30 @@ class GmailConnection(MailboxConnection):
else:
raise e
def _fetch_all_message_ids(self, reports_label_id, page_token=None):
    """Yield the ID of every message carrying the given label.

    Issues ``users().messages().list`` requests and yields the ``id`` of
    each entry in the response's ``messages`` list. While the response
    carries a ``nextPageToken`` and ``self.paginate_messages`` is true,
    the next page is requested with that token.

    Pages are walked with a loop rather than by recursion so that the
    number of pages is not bounded by the interpreter's recursion limit.

    Args:
        reports_label_id: Label ID passed to the API as ``labelIds``.
        page_token: Token of the first page to request; ``None`` starts
            from the beginning of the listing.

    Yields:
        Message IDs, in the order the API returns them.
    """
    while True:
        results = (
            self.service.users()
            .messages()
            .list(
                userId="me",
                includeSpamTrash=self.include_spam_trash,
                labelIds=[reports_label_id],
                pageToken=page_token,
            )
            .execute()
        )
        for message in results.get("messages", []):
            yield message["id"]

        # A missing (or empty) token means there is nothing left to
        # fetch; with pagination disabled only the first page is used.
        page_token = results.get("nextPageToken")
        if not page_token or not self.paginate_messages:
            return
def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]:
    """Return the IDs of the messages filed under ``reports_folder``.

    The folder name is resolved to a label ID, and the IDs produced by
    ``_fetch_all_message_ids`` for that label are collected into a list.
    Whether more than the first page of results is fetched is decided by
    that helper, based on ``self.paginate_messages``.

    Args:
        reports_folder: Name of the label that holds the reports.
        **kwargs: Accepted but not used by this method.

    Returns:
        A list of message ID strings.
    """
    reports_label_id = self._find_label_id_for_label(reports_folder)
    return list(self._fetch_all_message_ids(reports_label_id))
def fetch_message(self, message_id):
msg = self.service.users().messages()\