def batchDeleteMails(service):
    """Queue ``batchDelete`` calls for every id in ``to_delete_ids`` and run them.

    The module-level ``to_delete_ids`` list is cut into chunks of at most
    1000 entries (the message limit for regular users); one
    ``messages.batchDelete`` call per chunk is added to a single
    ``BatchHttpRequest`` whose per-request callback is ``deleteCallback``.

    Args:
        service: Gmail API service object.

    Returns:
        1 when ``to_delete_ids`` is empty or an ``errors.HttpError`` is
        raised; ``None`` otherwise.
    """
    if not to_delete_ids:
        return 1

    print('Deleting e-mails...')
    try:
        chunk_size = 1000
        total = len(to_delete_ids)
        iterations = math.ceil(total / chunk_size)
        print(f'Total Messages: {total}, Target Iterations: {iterations}')

        batch = BatchHttpRequest(callback=deleteCallback)
        # range() with a step yields exactly ``iterations`` chunk offsets.
        for start in range(0, total, chunk_size):
            chunk = to_delete_ids[start:start + chunk_size]
            payload = {'ids': [str(entry['id']) for entry in chunk]}
            batch.add(service.users().messages().batchDelete(
                userId=USER_ID, body=payload))
        batch.execute()
    except errors.HttpError as ex:
        print(f'Exception:{ex}')
        return 1
def batched_messages_get(oauth_result, message_ids, params=None):
    """Fetch several Gmail messages in one batch request.

    Args:
        oauth_result: object exposing ``email`` (used as the ``userId``) and
            ``credentials`` (handed to ``execute_api_call``).
        message_ids: iterable of message ids to fetch.
        params: optional mapping of extra query parameters that is applied to
            every request URI via ``add_query_parameters``. Defaults to an
            empty dict created per call.

    Returns:
        A list with one entry per processed request: the response on
        success, or the exception object when that request failed.
    """
    # A fresh dict per call: a ``dict()`` default would be shared between
    # calls and leak any mutation from one call into the next.
    if params is None:
        params = {}

    user_email = oauth_result.email
    credentials = oauth_result.credentials
    results_list = []

    # Google's API client throws away the callback's return value inside
    # BatchHttpRequest._callback, during BatchHttpRequest.execute.
    # The solution, in this case, is to define the callback in the scope that
    # gives it access to the data structure we'll be returning the results in.
    def process_batch_responses(request_id, response, exception):
        results_list.append(response if exception is None else exception)

    batch = BatchHttpRequest()
    service = build('gmail', 'v1', http=httplib2.Http())
    users_messages = service.users().messages()

    for each_id in message_ids:
        api_call = users_messages.get(userId=user_email, id=each_id)
        api_call.uri = add_query_parameters(api_call.uri, params)
        batch.add(api_call, callback=process_batch_responses)

    execute_api_call(credentials, batch)
    return results_list
def main():
    """Dump a Gmail account into a local Maildir-style folder named ``mail``.

    * Obtain credentials through ``get_credentials()``.
    * Create ``mail/tmp``, ``mail/new`` and ``mail/cur`` (mode 0o700).
    * Page through ``users.messages.list`` and batch-download, in ``raw``
      format, every message that does not yet exist in ``mail/cur``.

    Each message is first written to ``mail/tmp/<id>`` and only then moved
    to ``mail/cur/<id>``, so an interrupted write never leaves a truncated
    file that a later run would skip as "already downloaded".

    ``errors.HttpError`` raised while listing or downloading is reported on
    stdout and ends the run.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    service = discovery.build('gmail', 'v1', http=http)

    profile = service.users().getProfile(userId='me').execute()
    # example: {'historyId': '380957', 'emailAddress': 'user@domain',
    #           'threadsTotal': 1135, 'messagesTotal': 1950}
    print("Downloading {messagesTotal} messages for {emailAddress}".format(**profile))

    for new_dir in ['tmp', 'new', 'cur']:
        os.makedirs("mail/%s" % new_dir, mode=0o700, exist_ok=True)

    def process_message(request_id, response, exception):
        """Batch callback: store one downloaded message under mail/cur."""
        if exception is not None:
            # Include the exception itself so the failure can be diagnosed.
            print("ERROR: %s: %s" % (request_id, exception))
            return
        msg_bytes = base64.urlsafe_b64decode(response['raw'].encode('ASCII'))
        mime_msg = email.message_from_bytes(msg_bytes)
        maildir_message = mailbox.MaildirMessage(mime_msg)
        message_id = response['id']
        tmp_path = "mail/tmp/%s" % message_id
        with open(tmp_path, "wb") as message_file:
            message_file.write(bytes(maildir_message))
        # Publish the file only once it is complete.
        os.replace(tmp_path, "mail/cur/%s" % message_id)

    try:
        message_count = 0
        page_token = None
        while True:
            response = service.users().messages().list(
                userId='me', pageToken=page_token).execute()

            if 'messages' in response:
                message_count += len(response['messages'])
                existing_message_count = 0
                queued = 0
                batch = BatchHttpRequest(callback=process_message)
                for message in response['messages']:
                    message_id = message['id']
                    if os.path.exists('mail/cur/%s' % message_id):
                        existing_message_count += 1
                    else:
                        batch.add(service.users().messages().get(
                            userId='me', format='raw', id=message_id))
                        queued += 1
                # Nothing to send when the whole page was already on disk.
                if queued:
                    batch.execute()

                info = "Downloaded %s messages" % message_count
                if existing_message_count:
                    info += " (skipping %s messages already downloaded)" % existing_message_count
                print(info)

            if 'nextPageToken' not in response:
                break
            page_token = response['nextPageToken']
    except errors.HttpError as error:
        print('An HTTPError occurred: %s' % error)
def test_execute_initial_refresh_oauth2(self):
    """A credential without an access token is refreshed, authorized and
    applied exactly once when the batch executes, and the single response
    reaches the callback."""
    credentials = MockCredentials('Foo')
    # Pretend this is a OAuth2Credentials object
    credentials.access_token = None

    response_headers = {
        'status': '200',
        'content-type': 'multipart/mixed; boundary="batch_foobarbaz"',
    }
    http = HttpMockSequence([(response_headers, BATCH_SINGLE_RESPONSE)])
    credentials.authorize(http)

    recorder = Callbacks()
    batch = BatchHttpRequest()
    batch.add(self.request1, callback=recorder.f)
    batch.execute(http=http)

    self.assertEqual({'foo': 42}, recorder.responses['1'])
    self.assertIsNone(recorder.exceptions['1'])

    self.assertEqual(1, credentials._refreshed)
    self.assertEqual(1, credentials._authorized)
    self.assertEqual(1, credentials._applied)
def test_execute_request_body_with_custom_long_request_ids(self):
    """Long custom request ids produce a folded ``Content-ID`` header.

    The mock HTTP layer echoes the request body back; executing the batch
    raises ``BatchError`` whose ``content`` is then inspected part by part.
    """
    long_ids = ('abc'*20, 'def'*20)

    batch = BatchHttpRequest()
    batch.add(self.request1, request_id=long_ids[0])
    batch.add(self.request2, request_id=long_ids[1])
    http = HttpMockSequence([
        ({'status': '200',
          'content-type': 'multipart/mixed; boundary="batch_foobarbaz"'},
         'echo_request_body'),
    ])

    try:
        batch.execute(http=http)
    except BatchError as e:
        echoed = e.content
    else:
        self.fail('Should raise exception')

    boundary, _ = echoed.split(None, 1)
    self.assertEqual('--', boundary[:2])

    parts = echoed.split(boundary)
    self.assertEqual(4, len(parts))
    self.assertEqual('', parts[0])
    self.assertEqual('--', parts[3].rstrip())

    for partindex, request_id in enumerate(long_ids, start=1):
        lines = parts[partindex].splitlines()
        for n, line in enumerate(lines):
            if not line.startswith('Content-ID:'):
                continue
            # assert correct header folding
            self.assertTrue(line.endswith('+'), line)
            header_continuation = lines[n+1]
            self.assertEqual(
                header_continuation,
                ' %s>' % request_id,
                header_continuation
            )
def list_email_ids_by_label(service, label):
    """Collect Gmail message ids carrying ``label``.

    Args:
        service: Gmail API service object.
        label: label id passed as ``labelIds`` to ``users.messages.list``.

    Returns:
        A list of message id strings; empty when the response contains no
        ``messages`` entry.
    """
    email_ids = []
    max_results = 100
    next_page_token = ''

    while True:
        response = service.users().messages().list(
            userId='me',
            labelIds=label,
            maxResults=max_results,
            pageToken=next_page_token).execute()

        # A response may lack the 'messages' key entirely (nothing matched),
        # so fall back to an empty list instead of raising KeyError.
        email_ids.extend(msg['id'] for msg in response.get('messages', []))
        print(len(email_ids))

        if 'nextPageToken' not in response:
            break
        print('next page token:', response['nextPageToken'])
        next_page_token = response['nextPageToken']

        # uncomment for testing cap 500
        # NOTE(review): this unconditional break stops after the first page
        # even when a next page token exists. It looks like a testing cap
        # that was left enabled - confirm before removing it.
        break

    return email_ids
def test_add_fail_for_resumable(self):
    """Adding a request that carries a resumable upload raises BatchError."""
    batch = BatchHttpRequest()

    self.request1.resumable = MediaFileUpload(
        datafile('small.png'), chunksize=500, resumable=True)

    with self.assertRaises(BatchError):
        batch.add(self.request1, request_id='1')
def test_http_errors_passed_to_callback(self):
    """A 401 inside a batch response is handed to the callback as an
    exception while the other request's response is delivered normally."""
    batch = BatchHttpRequest()
    recorder = Callbacks()

    # Two identical canned batch responses, each with its own header dict.
    http = HttpMockSequence([
        ({'status': '200',
          'content-type': 'multipart/mixed; boundary="batch_foobarbaz"'},
         BATCH_RESPONSE_WITH_401)
        for _ in range(2)
    ])

    # Give each request its own credentials and (empty) credential http.
    credentials = []
    for request, name in ((self.request1, 'Foo'), (self.request2, 'Bar')):
        cred = MockCredentials(name)
        cred_http = HttpMockSequence([])
        cred.authorize(cred_http)
        request.http = cred_http
        credentials.append(cred)

    batch.add(self.request1, callback=recorder.f)
    batch.add(self.request2, callback=recorder.f)
    batch.execute(http=http)

    first_error = recorder.exceptions['1']
    self.assertEqual(None, recorder.responses['1'])
    self.assertEqual(401, first_error.resp.status)
    self.assertEqual('Authorization Required', first_error.resp.reason)

    self.assertEqual({u'baz': u'qux'}, recorder.responses['2'])
    self.assertEqual(None, recorder.exceptions['2'])
def get_message_list_info(self, message_ids):
    """
    Fetch the details of several messages with one batch request.

    Args:
        message_ids (list): ids of the messages to fetch

    Returns:
        dict mapping each returned message's ``id`` to its response
    """
    fetched = {}

    def get_message_info(request_id, response, exception):
        # Invoked once per request in the batch.
        if response:
            fetched[response['id']] = response

        if not exception:
            return
        # A 404 means the message no longer exists: log it and carry on.
        if exception.resp.status == 404:
            logger.error('404 error: %s' % exception)
            return
        # Any other error is propagated.
        raise exception

    batch = BatchHttpRequest(callback=get_message_info)
    for message_id in message_ids:
        request = self.service.users().messages().get(userId='me', id=message_id)
        batch.add(request)

    self.execute_service_call(batch)
    return fetched
def batch_add_users(service, accountId, users):
    """Create profile user links for many users in one batch request.

    Parameters:
        service: API service object exposing ``management().profileUserLinks()``.
        accountId: account the links are created under.
        users: iterable of ``(user_email, requests)`` pairs, where
            ``requests`` is an iterable of
            ``(webPropertyId, profileId, permissions)`` triples.

    The outcome of every insert is printed by the per-request callback.
    """
    def handle_create_user(requestId, response, exception):
        if exception is None:
            print(f"Request ID: {requestId}, Created user: {response}")
            return
        print(f"There was an error: {exception}")

    batch = BatchHttpRequest(callback=handle_create_user)

    for user_email, requests in users:
        for webPropertyId, profileId, permissions in requests:
            link_body = {
                "permissions": {"local": permissions},
                "userRef": {"email": user_email},
            }
            insert_call = service.management().profileUserLinks().insert(
                accountId=accountId,
                webPropertyId=webPropertyId,
                profileId=profileId,
                body=link_body,
            )
            batch.add(insert_call)

    batch.execute()
def get_label_list_info(self, messages_ids):
    """
    Fetch label information for several messages with one batch request.

    Args:
        messages_ids (list): ids of the messages to look up

    Returns:
        dict mapping each returned message's ``id`` to its response
    """
    labels_by_message = {}

    def get_label_info(request_id, response, exception):
        # Invoked once per request in the batch.
        if response:
            labels_by_message[response['id']] = response

        # A 404 means the message no longer exists and is ignored;
        # every other error is propagated.
        if exception and exception.resp.status != 404:
            raise exception

    batch = BatchHttpRequest(callback=get_label_info)

    for message_id in messages_ids:
        # Temporary add snippet
        # TODO: remove snippet
        request = self.service.users().messages().get(
            userId='me',
            id=message_id,
            fields='labelIds,id,threadId,snippet')
        batch.add(request)

    self.execute_service_call(batch)
    return labels_by_message
def test_deserialize_response(self):
    """``_deserialize_response`` splits RESPONSE into status info and body."""
    parsed = BatchHttpRequest()._deserialize_response(RESPONSE)
    resp, content = parsed

    self.assertEqual(200, resp.status)
    self.assertEqual('OK', resp.reason)
    self.assertEqual(11, resp.version)
    self.assertEqual('{"answer": 42}', content)
def __init__(self, service, **kwargs):
    """Initialise the enhanced batch and its hidden companion batch.

    Args:
        service: API service object; when no ``batch_uri`` keyword is
            given, the URI is taken from a batch created by
            ``service.new_batch_http_request()``.
        **kwargs: forwarded to ``BatchHttpRequest.__init__`` for both this
            object and the hidden modtime batch.
    """
    if "batch_uri" not in kwargs:
        default_batch = service.new_batch_http_request()
        kwargs["batch_uri"] = default_batch._batch_uri

    super(EnhancedBatchHttpRequest, self).__init__(**kwargs)

    self._kwargs = kwargs
    self._counter = 0
    # Hidden batch for modtime.
    self._modtime_batch = BatchHttpRequest(**kwargs)
def test_add_fail_for_resumable(self):
    """Adding a request with a resumable upload raises a BatchError that
    can be converted to a string."""
    batch = BatchHttpRequest()

    self.request1.resumable = MediaFileUpload(
        datafile('small.png'), chunksize=500, resumable=True)

    with self.assertRaises(BatchError) as raised:
        batch.add(self.request1, request_id='1')

    # Rendering the exception must not fail.
    str(raised.exception)
def threadsToArchiveCallback(id, response, exception):
    """
    Batch callback: remove the inbox label from every thread in ``response``.

    :param id: request id supplied by the batch
    :param response: thread-list response; nothing is done when it is empty,
        has no or a zero ``resultSizeEstimate``, or has no ``threads``
    :param exception: error for this request (only logged)
    :return: None
    """
    logger.debug(
        "threadsToArchiveCallback:\nid: {}\nresponse: {}\nexception {}".format(
            id, response, exception))

    # No threads found
    if not response:
        return
    if 'resultSizeEstimate' not in response or 'threads' not in response:
        return
    if response['resultSizeEstimate'] == 0:
        return

    pending = 0
    archiveBatch = BatchHttpRequest()
    for thread in response['threads']:
        logger.debug("Thread to archive: {}".format(thread))
        modify_call = service.users().threads().modify(
            userId='me',
            id=thread['id'],
            body={"removeLabelIds": [INBOX_LABEL_NAME], "addLabelIds": []})
        archiveBatch.add(modify_call, callback=threadCallback)
        pending += 1

        # Only do 1 run for debug instead of all
        if DEBUG:
            archiveBatch.execute()
            return

        if pending < MAX_BATCH_SIZE:
            continue
        # Batch is full: send it and start a new one.
        archiveBatch.execute()
        archiveBatch = BatchHttpRequest()
        pending = 0

    # Send whatever is left over from the last partial batch.
    if pending > 0:
        archiveBatch.execute()
def main():
    """Fetch message metadata per label and write it to ``new/<label>.csv``.

    For each label the message ids are listed into the global ``msg_ids``;
    while that list is non-empty it is split into batches of
    ``max_per_batch`` and every batch is fetched with ``get_message`` as the
    callback. The global ``msg_df`` is then printed and saved.
    """
    global msg_ids

    started_at = time.time()

    # building out the api service
    service = build('gmail', 'v1', credentials=get_creds())

    # label = 'CATEGORY_PERSONAL'
    # labels = list_user_labels(service)
    labels = ['INBOX']
    pprint(labels)

    logging.basicConfig(
        level=args.loglevel,
        format='%(asctime)s %(levelname)s %(funcName)s:%(lineno)d %(message)s',
        stream=sys.stdout)

    # remove_bad_labels(labels)
    for label in labels:
        print('current label', label)

        # gets email ids per label
        msg_ids = list_email_ids_by_label(service, label)

        # run each batch
        # NOTE(review): the loop only ends if something outside this block
        # (presumably get_message) shrinks msg_ids - verify.
        while len(msg_ids) > 0:
            # break into batches
            msg_ids_batches = break_into_n_size_each_batch(
                msg_ids, max_per_batch)
            print('batches to process:', len(msg_ids_batches),
                  ',per batch:', len(msg_ids_batches[0]),
                  ',last batch:', len(msg_ids_batches[-1]))

            for b_id, batch in enumerate(msg_ids_batches, start=1):
                print('running batch', b_id)
                batch_request = BatchHttpRequest()
                for msg_id in batch:
                    metadata_call = service.users().messages().get(
                        userId='me',
                        id=msg_id,
                        format='metadata',
                        metadataHeaders=metadata)
                    batch_request.add(metadata_call, get_message)
                batch_request.execute()

        print(msg_df.shape)
        print(msg_df.head())

        print('writing to', label + '.csv')
        msg_df.to_csv('new/' + label + '.csv', encoding='utf-8', index=False)

    print('program time:', time.time() - started_at)
def fetch(self, msg_ids):
    """Download the given messages in ``raw`` format with one batch request.

    Args:
        msg_ids: iterable of mappings, each with an ``'id'`` key.

    Returns:
        The ``messages`` attribute of the ``MessageFetcher`` whose
        ``fetch_message`` method served as the per-request callback.
    """
    fetcher = MessageFetcher()
    batch = BatchHttpRequest()

    for msg_desc in msg_ids:
        raw_request = self.service.users().messages().get(
            userId='me', id=msg_desc['id'], format='raw')
        batch.add(raw_request, callback=fetcher.fetch_message)

    batch.execute()
    return fetcher.messages
def query_email_api(access_token):
    """
    Query the Gmail Batch API for the account's messages and append each
    response (``minimal`` format) to ``emaildata.json``.

    The message ids are listed page by page first; the user is then asked to
    confirm before anything is downloaded. Downloads are sent in batches of
    at most 100 calls.

    param access_token: an oauth access token
    return: None
    """
    # Gmail accepts at most 100 calls in a single batch request.
    max_calls_per_batch = 100

    # Start from an empty output file.
    with open("emaildata.json", "w"):
        pass

    def callback(request_id, response, exception):
        """
        Per-request callback of the Batch API. The batch always calls it
        with (request_id, response, exception); failed requests are reported
        and skipped, successful responses are appended to the output file.
        """
        if exception is not None:
            print('An error occurred: %s' % exception)
            return
        with open('emaildata.json', 'a') as outfile:
            json.dump(response, outfile, indent=4, sort_keys=True)

    credentials = google.oauth2.credentials.Credentials(access_token)
    GMAIL = build('gmail', 'v1', credentials=credentials)
    message_ids = GMAIL.users().messages().list(userId='me', ).execute()

    messages = []
    count = 0
    try:
        # A page without results carries no 'messages' key.
        messages.extend(message_ids.get('messages', []))
        while 'nextPageToken' in message_ids:
            page_token = message_ids['nextPageToken']
            message_ids = GMAIL.users().messages().list(
                userId='me', pageToken=page_token).execute()
            messages.extend(message_ids.get('messages', []))
            count += 1
    except errors.HttpError as error:
        print('An error occurred: %s' % error)

    # Computed outside the try/loop so it is always bound for the prompt.
    message_estimate = count * 100

    choice = input(
        "%d Pages of Messages have been found on this account. It is estimated that "
        "there would be %d Messages.\nWould you like to continue? Type Y to Proceed, otherwise type any key: "
        % (count, message_estimate))
    if choice != "Y":
        return

    message_ids_sorted = [message_id['id'] for message_id in messages]
    for start in range(0, len(message_ids_sorted), max_calls_per_batch):
        batch = BatchHttpRequest()
        for msg_id in message_ids_sorted[start:start + max_calls_per_batch]:
            batch.add(
                GMAIL.users().messages().get(userId='me', id=msg_id, format='minimal'),
                callback=callback)
        batch.execute()
    return
def getMailsByFilter(service, user_id, cleanup_list):
    """Run one ``messages.list`` query per entry of ``cleanup_list`` in a batch.

    Each query string doubles as the request id, and ``getCallback``
    receives every result.

    Args:
        service: Gmail API service object.
        user_id: value passed as ``userId``.
        cleanup_list: iterable of Gmail search query strings.

    Returns:
        0 on success, 1 when an ``errors.HttpError`` was raised.
    """
    try:
        batch = BatchHttpRequest(callback=getCallback)
        for lookup in cleanup_list:
            list_call = service.users().messages().list(userId=user_id, q=lookup)
            batch.add(list_call, request_id=lookup)
        batch.execute()
    except errors.HttpError as ex:
        print(f'Exception:{ex}')
        return 1
    return 0
def test_new_id(self):
    """``_new_id`` counts upwards and skips ids that were added explicitly."""
    batch = BatchHttpRequest()

    self.assertEqual('1', batch._new_id())
    self.assertEqual('2', batch._new_id())

    # '3' is taken by an explicit request id, so the next fresh id is '4'.
    batch.add(self.request1, request_id='3')
    self.assertEqual('4', batch._new_id())
def test_serialize_get_request_no_body(self):
    """A body-less GET request serializes to NO_BODY_EXPECTED_GET."""
    get_request = HttpRequest(
        None,
        None,
        'https://www.googleapis.com/someapi/v1/collection/?foo=bar',
        method='GET',
        body=None,
        headers={'content-type': 'application/json'},
        methodId=None,
        resumable=None)

    batch = BatchHttpRequest()
    serialized_lines = batch._serialize_request(get_request).splitlines()

    self.assertEqual(NO_BODY_EXPECTED_GET.splitlines(), serialized_lines)
def GetEmails(user: str, message_ids: list, email_format: str = 'full', batch_size=100, batch_wait=0):
    """
    Get all of the emails for a list of message ids (typically taken from a
    list request).

    :param user: The user to get the emails from.
    :param message_ids: List of message ids to retrieve.
    :param email_format: The format that the email is returned as, default: "full".
        - "full": Returns the full email message data with body content parsed
          in the payload field; the raw field is not used. (default)
        - "metadata": Returns only email message ID, labels, and email headers.
        - "minimal": Returns only email message ID and labels; does not return
          the email headers, body, or payload.
        - "raw": Returns the full email message data with body content in the
          raw field as a base64url encoded string; the payload field is not used.
    :param batch_size: Size of the batch request (how many emails to download
        each iteration). Note: Gmail API's rate limit is 250 request units per
        second, but any batch_size over 100 tends to exceed that.
    :param batch_wait: How long to wait (seconds) after each batch.
    :return: A tuple ``(messages, batches)``: the list of decoded emails whose
        response status was '200', and the list of executed batch objects.
    """
    service = GetService(
        email_address=user,
        scopes=['https://www.googleapis.com/auth/gmail.readonly'])

    total = len(message_ids)
    print(f'Getting {total} Messages...')

    messages = []
    batches = []
    good = 0

    for offset in range(0, total, batch_size):
        print(f"Messages: {offset+1} - {min(offset+batch_size, total)}")

        # one batch request holding a get() call per id of this slice
        batch = BatchHttpRequest()
        for message_id in message_ids[offset:offset + batch_size]:
            batch.add(service.users().messages().get(
                userId='me', id=message_id, format=email_format))
        batch.execute()
        batches.append(batch)

        # keep only the responses that came back with status 200
        for header, body in batch._responses.values():
            if header['status'] != '200':
                continue
            good += 1
            messages.append(json.loads(body))

        time.sleep(batch_wait)

    print(f"Successfully retrieved {good}/{total} messages!")
    return messages, batches
def test_execute_global_callback(self):
    """A callback given to the constructor receives every response."""
    recorder = Callbacks()
    batch = BatchHttpRequest(callback=recorder.f)

    for request in (self.request1, self.request2):
        batch.add(request)

    response_headers = {
        'status': '200',
        'content-type': 'multipart/mixed; boundary="batch_foobarbaz"',
    }
    http = HttpMockSequence([(response_headers, BATCH_RESPONSE)])
    batch.execute(http=http)

    self.assertEqual({'foo': 42}, recorder.responses['1'])
    self.assertEqual({'baz': 'qux'}, recorder.responses['2'])
def test_add_fail_for_over_limit(self):
    """Adding one request beyond MAX_BATCH_LIMIT raises BatchError."""
    from googleapiclient.http import MAX_BATCH_LIMIT

    batch = BatchHttpRequest()

    # Fill the batch up to the limit.
    for _ in range(MAX_BATCH_LIMIT):
        filler = HttpRequest(
            None,
            None,
            'https://www.googleapis.com/someapi/v1/collection/?foo=bar',
            method='POST',
            body='{}',
            headers={'content-type': 'application/json'})
        batch.add(filler)

    with self.assertRaises(BatchError):
        batch.add(self.request1)
def get_messages_batch(msg_ids):
    """Fetch the metadata of the given messages with one batch request.

    Args:
        msg_ids: iterable of message ids; each is converted with ``str()``.

    Every response is delivered to ``get_message``. The headers to return
    are taken from the module-level ``metadata`` and sent as
    ``metadataHeaders`` - the name the Gmail ``users.messages.get`` method
    defines for it; the API client rejects unknown keyword names such as
    ``metadata``.
    """
    # create the batch request
    batch_requests = BatchHttpRequest()
    print('in get_messages_batch')

    for msg_id in msg_ids:
        print('msg_id', msg_id)
        batch_requests.add(
            service.users().messages().get(
                userId='me',
                id=str(msg_id),
                format='metadata',
                metadataHeaders=metadata),
            get_message)

    batch_requests.execute()
def handle_messages_details(message_list: dict, gmail_client: Resource, user_email: str) -> None:
    """Fetch every message of ``message_list`` in one batch request.

    Args:
        message_list: mapping whose ``"messages"`` entry is an iterable of
            mappings with an ``"id"`` key.
        gmail_client: Gmail API resource used to build the requests.
        user_email: value passed as ``userId`` and forwarded to the callback.

    Each response is passed to ``find_and_save_flight_bookings`` with
    ``service`` and ``user_id`` pre-bound.
    """
    # The bound arguments are the same for every request, so bind them once.
    on_message = partial(
        find_and_save_flight_bookings,
        service=gmail_client,
        user_id=user_email)

    batch = BatchHttpRequest()
    for entry in message_list["messages"]:
        get_call = gmail_client.users().messages().get(
            userId=user_email, id=entry["id"])
        batch.add(get_call, callback=on_message)
    batch.execute()
def messages(self, messages_ids):
    """Fetch the given messages in one batch request and parse each one.

    Args:
        messages_ids: iterable of message ids.

    Returns:
        list with ``self.parse_Message(...)`` applied to every response, in
        the order the callbacks fire.
    """
    parsed = []

    def handle_message(index, message, error):
        # NOTE(review): ``error`` is never inspected, so parse_Message is
        # also called for failed requests - confirm that is intended.
        parsed.append(self.parse_Message(message))

    batch = BatchHttpRequest()
    for message_id in messages_ids:
        get_call = self.service.users().messages().get(userId='me', id=message_id)
        batch.add(get_call, callback=handle_message)

    batch.execute()
    return parsed
def new_batch_http_request(callback=None):
    """Build a BatchHttpRequest bound to the discovery document's batch URI.

    ``batch_uri`` is not defined in this function; it is taken from the
    surrounding scope.

    Args:
      callback: callable, invoked for each response as
        callback(id, response, exception). ``id`` is the request id,
        ``response`` the deserialized response object, and ``exception`` an
        apiclient.errors.HttpError if an HTTP error occurred while
        processing the request, or None if no error occurred.

    Returns:
      A BatchHttpRequest object based on the discovery document.
    """
    batch_request = BatchHttpRequest(callback=callback, batch_uri=batch_uri)
    return batch_request
def getThreadsForLabels(labelIdMap):
    """
    Queue one thread search per label and execute them as a single batch.

    For each label name the first group of ``LABEL_REGEX`` is used as the
    number of days in an ``older_than:<n>d`` inbox query; the results are
    handed to ``threadsToArchiveCallback``.

    :param labelIdMap: mapping keyed by label name (the values are not used)
    :return: None
    """
    threadFetchBatch = BatchHttpRequest()

    # Iterate the keys directly: ``dict.iteritems()`` only exists on
    # Python 2, and the mapped value was never used here.
    for labelName in labelIdMap:
        numDays = re.match(LABEL_REGEX, labelName).group(1)
        query = "label:{} older_than:{}d in:inbox".format(labelName, numDays)
        threadFetchBatch.add(
            service.users().threads().list(userId='me', q=query),
            callback=threadsToArchiveCallback)

    threadFetchBatch.execute()
def batch_add_events(events, oauth2_clinet_id, oauth2_secrete, sender_id, change, org_credentials, callback):
    """Insert several calendar events with one batch request.

    Args:
        events: iterable of event bodies to insert.
        oauth2_clinet_id, oauth2_secrete, sender_id, change, org_credentials:
            forwarded unchanged to ``google_calendar_raw_connection``.
        callback: per-request callback passed to the batch for every insert.

    Events go into the calendar named by
    ``settings.GOOGLE_CALENDAR_API_DEFAULT_CALENDAR_ID``; the batch is
    executed over the connection's ``http`` object.
    """
    raw_connection = google_calendar_raw_connection(
        oauth2_clinet_id,
        oauth2_secrete,
        sender_id,
        change,
        org_credentials)
    http = raw_connection['http']
    service = raw_connection['service']

    batch = BatchHttpRequest()
    for event in events:
        insert_call = service.events().insert(
            calendarId=settings.GOOGLE_CALENDAR_API_DEFAULT_CALENDAR_ID,
            body=event)
        batch.add(insert_call, callback)
    batch.execute(http=http)