def run(self, batch=None, user_process_id=None):
    """Pipeline entry point: move every message id in `batch` for one user.

    Args:
        batch: iterable of IMAP message ids to move.
        user_process_id: datastore id of the MoveUserProcess tracking this user.

    For each message a MoveMessageProcess record is created (status STARTED),
    then move_message() is attempted over a single shared IMAP connection.
    On failure, a child MoveMessageProcessPipeline is yielded so the pipeline
    framework retries that message independently; its future is collected in
    failed_messages.

    NOTE(review): imap.close() is not in a finally block — if anything outside
    the inner try raises (e.g. message_process.put()), the connection leaks.
    """
    user_process = MoveUserProcess.get_by_id(user_process_id)
    move_process = user_process.move_process_key.get()
    failed_messages = []
    # One IMAP connection (2-legged OAuth) reused for the whole batch.
    imap = IMAPHelper()
    imap.oauth1_2lo_login(user_email=user_process.user_email)
    for msg_id in batch:
        # Track each message move as its own datastore record.
        message_process = MoveMessageProcess(
            email=user_process.user_email,
            message_id=msg_id,
            user_process_key=user_process.key,
            status=constants.STARTED)
        message_process_key = message_process.put()
        try:
            move_message(user_process=user_process,
                         message_process_id=message_process_key.id(),
                         label=move_process.tag,
                         imap=imap)
        except Exception as e:
            logging.exception(
                'Failed while moving message [%s] for user [%s], '
                'try again...', msg_id, user_process.user_email)
            # Yielding spawns a child pipeline that retries this one message;
            # the yielded future is what gets appended.
            failed_messages.append(
                (yield MoveMessageProcessPipeline(
                    message_process_id=message_process_key.id(),
                    move_process_id=move_process.key.id()))
            )
    imap.close()
def _record_user_error(user_email, error_message):
    """Append an error description to the user's ProcessedUser record.

    Creates the record with zeroed counters if it does not exist yet.
    """
    processed_user = ProcessedUser.get_by_id(user_email)
    if not processed_user:
        processed_user = ProcessedUser(id=user_email, ok_count=0,
                                       error_count=0, total_count=list(),
                                       error_description=list())
    processed_user.error_description.append(error_message)
    processed_user.put()


def get_messages(user_email=None, tag=None, process_id=None):
    """List the user's trash message ids and bump the total counter.

    Args:
        user_email: account to log into via 2-legged OAuth.
        tag: optional label to create before listing.
        process_id: move-process id used as the counter namespace.

    Returns:
        Message ids chunked into constants.USER_CONNECTION_LIMIT groups,
        or [] when there are no messages or on any error (errors are
        recorded on the user's ProcessedUser entity instead of raised).
    """
    imap = None
    msg_ids = []
    try:
        imap = IMAPHelper()
        imap.oauth1_2lo_login(user_email=user_email)
        try:
            if tag:
                logging.info('Creating label [%s]', tag)
                imap.create_label(tag)
            msg_ids = imap.list_messages(only_from_trash=True)
        except Exception as e:
            logging.exception('Error creating label or retrieving messages for '
                              'user [%s]', user_email)
            # BUG FIX: original called ProcessedUser.get_by_id(email) with an
            # undefined name `email`; it must be user_email.
            _record_user_error(user_email, e.message)
            return []
    except Exception as e:
        logging.exception('Authentication or connection problem for user '
                          '[%s]', user_email)
        _record_user_error(user_email, e.message)
        return []
    finally:
        # Close the connection whether listing succeeded or not.
        if imap:
            imap.close()

    # Connection was OK at this point; record the total and chunk the work.
    if len(msg_ids) > 0:
        counter.load_and_increment_counter('%s_total_count' % user_email,
                                           delta=len(msg_ids),
                                           namespace=str(process_id))
        return chunkify(msg_ids, num_chunks=constants.USER_CONNECTION_LIMIT)
    else:
        counter.load_and_increment_counter('%s_total_count' % user_email,
                                           delta=0,
                                           namespace=str(process_id))
        return []
def get_messages_for_cleaning(user_email=None, process_id=None):
    """List message ids with attachments pending migration for a clean process.

    Args:
        user_email: account to log into with the stored source password.
        process_id: CleanUserProcess id; supplies credentials and search
            criteria, and is used as the counter namespace.

    Returns:
        [msg_ids] (a single chunk) when there is work to do, or [] after
        marking the process FINISHED when there is none.
    """
    clean_process = CleanUserProcess.get_by_id(process_id)
    imap = IMAPHelper()
    imap.login(email=user_email, password=clean_process.source_password)
    try:
        msg_ids = imap.list_messages(criteria=clean_process.search_criteria,
                                     only_with_attachments=True,
                                     not_migrated=True)
    finally:
        # FIX: close the connection even if list_messages raises
        # (previously a failure here leaked the IMAP connection).
        imap.close()

    if len(msg_ids) > 0:
        counter.load_and_increment_counter(
            'cleaning_%s_total_count' % user_email,
            delta=len(msg_ids),
            namespace=str(process_id))
        # Everything goes in one chunk: the migration API has a 1 QPS limit,
        # so splitting into up to USER_CONNECTION_LIMIT parallel chunks
        # (msg_ids[i::n] for each i) was considered but not enabled.
        return [msg_ids]
    else:
        counter.load_and_increment_counter(
            'cleaning_%s_total_count' % user_email,
            delta=0,
            namespace=str(process_id))
        # No work: mark the process finished immediately.
        # (Reuses the entity fetched above instead of a second get_by_id.)
        clean_process.status = constants.FINISHED
        clean_process.put()
        return []
def clean_messages(user_email=None, password=None, chunk_ids=list(),
                   retry_count=0, process_id=None):
    """Clean (archive attachments to Drive, migrate, count) a chunk of messages.

    Args:
        user_email: account whose messages are cleaned.
        password: unused; login uses the CleanUserProcess stored password.
        chunk_ids: message ids to clean in this task.
        retry_count: how many times this chunk has already been re-deferred.
        process_id: CleanUserProcess id (credentials + counter namespace).

    Returns:
        True when chunk_ids is empty (process marked FINISHED); otherwise None.

    Raises:
        Exception: re-raised after logging on Drive/Migration auth failure or
        any other chunk-level error, so the task queue retries the task.

    On a per-message failure the loop stops and the not-yet-cleaned remainder
    is re-deferred; after MAX_CLEAN_RETRIES the failing message is dropped
    (counted as an error) and the rest is re-deferred with a fresh retry count.
    """
    cleaned_successfully = []
    remaining = []
    # FIX: imap must exist before the try block; previously, if
    # CleanUserProcess.get_by_id or login raised, the finally clause
    # referenced an unbound name and masked the real error with NameError.
    imap = None
    if len(chunk_ids) <= 0:
        process = CleanUserProcess.get_by_id(process_id)
        process.status = constants.FINISHED
        process.put()
        return True
    try:
        process = CleanUserProcess.get_by_id(process_id)
        imap = IMAPHelper()
        imap.login(email=user_email, password=process.source_password)
        imap.select()
        domain_name = user_email.split('@')[1]
        primary_domain = PrimaryDomain.get_or_create(
            domain_name)
        # Drive: ensure ATTACHMENT_FOLDER/<user_email> exists for extracted
        # attachments.
        try:
            drive = DriveHelper(credentials_json=primary_domain.credentials,
                                admin_email=primary_domain.admin_email,
                                refresh_token=primary_domain.refresh_token)
            folder = drive.get_folder(constants.ATTACHMENT_FOLDER)
            if not folder:
                folder = drive.create_folder(constants.ATTACHMENT_FOLDER)
            sub_folder = drive.get_folder(user_email)
            if not sub_folder:
                sub_folder = drive.create_folder(user_email,
                                                 [{'id': folder['id']}])
        except Exception as e:
            logging.error(
                "Couldn't authenticate drive for user %s" % user_email)
            raise e
        try:
            migration = MigrationHelper(
                credentials_json=primary_domain.credentials,
                refresh_token=primary_domain.refresh_token)
        except Exception as e:
            logging.error(
                "Couldn't authenticate migration api for user %s" % user_email)
            raise e
        for message_id in chunk_ids:
            try:
                result = clean_message(msg_id=message_id, imap=imap,
                                       drive=drive, migration=migration,
                                       folder_id=sub_folder['id'],
                                       user_email=user_email,
                                       process_id=process_id)
                if result:
                    counter.load_and_increment_counter(
                        'cleaning_%s_ok_count' % (user_email),
                        namespace=str(process_id))
                    cleaned_successfully.append(message_id)
                else:
                    counter.load_and_increment_counter(
                        'cleaning_%s_error_count' % user_email,
                        namespace=str(process_id))
                    logging.error(
                        'Error cleaning message ID [%s] for user [%s]: [%s] ',
                        message_id, user_email, result)
            except Exception as e:
                logging.exception(
                    'Failed cleaning individual message ID [%s] for user [%s]',
                    message_id, user_email)
                remaining = []
                if retry_count < constants.MAX_CLEAN_RETRIES:
                    # Retry everything not yet cleaned (including the failing
                    # message) with an incremented retry count.
                    for chunk_msg in chunk_ids:
                        if chunk_msg not in cleaned_successfully:
                            remaining.append(chunk_msg)
                    logging.info(
                        'Scheduling [%s] remaining cleaning messages '
                        'for user [%s]', len(remaining), user_email)
                    deferred.defer(clean_messages, user_email=user_email,
                                   chunk_ids=remaining,
                                   process_id=process_id,
                                   retry_count=retry_count + 1)
                else:
                    # Give up on the failing message only; re-defer the rest
                    # with a fresh retry budget.
                    for chunk_msg in chunk_ids:
                        if message_id == chunk_msg:
                            continue
                        if chunk_msg not in cleaned_successfully:
                            remaining.append(chunk_msg)
                    logging.info(
                        'Giving up cleaning message [%s] for '
                        'user [%s]', message_id, user_email)
                    counter.load_and_increment_counter(
                        'cleaning_%s_error_count' % user_email, delta=1,
                        namespace=str(process_id))
                    deferred.defer(clean_messages, user_email=user_email,
                                   chunk_ids=remaining,
                                   process_id=process_id)
                break
    except Exception as e:
        logging.exception('Failed cleaning messages chunk')
        raise e
    finally:
        if imap:
            imap.close()
    # Heuristic completion check: small chunks, or nearly everything cleaned,
    # mark the whole process finished.
    if len(chunk_ids) < 10 or (
            len(cleaned_successfully) + 10 > len(chunk_ids)):
        process.status = constants.FINISHED
        process.put()
def delayed_delete_message(msg_id=None, process_id=None, retries=0):
    """Delete a source message once its migrated copy is confirmed to exist.

    Args:
        msg_id: IMAP id of the source message to delete.
        process_id: CleanUserProcess id (credentials, criteria, progress).
        retries: current retry attempt; re-defers with exponential backoff
            (60 * 2**retries seconds) up to constants.MAX_RETRIES.

    Waits for the CleanMessageProcess record to reach MIGRATED and for a
    message with the same subject under label Migrated-Migrados to appear,
    then deletes the original, marks the record FINISHED, and updates the
    process progress/latest_activity (Bogota local time).
    """
    process = CleanUserProcess.get_by_id(process_id)
    criteria = process.search_criteria
    msg_process = CleanMessageProcess.query(ndb.AND(
        CleanMessageProcess.msg_id == msg_id,
        CleanMessageProcess.clean_process_id == process_id)
    ).get()
    if msg_process.status != constants.MIGRATED:
        # Migration not done yet: back off and retry, or give up.
        if retries < constants.MAX_RETRIES:
            deferred.defer(delayed_delete_message, msg_id=msg_id,
                           process_id=process_id, retries=retries + 1,
                           _countdown=60 * 2 ** retries,
                           _queue="elimination")
        else:
            logging.error("Couldn't delete msg %s for user %s" %
                          (msg_id, process.source_email))
        return
    imap = IMAPHelper()
    imap.login(process.source_email, process.source_password)
    imap.select()
    # FIX: all early-return paths below previously leaked the IMAP
    # connection; the finally clause now closes it on every path.
    try:
        # Look for the migrated email; if it doesn't exist yet, retry later.
        try:
            subject = imap.get_subject(msg_id=msg_id)
        except Exception as e:
            if retries < constants.MAX_RETRIES:
                # CONSISTENCY FIX: this defer previously omitted
                # _queue="elimination" unlike the other retry paths.
                deferred.defer(delayed_delete_message, msg_id=msg_id,
                               process_id=process_id, retries=retries + 1,
                               _countdown=60 * 2 ** retries,
                               _queue="elimination")
            else:
                logging.error("Couldn't delete msg %s for user %s, error %s" %
                              (msg_id, process.source_email, e.message))
            return
        messages = imap.list_messages(
            criteria="subject:(%s) label:Migrated-Migrados" % subject)
        if len(messages) < 1:
            # Migrated copy not visible yet: back off and retry, or give up.
            if retries < constants.MAX_RETRIES:
                deferred.defer(delayed_delete_message, msg_id=msg_id,
                               process_id=process_id, retries=retries + 1,
                               _countdown=60 * 2 ** retries,
                               _queue="elimination")
            else:
                logging.error("Couldn't delete msg %s for user %s" %
                              (msg_id, process.source_email))
            return
        imap.delete_message(msg_id=msg_id, criteria=criteria)
    finally:
        imap.close()
    msg_process.status = constants.FINISHED
    msg_process.put()
    # Recompute overall progress for the clean process.
    all_done = True
    all_cleaning_messages = CleanMessageProcess.query(
        CleanMessageProcess.clean_process_id == process_id
    ).fetch()
    progress = 0
    for message in all_cleaning_messages:
        if not message.status == constants.FINISHED:
            all_done = False
        else:
            progress += 1
    if all_done:
        process.status = constants.FINISHED
    # Timestamp in America/Bogota local time (stored as a plain string).
    utc_now = datetime.datetime.utcnow()
    local_tz = pytz.timezone('America/Bogota')
    tz_offset = local_tz.utcoffset(utc_now)
    now = utc_now + tz_offset
    process.progress = progress
    process.latest_activity = "%s" % now
    process.put()
def move_messages(user_email=None, tag=None, chunk_ids=list(),
                  process_id=None, retry_count=0, chunk_sizes=None):
    """Move chunks of trash messages to `tag` via IMAP range copies.

    Args:
        user_email: account to log into via 2-legged OAuth.
        tag: destination label for the copy.
        chunk_ids: list of chunks; each chunk is a sequence of message ids
            and is copied as one "first:last" range.
        process_id: counter namespace.
        retry_count: retries already spent; gives up only after 3 retries
            with zero progress in the failing attempt.
        chunk_sizes: per-chunk message counts used for counters; derived
            from chunk lengths when not supplied.

    Returns:
        True when chunk_ids is empty; otherwise None.
    """
    moved_successfully = []
    imap = None
    number_moved_successfully = 0
    number_moved_unsuccessfully = 0
    if len(chunk_ids) <= 0:
        return True
    # FIX: chunk_sizes defaulted to None but was indexed unconditionally,
    # crashing with TypeError whenever the caller omitted it.
    if chunk_sizes is None:
        chunk_sizes = [len(chunk) for chunk in chunk_ids]
    try:
        imap = IMAPHelper()
        imap.oauth1_2lo_login(user_email=user_email)
        imap.select(only_from_trash=True)
        for i, chunk in enumerate(chunk_ids):
            chunk_size = chunk_sizes[i]
            try:
                result, data = imap.copy_message(
                    msg_id="%s:%s" % (chunk[0], chunk[-1]),
                    destination_label=tag
                )
                if result == 'OK':
                    counter.load_and_increment_counter(
                        '%s_ok_count' % (user_email),
                        namespace=str(process_id), delta=chunk_size)
                    # FIX: track whole chunks. The original did
                    # extend(chunk), flattening ids, so the later
                    # "original_chunk not in moved_successfully" test compared
                    # a chunk (list) to ids and never matched — already-moved
                    # chunks were re-scheduled on retry.
                    moved_successfully.append(chunk)
                    number_moved_successfully += chunk_size
                else:
                    counter.load_and_increment_counter(
                        '%s_error_count' % user_email,
                        namespace=str(process_id), delta=chunk_size)
                    number_moved_unsuccessfully += chunk_size
                    logging.error(
                        'Error moving message IDs [%s-%s] for user [%s]: '
                        'Result [%s] data [%s]',
                        chunk[0], chunk[-1], user_email, result, data)
            except Exception as e:
                logging.exception(
                    'Failed moving message range IDs [%s-%s] for user [%s]',
                    chunk[0], chunk[-1], user_email)
                remaining = []
                remaining_chunk_sizes = []
                number_moved_unsuccessfully += chunk_size
                for j, original_chunk in enumerate(chunk_ids):
                    if original_chunk not in moved_successfully:
                        remaining.append(original_chunk)
                        remaining_chunk_sizes.append(chunk_sizes[j])
                # Keep retrying as long as some messages are being moved.
                if retry_count >= 3 and len(moved_successfully) == 0:
                    logging.error('Giving up with remaining [%s] messages for '
                                  'user [%s]', number_moved_unsuccessfully,
                                  user_email)
                    counter.load_and_increment_counter(
                        '%s_error_count' % user_email,
                        delta=number_moved_unsuccessfully,
                        namespace=str(process_id))
                else:
                    logging.info(
                        'Scheduling [%s] remaining messages for user [%s]',
                        number_moved_unsuccessfully, user_email)
                    deferred.defer(move_messages, user_email=user_email,
                                   tag=tag, chunk_ids=remaining,
                                   process_id=process_id,
                                   retry_count=retry_count + 1,
                                   chunk_sizes=remaining_chunk_sizes)
                break
    except Exception as e:
        logging.exception('Authentication, connection or select problem for '
                          'user [%s]', user_email)
        counter.load_and_increment_counter(
            '%s_error_count' % user_email, delta=len(chunk_ids),
            namespace=str(process_id))
    finally:
        logging.info(
            'Succesfully moved [%s] messages for user [%s] in this task',
            number_moved_successfully, user_email)
        if imap:
            imap.close()