def prepare_sync(self):
    """Ensures that canonical tags are created for the account, and gets
    and save Folder objects for folders on the IMAP backend. Returns a
    list of tuples (folder_name, folder_id) for each folder we want to
    sync (in order)."""
    with mailsync_session_scope() as db_session:
        with connection_pool(self.account_id).get() as crispin_client:
            # the folders we should be syncing
            sync_folders = crispin_client.sync_folders()
            # get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folder_names(force_resync=True)
            # Persist Folder rows so the lookup loop below can resolve ids.
            save_folder_names(log, self.account_id, remote_folders,
                              db_session)
            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    # Single-column row; tuple-unpack. .one() raises
                    # NoResultFound when the Folder row is missing.
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name,
                                                self.account_id))
            return sync_folder_names_ids
def _run_impl(self):
    """Re-sync message metadata for every message carrying the renamed
    Gmail label.

    Scans each folder on the account for messages tagged with
    ``self.label_name`` (via an X-GM-LABELS server-side search) and
    re-runs the metadata update for the found UIDs so local flags/labels
    reflect the rename.
    """
    self.log.info('Starting LabelRenameHandler',
                  label_name=self.label_name)
    with connection_pool(self.account_id).get() as crispin_client:
        folder_names = []
        with session_scope(self.account_id) as db_session:
            folders = db_session.query(Folder).filter(
                Folder.account_id == self.account_id)
            folder_names = [folder.name for folder in folders]
            # Detach everything; the session closes before the IMAP work.
            db_session.expunge_all()
        for folder_name in folder_names:
            crispin_client.select_folder(folder_name, uidvalidity_cb)
            found_uids = crispin_client.search_uids(
                ['X-GM-LABELS', utf7_encode(self.label_name)])
            flags = crispin_client.flags(found_uids)
            self.log.info('Running metadata update for folder',
                          folder_name=folder_name)
            with session_scope(self.account_id) as db_session:
                # BUG FIX: the original passed `folder.id`, but `folder`
                # was only a comprehension variable from the earlier
                # (expunged) query — a NameError on Python 3 and the
                # wrong (last-listed) folder on Python 2. Resolve the
                # folder currently being processed instead.
                fld = db_session.query(Folder).filter(
                    Folder.account_id == self.account_id,
                    Folder.name == folder_name).one()
                common.update_metadata(self.account_id, fld.id, flags,
                                       db_session)
                db_session.commit()
def get_imap_raw_contents(message):
    """Fetch and return the raw body of *message* from its IMAP backend.

    Raises EmailDeletedException when the message no longer exists on the
    server, and EmailFetchException on IMAP errors.
    """
    account = message.namespace.account
    if not message.imapuids:
        raise EmailDeletedException(
            "Message was deleted on the backend server.")
    imapuid = message.imapuids[0]
    target_folder = imapuid.folder
    with connection_pool(account.id).get() as crispin_client:
        crispin_client.select_folder(target_folder.name, uidvalidity_cb)
        try:
            fetched = crispin_client.uids([imapuid.msg_uid])
        except imapclient.IMAPClient.Error:
            log.error(
                "Error while fetching raw contents",
                exc_info=True,
                logstash_tag="fetching_error",
            )
            raise EmailFetchException("Couldn't get message from server. "
                                      "Please try again in a few minutes.")
        if not fetched:
            raise EmailDeletedException(
                "Message was deleted on the backend server.")
        return fetched[0].body
def prepare_sync(self):
    """
    Gets and save Folder objects for folders on the IMAP backend. Returns
    a list of tuples (folder_name, folder_id) for each folder we want to
    sync (in order).
    """
    with session_scope(self.namespace_id) as db_session:
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # Only persist when the listing changed since the last cycle,
            # avoiding redundant DB writes.
            if self.saved_remote_folders != remote_folders:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()
            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    # Single-column row; tuple-unpack the folder id.
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError(
                        u"Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
            return sync_folder_names_ids
def prepare_sync(self):
    """Ensures that canonical tags are created for the account, and gets
    and save Folder objects for folders on the IMAP backend. Returns a
    list of tuples (folder_name, folder_id) for each folder we want to
    sync (in order)."""
    with mailsync_session_scope() as db_session:
        with connection_pool(self.account_id).get() as crispin_client:
            # the folders we should be syncing
            sync_folders = crispin_client.sync_folders()
            # get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folder_names(force_resync=True)
            # Persist Folder rows so the id lookup below can succeed.
            save_folder_names(log, self.account_id, remote_folders,
                              db_session)
            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    # Single-column row; tuple-unpack the folder id.
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error("Missing Folder object when starting sync",
                              folder_name=folder_name)
                    raise MailsyncError(
                        "Missing Folder '{}' on account {}".format(
                            folder_name, self.account_id))
            return sync_folder_names_ids
def prepare_sync(self):
    """
    Gets and save Folder objects for folders on the IMAP backend. Returns
    a list of tuples (folder_name, folder_id) for each folder we want to
    sync (in order).
    """
    with mailsync_session_scope() as db_session:
        with connection_pool(self.account_id).get() as crispin_client:
            # Get a fresh list of the folder names from the remote
            remote_folders = crispin_client.folders()
            # Only persist when the remote listing actually changed.
            if self.saved_remote_folders != remote_folders:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
            # The folders we should be syncing
            sync_folders = crispin_client.sync_folders()
            sync_folder_names_ids = []
            for folder_name in sync_folders:
                try:
                    # Single-column row; tuple-unpack the folder id.
                    id_, = db_session.query(Folder.id). \
                        filter(Folder.name == folder_name,
                               Folder.account_id == self.account_id).one()
                    sync_folder_names_ids.append((folder_name, id_))
                except NoResultFound:
                    log.error('Missing Folder object when starting sync',
                              folder_name=folder_name)
                    raise MailsyncError("Missing Folder '{}' on account {}"
                                        .format(folder_name,
                                                self.account_id))
            return sync_folder_names_ids
def _run_impl(self):
    """Run the folder-sync state machine until it finishes or is killed.

    Transitions between states via self.state_handlers, persisting each
    state change and publishing it to the heartbeat store. Exits by
    raising MailsyncDone on unrecoverable conditions or returning when
    the 'finish' state is reached.
    """
    # We defer initializing the pool to here so that we'll retry if there
    # are any errors (remote server 503s or similar) when initializing it.
    self.conn_pool = connection_pool(self.account_id)
    try:
        saved_folder_status = self._load_state()
    except IntegrityError:
        # The state insert failed because the folder ID ForeignKey
        # was no longer valid, ie. the folder for this engine was deleted
        # while we were starting up.
        # Exit the sync and let the monitor sort things out.
        log.info("Folder state loading failed due to IntegrityError",
                 folder_id=self.folder_id, account_id=self.account_id)
        raise MailsyncDone()
    # NOTE: The parent ImapSyncMonitor handler could kill us at any
    # time if it receives a shutdown command. The shutdown command is
    # equivalent to ctrl-c.
    while True:
        old_state = self.state
        try:
            # Dispatch to the handler for the current state; it returns
            # the next state.
            self.state = self.state_handlers[old_state]()
            self.heartbeat_status.publish(state=self.state)
        except UidInvalid:
            # Switch to the corresponding '<state> uidinvalid' handler.
            self.state = self.state + ' uidinvalid'
            self.heartbeat_status.publish(state=self.state)
        except FolderMissingError:
            # Folder was deleted by monitor while its sync was running.
            # TODO: Monitor should handle shutting down the folder engine.
            log.info('Folder disappeared. Stopping sync.',
                     account_id=self.account_id,
                     folder_name=self.folder_name,
                     folder_id=self.folder_id)
            raise MailsyncDone()
        except ValidationError as exc:
            log.error('Error authenticating; stopping sync', exc_info=True,
                      account_id=self.account_id, folder_id=self.folder_id,
                      logstash_tag='mark_invalid')
            with mailsync_session_scope() as db_session:
                account = db_session.query(Account).get(self.account_id)
                account.mark_invalid()
                account.update_sync_error(str(exc))
            raise MailsyncDone()
        # State handlers are idempotent, so it's okay if we're
        # killed between the end of the handler and the commit.
        if self.state != old_state:
            # Don't need to re-query, will auto refresh on re-associate.
            with mailsync_session_scope() as db_session:
                db_session.add(saved_folder_status)
                saved_folder_status.state = self.state
                db_session.commit()
        if self.state == 'finish':
            return
def __init__(self, account_id, namespace_id, folder_name, email_address,
             provider_name, syncmanager_lock):
    """Initialize a per-folder sync engine greenlet.

    Resolves the Folder row up front (raising MailsyncError if it is
    missing), records sync-metric flags, and builds the state-handler
    dispatch table used by the run loop.
    """
    with session_scope(namespace_id) as db_session:
        try:
            folder = db_session.query(Folder). \
                filter(Folder.name == folder_name,
                       Folder.account_id == account_id).one()
        except NoResultFound:
            raise MailsyncError(u"Missing Folder '{}' on account {}"
                                .format(folder_name, account_id))
        self.folder_id = folder.id
        self.folder_role = folder.canonical_name
        # Metric flags for sync performance
        self.is_initial_sync = folder.initial_sync_end is None
        self.is_first_sync = folder.initial_sync_start is None
        self.is_first_message = self.is_first_sync
    bind_context(self, 'foldersyncengine', account_id, self.folder_id)
    self.account_id = account_id
    self.namespace_id = namespace_id
    self.folder_name = folder_name
    # The inbox is polled more frequently than other folders.
    if self.folder_name.lower() == 'inbox':
        self.poll_frequency = INBOX_POLL_FREQUENCY
    else:
        self.poll_frequency = DEFAULT_POLL_FREQUENCY
    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.flags_fetch_results = {}
    self.conn_pool = connection_pool(self.account_id)
    # State machine: state name -> handler; each handler returns the
    # next state.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }
    Greenlet.__init__(self)
    self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                 self.folder_id,
                                                 self.folder_name,
                                                 email_address,
                                                 self.provider_name)
    # Some generic IMAP servers are throwing UIDVALIDITY
    # errors forever. Instead of resyncing those servers
    # ad vitam, we keep track of the number of consecutive
    # times we got such an error and bail out if it's higher than
    # MAX_UIDINVALID_RESYNCS.
    self.uidinvalid_count = 0
def __init__(self, account_id, namespace_id, folder_name, email_address,
             provider_name, syncmanager_lock, sync_signal):
    """Initialize a per-folder sync engine greenlet.

    Resolves the Folder row up front (raising MailsyncError if it is
    missing), records sync-metric flags, and builds the state-handler
    dispatch table used by the run loop. `sync_signal` is stored for use
    by the sync loop.
    """
    with session_scope(namespace_id) as db_session:
        try:
            folder = db_session.query(Folder). \
                filter(Folder.name == folder_name,
                       Folder.account_id == account_id).one()
        except NoResultFound:
            raise MailsyncError(
                u"Missing Folder '{}' on account {}".format(
                    folder_name, account_id))
        self.folder_id = folder.id
        self.folder_role = folder.canonical_name
        # Metric flags for sync performance
        self.is_initial_sync = folder.initial_sync_end is None
        self.is_first_sync = folder.initial_sync_start is None
        self.is_first_message = self.is_first_sync
    bind_context(self, 'foldersyncengine', account_id, self.folder_id)
    self.account_id = account_id
    self.namespace_id = namespace_id
    self.folder_name = folder_name
    self.email_address = email_address
    # The inbox is polled more frequently than other folders.
    if self.folder_name.lower() == 'inbox':
        self.poll_frequency = INBOX_POLL_FREQUENCY
    else:
        self.poll_frequency = DEFAULT_POLL_FREQUENCY
    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.flags_fetch_results = {}
    self.conn_pool = connection_pool(self.account_id)
    self.sync_signal = sync_signal
    # State machine: state name -> handler; each handler returns the
    # next state.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }
    self.setup_heartbeats()
    Greenlet.__init__(self)
    # Some generic IMAP servers are throwing UIDVALIDITY
    # errors forever. Instead of resyncing those servers
    # ad vitam, we keep track of the number of consecutive
    # times we got such an error and bail out if it's higher than
    # MAX_UIDINVALID_RESYNCS.
    self.uidinvalid_count = 0
def sync(self):
    """
    Start per-folder syncs. Only have one per-folder sync in the
    'initial' state at a time.
    """
    with session_scope(ignore_soft_deletes=False) as db_session:
        with connection_pool(self.account_id).get() as crispin_client:
            sync_folders = crispin_client.sync_folders()
            account = db_session.query(ImapAccount)\
                .get(self.account_id)
            # Persist the remote folder list first so Folder rows exist
            # for everything we may sync below.
            save_folder_names(self.log, account,
                              crispin_client.folder_names(), db_session)
            Tag.create_canonical_tags(account.namespace, db_session)
        # Map folder name -> id and folder name -> saved sync state for
        # quick lookups while spinning up monitors.
        folder_id_for = {name: id_ for id_, name in db_session.query(
            Folder.id, Folder.name).filter_by(account_id=self.account_id)}
        saved_states = {name: state for name, state in
                        db_session.query(Folder.name,
                                         ImapFolderSyncStatus.state)
                        .join(ImapFolderSyncStatus.folder)
                        .filter(ImapFolderSyncStatus.account_id ==
                                self.account_id)}
        for folder_name in sync_folders:
            if folder_name not in folder_id_for:
                self.log.error("Missing Folder object when starting sync",
                               folder_name=folder_name,
                               folder_id_for=folder_id_for)
                raise MailsyncError("Missing Folder '{}' on account {}"
                                    .format(folder_name, self.account_id))
            # Skip folders that already completed their sync.
            if saved_states.get(folder_name) != 'finish':
                self.log.info('initializing folder sync')
                thread = ImapFolderSyncMonitor(self.account_id,
                                               folder_name,
                                               folder_id_for[folder_name],
                                               self.email_address,
                                               self.provider,
                                               self.shared_state,
                                               self.folder_state_handlers,
                                               self.retry_fail_classes)
                thread.start()
                self.folder_monitors.add(thread)
                # Block until this folder reaches 'poll' (or finishes) so
                # only one folder is in 'initial' sync at a time.
                while not self._thread_polling(thread) and \
                        not self._thread_finished(thread):
                    sleep(self.heartbeat)
                # Allow individual folder sync monitors to shut themselves
                # down after completing the initial sync.
                if self._thread_finished(thread):
                    self.log.info('folder sync finished')
                    # NOTE: Greenlet is automatically removed from the
                    # group after finishing.
    self.folder_monitors.join()
def check_new_uids(account_id, provider, folder_name, log,
                   uid_download_stack, poll_frequency, syncmanager_lock):
    """
    Check for new UIDs and add them to the download stack.

    We do this by comparing local UID lists to remote UID lists,
    maintaining the invariant that (stack uids)+(local uids) ==
    (remote uids).

    We also remove local messages that have disappeared from the remote,
    since it's totally probable that users will be archiving mail as the
    initial sync goes on.

    We grab a new IMAP connection from the pool for this to isolate its
    actions from whatever the main greenlet may be doing.

    Runs until killed. (Intended to be run in a greenlet.)
    """
    log.info("Spinning up new UID-check poller for {}".format(folder_name))
    with connection_pool(account_id).get() as crispin_client:
        # Short-lived session just to run the UIDVALIDITY check on
        # folder selection.
        with session_scope() as db_session:
            crispin_client.select_folder(folder_name, uidvalidity_cb(
                db_session, crispin_client.account_id))
        while True:
            remote_uids = set(crispin_client.all_uids())
            # We lock this section to make sure no messages are being
            # created while we make sure the queue is in a good state.
            with syncmanager_lock:
                log.debug("check_new_uids acquired syncmanager_lock")
                with session_scope(ignore_soft_deletes=False) as db_session:
                    local_uids = set(account.all_uids(account_id,
                                                      db_session,
                                                      folder_name))
                    stack_uids = set(uid_download_stack.queue)
                    local_with_pending_uids = local_uids | stack_uids
                    deleted_uids = remove_deleted_uids(
                        account_id, db_session, log, folder_name,
                        local_uids, remote_uids)
                    log.info("Removed {} deleted UIDs from {}".format(
                        len(deleted_uids), folder_name))
                # filter out messages that have disappeared on the
                # remote side
                new_uid_download_stack = {u for u in
                                          uid_download_stack.queue
                                          if u in remote_uids}
                # add in any new uids from the remote
                for uid in remote_uids:
                    if uid not in local_with_pending_uids:
                        log.debug("adding new message {} to download queue"
                                  .format(uid))
                        new_uid_download_stack.add(uid)
                # Replace the queue wholesale, sorted numerically so the
                # stack pops newest UIDs first.
                uid_download_stack.queue = sorted(new_uid_download_stack,
                                                  key=int)
            sleep(poll_frequency)
def main(min_id, max_id, shard_id):
    """Backfill folder_prefix/folder_separator for running generic
    accounts.

    Selects accounts either by id range (min_id/max_id, across all
    shards) or by a single shard_id, then queries each account's IMAP
    server for its folder prefix and separator and persists them.
    Failures are collected and reported at the end rather than aborting
    the run.
    """
    maybe_enable_rollbar()
    generic_accounts = []
    failed = []
    if min_id is not None or max_id is not None:
        # Get the list of running Gmail accounts.
        with global_session_scope() as db_session:
            generic_accounts = db_session.query(GenericAccount).filter(
                GenericAccount.sync_state == "running")
            if min_id is not None:
                generic_accounts = generic_accounts.filter(
                    GenericAccount.id > min_id)
            if max_id is not None:
                generic_accounts = generic_accounts.filter(
                    GenericAccount.id <= max_id)
            generic_accounts = [acc.id for acc in generic_accounts]
            db_session.expunge_all()
    elif shard_id is not None:
        with session_scope_by_shard_id(shard_id) as db_session:
            generic_accounts = db_session.query(GenericAccount).filter(
                GenericAccount.sync_state == "running")
            generic_accounts = [acc.id for acc in generic_accounts]
            db_session.expunge_all()
    print("Total accounts", len(generic_accounts))
    for account_id in generic_accounts:
        try:
            with session_scope(account_id) as db_session:
                account = db_session.query(GenericAccount).get(account_id)
                print("Updating", account.email_address)
                with connection_pool(account.id).get() as crispin_client:
                    account.folder_prefix = crispin_client.folder_prefix
                    account.folder_separator = \
                        crispin_client.folder_separator
                db_session.commit()
        except Exception:
            # Best-effort batch job: record the failure and keep going.
            failed.append(account_id)
    print("Processed accounts:")
    print(generic_accounts)
    print("Failed accounts:")
    print(failed)
def main(min_id, max_id, shard_id):
    """Backfill folder_prefix/folder_separator for running generic
    accounts (Python 2 variant — uses print statements).

    Selects accounts either by id range (min_id/max_id, across all
    shards) or by a single shard_id, then queries each account's IMAP
    server for its folder prefix and separator and persists them.
    Failures are collected and reported at the end rather than aborting
    the run.
    """
    generic_accounts = []
    failed = []
    if min_id is not None or max_id is not None:
        # Get the list of running Gmail accounts.
        with global_session_scope() as db_session:
            generic_accounts = db_session.query(GenericAccount).filter(
                GenericAccount.sync_state == 'running')
            if min_id is not None:
                generic_accounts = generic_accounts.filter(
                    GenericAccount.id > min_id)
            if max_id is not None:
                generic_accounts = generic_accounts.filter(
                    GenericAccount.id <= max_id)
            generic_accounts = [acc.id for acc in generic_accounts]
            db_session.expunge_all()
    elif shard_id is not None:
        with session_scope_by_shard_id(shard_id) as db_session:
            generic_accounts = db_session.query(GenericAccount).filter(
                GenericAccount.sync_state == 'running')
            generic_accounts = [acc.id for acc in generic_accounts]
            db_session.expunge_all()
    print "Total accounts: %d" % len(generic_accounts)
    for account_id in generic_accounts:
        try:
            with session_scope(account_id) as db_session:
                account = db_session.query(GenericAccount).get(account_id)
                print "Updating %s" % account.email_address
                with connection_pool(account.id).get() as crispin_client:
                    account.folder_prefix = crispin_client.folder_prefix
                    account.folder_separator = \
                        crispin_client.folder_separator
                db_session.commit()
        except Exception:
            # Best-effort batch job: record the failure and keep going.
            failed.append(account_id)
    print "Processed accounts:"
    print generic_accounts
    print "Failed accounts:"
    print failed
def sync(self):
    """
    Start per-folder syncs. Only have one per-folder sync in the
    'initial' state at a time.
    """
    with session_scope() as db_session:
        saved_states = dict()
        folder_id_for = dict()
        for saved_state in db_session.query(ImapFolderSyncStatus)\
                .filter_by(account_id=self.account_id):
            saved_states[saved_state.folder.name] = saved_state.state
            folder_id_for[saved_state.folder.name] = saved_state.folder.id
        # it's possible we've never started syncs for these folders
        # before
        for folder_id, folder_name, in \
                db_session.query(Folder.id, Folder.name).filter_by(
                    account_id=self.account_id):
            folder_id_for[folder_name] = folder_id
        with connection_pool(self.account_id).get() as crispin_client:
            sync_folders = crispin_client.sync_folders()
            account = db_session.query(ImapAccount)\
                .get(self.account_id)
            # Persist the remote folder list before starting monitors.
            save_folder_names(self.log, account,
                              crispin_client.folder_names(), db_session)
            Tag.create_canonical_tags(account.namespace, db_session)
        for folder_name in sync_folders:
            # Skip folders that already completed their sync.
            if saved_states.get(folder_name) != 'finish':
                self.log.info("Initializing folder sync for {0}"
                              .format(folder_name))
                thread = ImapFolderSyncMonitor(self.account_id,
                                               folder_name,
                                               folder_id_for[folder_name],
                                               self.email_address,
                                               self.provider,
                                               self.shared_state,
                                               self.folder_state_handlers,
                                               self.retry_fail_classes)
                thread.start()
                self.folder_monitors.add(thread)
                # Block until this folder reaches 'poll' (or finishes) so
                # only one folder is in 'initial' sync at a time.
                while not self._thread_polling(thread) and \
                        not self._thread_finished(thread):
                    sleep(self.heartbeat)
                # Allow individual folder sync monitors to shut themselves
                # down after completing the initial sync.
                if self._thread_finished(thread):
                    self.log.info("Folder sync for {} is done."
                                  .format(folder_name))
                    # NOTE: Greenlet is automatically removed from the
                    # group after finishing.
    self.folder_monitors.join()
def __init__(self, account_id, namespace_id, folder_name, email_address,
             provider_name, syncmanager_lock):
    """Initialize a per-folder sync engine greenlet.

    Resolves the Folder row up front (raising MailsyncError if it is
    missing), records sync-metric flags, and builds the state-handler
    dispatch table used by the run loop.
    """
    with session_scope(namespace_id) as db_session:
        try:
            folder = db_session.query(Folder). \
                filter(Folder.name == folder_name,
                       Folder.account_id == account_id).one()
        except NoResultFound:
            raise MailsyncError(
                u"Missing Folder '{}' on account {}".format(
                    folder_name, account_id))
        self.folder_id = folder.id
        self.folder_role = folder.canonical_name
        # Metric flags for sync performance
        self.is_initial_sync = folder.initial_sync_end is None
        self.is_first_sync = folder.initial_sync_start is None
        self.is_first_message = self.is_first_sync
    bind_context(self, 'foldersyncengine', account_id, self.folder_id)
    self.account_id = account_id
    self.namespace_id = namespace_id
    self.folder_name = folder_name
    # The inbox is polled more frequently than other folders.
    if self.folder_name.lower() == 'inbox':
        self.poll_frequency = INBOX_POLL_FREQUENCY
    else:
        self.poll_frequency = DEFAULT_POLL_FREQUENCY
    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.flags_fetch_results = {}
    self.conn_pool = connection_pool(self.account_id)
    # State machine: state name -> handler; each handler returns the
    # next state.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }
    Greenlet.__init__(self)
    self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                 self.folder_id,
                                                 self.folder_name,
                                                 email_address,
                                                 self.provider_name)
def __init__(self, account_id, folder_name, folder_id, email_address,
             provider, shared_state, state_handlers):
    """Per-folder sync monitor greenlet: stores sync parameters, creates
    a connection pool and logger, and registers a stop-report callback
    that fires when the greenlet exits."""
    self.account_id = account_id
    self.folder_id = folder_id
    self.folder_name = folder_name
    self.state = None
    self.state_handlers = state_handlers
    self.shared_state = shared_state
    self.log = get_logger(account_id, 'mailsync')
    self.conn_pool = connection_pool(self.account_id)
    Greenlet.__init__(self)
    # Report stoppage whenever this greenlet finishes, for any reason.
    self.link_value(
        lambda _greenlet: report_stopped(account_id=self.account_id,
                                         folder_name=self.folder_name))
def prepare_sync(self):
    """
    Fetch the remote folder list, persist any changes to it, and return
    the list of folder names we want to sync (in order).
    """
    with connection_pool(self.account_id).get() as crispin_client:
        # Fresh snapshot of the folder names on the remote.
        remote_folders = crispin_client.folders()
        # The subset of folders we should actually be syncing.
        sync_folders = crispin_client.sync_folders()
        # Persist only when the remote listing changed since last cycle.
        if remote_folders != self.saved_remote_folders:
            with session_scope(self.namespace_id) as db_session:
                self.save_folder_names(db_session, remote_folders)
                self.saved_remote_folders = remote_folders
        return sync_folders
def _run_impl(self):
    """Re-sync message metadata for every message carrying the renamed
    Gmail label, scanning each folder for matching UIDs via the
    X-GM-LABELS search and updating flags in batches."""
    self.log.info("Starting LabelRenameHandler",
                  label_name=self.label_name)
    # Serialize label-rename handlers via the shared semaphore.
    self.semaphore.acquire(blocking=True)
    try:
        with connection_pool(self.account_id).get() as crispin_client:
            folder_names = []
            with session_scope(self.account_id) as db_session:
                folders = db_session.query(Folder).filter(
                    Folder.account_id == self.account_id)
                folder_names = [folder.name for folder in folders]
                # Detach everything; the session closes before IMAP work.
                db_session.expunge_all()
            for folder_name in folder_names:
                crispin_client.select_folder(folder_name, uidvalidity_cb)
                found_uids = crispin_client.search_uids(
                    ["X-GM-LABELS", utf7_encode(self.label_name)])
                # Process UIDs in batches of 200 to bound the size of
                # each FLAGS fetch and DB transaction.
                for chnk in chunk(found_uids, 200):
                    flags = crispin_client.flags(chnk)
                    self.log.info(
                        "Running metadata update for folder",
                        folder_name=folder_name,
                    )
                    with session_scope(self.account_id) as db_session:
                        fld = (db_session.query(Folder).options(
                            load_only("id")).filter(
                                Folder.account_id == self.account_id,
                                Folder.name == folder_name,
                            ).one())
                        common.update_metadata(
                            self.account_id,
                            fld.id,
                            fld.canonical_name,
                            flags,
                            db_session,
                        )
                        db_session.commit()
    finally:
        self.semaphore.release()
def __init__(self, account_id, namespace_id, folder_name, email_address,
             provider_name, syncmanager_lock):
    """Initialize a per-folder sync engine greenlet.

    Resolves the Folder row up front (raising MailsyncError if it is
    missing), records sync-metric flags, and builds the state-handler
    dispatch table used by the run loop.
    """
    with session_scope(namespace_id) as db_session:
        try:
            folder = db_session.query(Folder). \
                filter(Folder.name == folder_name,
                       Folder.account_id == account_id).one()
        except NoResultFound:
            raise MailsyncError(u"Missing Folder '{}' on account {}"
                                .format(folder_name, account_id))
        self.folder_id = folder.id
        self.folder_role = folder.canonical_name
        # Metric flags for sync performance
        self.is_initial_sync = folder.initial_sync_end is None
        self.is_first_sync = folder.initial_sync_start is None
        self.is_first_message = self.is_first_sync
    bind_context(self, 'foldersyncengine', account_id, self.folder_id)
    self.account_id = account_id
    self.namespace_id = namespace_id
    self.folder_name = folder_name
    # The inbox is polled more frequently than other folders.
    if self.folder_name.lower() == 'inbox':
        self.poll_frequency = INBOX_POLL_FREQUENCY
    else:
        self.poll_frequency = DEFAULT_POLL_FREQUENCY
    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.conn_pool = connection_pool(self.account_id)
    # State machine: state name -> handler; each handler returns the
    # next state.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }
    Greenlet.__init__(self)
    self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                 self.folder_id,
                                                 self.folder_name,
                                                 email_address,
                                                 self.provider_name)
def imap_check_flags(account_id, folder_name, log, poll_frequency,
                     syncmanager_lock, refresh_flags_max):
    """
    Periodically update message flags for those servers who don't support
    CONDSTORE.

    Runs until killed. (Intended to be run in a greenlet)

    Parameters
    ----------
    account_id : String
    folder_name : String
    log : Logger
    poll_frequency : Integer
        Number of seconds to wait between polls.
    syncmanager_lock : Locking Context Manager
    refresh_flags_max : Integer
        Maximum number of messages to check FLAGS of.
    """
    log.info("Spinning up new flags-refresher for ",
             folder_name=folder_name)
    with connection_pool(account_id).get() as crispin_client:
        with session_scope(ignore_soft_deletes=False) as db_session:
            crispin_client.select_folder(folder_name, uidvalidity_cb(
                db_session, crispin_client.account_id))
            while True:
                remote_uids = set(crispin_client.all_uids())
                local_uids = set(account.all_uids(account_id, db_session,
                                                  folder_name))
                # Only refresh the newest refresh_flags_max UIDs that
                # exist both locally and remotely.
                to_refresh = sorted(remote_uids &
                                    local_uids)[-refresh_flags_max:]
                update_metadata(crispin_client, db_session, log,
                                folder_name, to_refresh, syncmanager_lock)
                update_uid_counts(db_session, log,
                                  crispin_client.account_id, folder_name,
                                  update_uid_count=len(to_refresh))
                sleep(poll_frequency)
def __init__(self, account_id, folder_name, folder_id, email_address,
             provider, shared_state, state_handlers, retry_fail_classes):
    """Per-folder sync monitor greenlet: stores sync parameters, creates
    a connection pool and a bound logger, and registers a stop-report
    callback that fires when the greenlet exits."""
    self.account_id = account_id
    self.folder_id = folder_id
    self.folder_name = folder_name
    self.state = None
    self.state_handlers = state_handlers
    self.shared_state = shared_state
    self.retry_fail_classes = retry_fail_classes
    self.conn_pool = connection_pool(self.account_id)
    self.log = logger.new(account_id=account_id, folder=folder_name)
    Greenlet.__init__(self)
    # Report stoppage whenever this greenlet finishes, for any reason.
    self.link_value(
        lambda _greenlet: report_stopped(account_id=self.account_id,
                                         folder_name=self.folder_name))
def __init__(self, account_id, folder_name, folder_id, email_address,
             provider_name, syncmanager_lock):
    """Initialize a per-folder sync engine greenlet.

    Looks up the namespace and (optional) Folder row to populate
    sync-metric flags, then builds the state-handler dispatch table used
    by the run loop.
    """
    bind_context(self, 'foldersyncengine', account_id, folder_id)
    self.account_id = account_id
    self.folder_name = folder_name
    self.folder_id = folder_id
    # The inbox is polled more frequently than other folders.
    if self.folder_name.lower() == 'inbox':
        self.poll_frequency = INBOX_POLL_FREQUENCY
    else:
        self.poll_frequency = DEFAULT_POLL_FREQUENCY
    self.syncmanager_lock = syncmanager_lock
    self.state = None
    self.provider_name = provider_name
    self.last_fast_refresh = None
    self.conn_pool = connection_pool(self.account_id)
    # Metric flags for sync performance
    self.is_initial_sync = False
    self.is_first_sync = False
    self.is_first_message = False
    with session_scope() as db_session:
        account = Account.get(self.account_id, db_session)
        self.namespace_id = account.namespace.id
        assert self.namespace_id is not None, "namespace_id is None"
        # The Folder row may not exist yet; flags stay False in that
        # case.
        folder = Folder.get(self.folder_id, db_session)
        if folder:
            self.is_initial_sync = folder.initial_sync_end is None
            self.is_first_sync = folder.initial_sync_start is None
            self.is_first_message = self.is_first_sync
    # State machine: state name -> handler; each handler returns the
    # next state.
    self.state_handlers = {
        'initial': self.initial_sync,
        'initial uidinvalid': self.resync_uids,
        'poll': self.poll,
        'poll uidinvalid': self.resync_uids,
    }
    Greenlet.__init__(self)
    self.heartbeat_status = HeartbeatStatusProxy(self.account_id,
                                                 self.folder_id,
                                                 self.folder_name,
                                                 email_address,
                                                 self.provider_name)
def user_console(user_email_address):
    """Open an interactive IPython shell with a live crispin (IMAP)
    client for the account matching *user_email_address*.

    For Gmail accounts, pre-selects the All Mail folder so the console
    starts with a selected mailbox.
    """
    with session_scope() as db_session:
        account = db_session.query(Account).filter_by(
            email_address=user_email_address).one()
        with connection_pool(account.id, pool_size=1).get() \
                as crispin_client:
            if account.provider == 'gmail' \
                    and 'all' in crispin_client.folder_names():
                crispin_client.select_folder(
                    crispin_client.folder_names()['all'], uidvalidity_cb)
            banner = """
You can access the crispin instance with the 'crispin_client' variable.
IMAPClient docs are at:

    http://imapclient.readthedocs.org/en/latest/#imapclient-class-reference

"""
            IPython.embed(banner1=banner)
def _run_impl(self):
    """Re-sync message metadata for every message carrying the renamed
    Gmail label, scanning each folder for matching UIDs via the
    X-GM-LABELS search and updating flags in batches."""
    self.log.info('Starting LabelRenameHandler',
                  label_name=self.label_name)
    # Serialize label-rename handlers via the shared semaphore.
    self.semaphore.acquire(blocking=True)
    try:
        with connection_pool(self.account_id).get() as crispin_client:
            folder_names = []
            with session_scope(self.account_id) as db_session:
                folders = db_session.query(Folder).filter(
                    Folder.account_id == self.account_id)
                folder_names = [folder.name for folder in folders]
                # Detach everything; the session closes before IMAP work.
                db_session.expunge_all()
            for folder_name in folder_names:
                crispin_client.select_folder(folder_name, uidvalidity_cb)
                found_uids = crispin_client.search_uids(
                    ['X-GM-LABELS', utf7_encode(self.label_name)])
                # Process UIDs in batches of 200 to bound the size of
                # each FLAGS fetch and DB transaction.
                for chnk in chunk(found_uids, 200):
                    flags = crispin_client.flags(chnk)
                    self.log.info('Running metadata update for folder',
                                  folder_name=folder_name)
                    with session_scope(self.account_id) as db_session:
                        fld = db_session.query(Folder)\
                            .options(load_only("id"))\
                            .filter(Folder.account_id == self.account_id,
                                    Folder.name == folder_name).one()
                        common.update_metadata(self.account_id, fld.id,
                                               fld.canonical_name, flags,
                                               db_session)
                        db_session.commit()
    finally:
        self.semaphore.release()
def get_imap_raw_contents(message):
    """Fetch and return the raw body of *message* from its IMAP backend.

    Raises EmailDeletedException when the message no longer exists on the
    server, and EmailFetchException on IMAP errors.
    """
    account = message.namespace.account
    if len(message.imapuids) == 0:
        raise EmailDeletedException(
            "Message was deleted on the backend server.")
    # Any one of the message's UIDs will do; use the first.
    uid = message.imapuids[0]
    folder = uid.folder
    with connection_pool(account.id).get() as crispin_client:
        crispin_client.select_folder(folder.name, uidvalidity_cb)
        try:
            uids = crispin_client.uids([uid.msg_uid])
            if len(uids) == 0:
                raise EmailDeletedException(
                    "Message was deleted on the backend server.")
            return uids[0].body
        except imapclient.IMAPClient.Error:
            log.error("Error while fetching raw contents", exc_info=True,
                      logstash_tag='fetching_error')
            raise EmailFetchException("Couldn't get message from server. "
                                      "Please try again in a few minutes.")
def crispin_client(account_id, account_provider):
    """Return a single-connection crispin client handle for *account_id*.

    NOTE(review): this returns the object produced by the pool's
    ``.get()`` — presumably a context manager the caller is expected to
    use in a ``with`` block (as elsewhere in this codebase) so the
    connection is returned to the pool; verify against callers.
    `account_provider` is unused here.
    """
    # Local import keeps module import light for callers that never use
    # this helper.
    from inbox.crispin import connection_pool
    return connection_pool(account_id, pool_size=1).get()
def __download_queued_threads(self, crispin_client, message_download_stack):
    """
    Download threads until `message_download_stack` is empty.

    UIDs and g_metadata that come out of `message_download_stack` are for
    the _folder that threads are being expanded in_.

    Threads are downloaded in the order they come out of the stack, which
    _ought_ to be putting newest threads at the top.

    Messages are downloaded newest-to-oldest in thread. (Threads are
    expanded to all messages in the email archive that belong to the
    threads corresponding to the given uids.)
    """
    num_total_messages = message_download_stack.qsize()
    log.info(num_total_messages=num_total_messages)

    log.info('Expanding threads and downloading messages.')
    # We still need the original crispin connection for progress reporting,
    # so the easiest thing to do here with the current pooling setup is to
    # create a new crispin client for querying All Mail.
    with connection_pool(self.account_id).get() as all_mail_crispin_client:
        all_mail_crispin_client.select_folder(
            crispin_client.folder_names()['all'],
            uidvalidity_cb(self.account_id))

        # Since we do thread expansion, for any given thread, even if we
        # already have the UID in the given GMessage downloaded, we may not
        # have _every_ message in the thread. We have to expand it and make
        # sure we have all messages.
        while not message_download_stack.empty():
            message = message_download_stack.get_nowait()

            # Don't try to re-download any messages that are in the same
            # thread. (Putting this _before_ the download to guarantee no
            # context switches happen in the meantime; we _should_
            # re-download if another message arrives on the thread.)
            processed_msgs = [
                m for m in message_download_stack.queue
                if m.g_metadata.thrid == message.g_metadata.thrid
            ]
            processed_msgs.append(message)
            # Drop the same-thread entries from the pending stack; they
            # are covered by the thread expansion below.
            message_download_stack.queue = [
                m for m in message_download_stack.queue
                if m.g_metadata.thrid != message.g_metadata.thrid
            ]

            thread_uids = all_mail_crispin_client.expand_thread(
                message.g_metadata.thrid)
            thread_g_metadata = all_mail_crispin_client.g_metadata(
                thread_uids)
            self.__download_thread(all_mail_crispin_client,
                                   thread_g_metadata,
                                   message.g_metadata.thrid,
                                   thread_uids)

            # In theory we only ever have one Greenlet modifying ImapUid
            # entries for a non-All Mail folder, but grab the lock anyway
            # to be safe.
            with self.syncmanager_lock:
                # Since we download msgs from All Mail, we need to
                # separately make sure we have ImapUids recorded for this
                # folder (used in progress tracking, queuing, and delete
                # detection).
                log.debug('adding imapuid rows', count=len(processed_msgs))
                with mailsync_session_scope() as db_session:
                    acc = db_session.query(GmailAccount).get(
                        crispin_client.account_id)
                    for msg in processed_msgs:
                        add_new_imapuid(db_session, msg, self.folder_name,
                                        acc)

            report_progress(self.account_id, self.folder_name,
                            len(processed_msgs),
                            message_download_stack.qsize())

        log.info('Message download queue emptied')
def __download_queued_threads(self, crispin_client, message_download_stack):
    """
    Download threads until `message_download_stack` is empty.

    UIDs and g_metadata that come out of `message_download_stack` are for
    the _folder that threads are being expanded in_.

    Threads are downloaded in the order they come out of the stack, which
    _ought_ to be putting newest threads at the top.

    Messages are downloaded newest-to-oldest in thread. (Threads are
    expanded to all messages in the email archive that belong to the
    threads corresponding to the given uids.)
    """
    num_total_messages = message_download_stack.qsize()
    log.info(num_total_messages=num_total_messages)

    log.info('Expanding threads and downloading messages.')
    # We still need the original crispin connection for progress reporting,
    # so the easiest thing to do here with the current pooling setup is to
    # create a new crispin client for querying All Mail.
    with connection_pool(self.account_id).get() as all_mail_crispin_client:
        all_mail_crispin_client.select_folder(
            crispin_client.folder_names()['all'], uidvalidity_cb)

        # Since we do thread expansion, for any given thread, even if we
        # already have the UID in the given GMessage downloaded, we may not
        # have _every_ message in the thread. We have to expand it and make
        # sure we have all messages.
        while not message_download_stack.empty():
            message = message_download_stack.get()

            # Don't try to re-download any messages that are in the same
            # thread. (Putting this _before_ the download to guarantee no
            # context switches happen in the meantime; we _should_
            # re-download if another message arrives on the thread.)
            # BUGFIX(review): the stack object itself is not iterable and
            # has no discard() method; filter its underlying .queue list
            # and reassign it, as the sibling implementation does.
            msgs_to_process = [
                m for m in message_download_stack.queue
                if m.g_metadata.thrid == message.g_metadata.thrid
            ]
            msgs_to_process.append(message)
            message_download_stack.queue = [
                m for m in message_download_stack.queue
                if m.g_metadata.thrid != message.g_metadata.thrid
            ]

            thread_uids = all_mail_crispin_client.expand_thread(
                message.g_metadata.thrid)
            thread_g_metadata = all_mail_crispin_client.g_metadata(
                thread_uids)
            self.__download_thread(all_mail_crispin_client,
                                   thread_g_metadata,
                                   message.g_metadata.thrid,
                                   thread_uids)

            # In theory we only ever have one Greenlet modifying ImapUid
            # entries for a non-All Mail folder, but grab the lock anyway
            # to be safe.
            with self.syncmanager_lock:
                # Since we download msgs from All Mail, we need to
                # separately make sure we have ImapUids recorded for this
                # folder (used in progress tracking, queuing, and delete
                # detection).
                log.debug('adding imapuid rows', count=len(msgs_to_process))
                with mailsync_session_scope() as db_session:
                    acc = db_session.query(GmailAccount).get(
                        crispin_client.account_id)
                    for msg in msgs_to_process:
                        add_new_imapuid(db_session, msg, self.folder_name,
                                        acc)

            report_progress(self.account_id, self.folder_name,
                            len(msgs_to_process),
                            message_download_stack.qsize())

        log.info('Message download queue emptied')
def _pool(account_id):
    """
    Return the crispin connection pool for `account_id`.

    Raises MailsyncDone when the pool cannot be created because the
    account's credentials are invalid (AuthError).
    """
    try:
        pool = connection_pool(account_id)
    except AuthError:
        raise MailsyncDone()
    return pool
def _pool(account_id):
    """
    Return the crispin connection pool for `account_id`.

    Raises MailsyncDone when the pool reports itself invalid.
    """
    pool = connection_pool(account_id)
    if pool.valid:
        return pool
    raise MailsyncDone()
def check_new_g_thrids(account_id, provider, folder_name, log,
                       message_download_stack, poll_frequency,
                       syncmanager_lock):
    """
    Check for new X-GM-THRIDs and add them to the download stack.

    We do this by comparing local UID lists to remote UID lists,
    maintaining the invariant that (stack uids)+(local uids) ==
    (remote uids).

    We also remove local messages that have disappeared from the remote,
    since it's totally probable that users will be archiving mail as the
    initial sync goes on.

    We grab a new IMAP connection from the pool for this to isolate its
    actions from whatever the main greenlet may be doing.

    Runs until killed. (Intended to be run in a greenlet.)
    """
    with connection_pool(account_id).get() as crispin_client:
        with session_scope(ignore_soft_deletes=False) as db_session:
            crispin_client.select_folder(folder_name, uidvalidity_cb(
                db_session, crispin_client.account_id))
        while True:
            log.info('Checking for new/deleted messages during initial '
                     'sync.')
            remote_uids = set(crispin_client.all_uids())
            # We lock this section to make sure no messages are being
            # modified in the database while we make sure the queue is in
            # a good state.
            with syncmanager_lock:
                log.debug('check_new_g_thrids acquired syncmanager_lock')
                with session_scope(ignore_soft_deletes=False) as db_session:
                    local_uids = set(account.all_uids(account_id,
                                                      db_session,
                                                      folder_name))
                    stack_uids = {gm.uid for gm in
                                  message_download_stack.queue}
                    local_with_pending_uids = local_uids | stack_uids
                    # Purge ImapUids for messages no longer on the remote.
                    deleted_uids = remove_deleted_uids(
                        account_id, db_session, log, folder_name,
                        local_uids, remote_uids)
                    log.info(deleted_uid_count=len(deleted_uids))

                # filter out messages that have disappeared on the remote
                # side
                new_message_download_stack = [
                    gm for gm in message_download_stack.queue
                    if gm.uid in remote_uids]

                # add in any new uids from the remote
                new_uids = [uid for uid in remote_uids
                            if uid not in local_with_pending_uids]
                flags = crispin_client.flags(new_uids)
                g_metadata = crispin_client.g_metadata(new_uids)
                log.info('adding new messages to download queue',
                         count=min(len(flags), len(g_metadata)))
                for new_uid in new_uids:
                    # could have disappeared from the folder in the
                    # meantime
                    if new_uid in flags and new_uid in g_metadata:
                        new_message_download_stack.append(
                            GMessage(new_uid, g_metadata[new_uid],
                                     flags[new_uid].flags,
                                     flags[new_uid].labels))
                # Rebuild the stack sorted by UID so the newest messages
                # end up on top of the LIFO stack.
                message_download_stack.queue = sorted(
                    new_message_download_stack, key=lambda m: m.uid)

            # Block on IMAP IDLE until the server reports changes or the
            # poll interval elapses, then loop and re-check.
            log.info('idling', timeout=poll_frequency)
            crispin_client.conn.idle()
            crispin_client.conn.idle_check(timeout=poll_frequency)
            crispin_client.conn.idle_done()
            log.info('IDLE detected changes or timeout reached')
def __check_new_g_thrids(self, message_download_stack):
    """
    Check for new X-GM-THRIDs and add them to the download stack.

    We do this by comparing local UID lists to remote UID lists,
    maintaining the invariant that (stack uids)+(local uids) ==
    (remote uids).

    We also remove local messages that have disappeared from the remote,
    since it's totally probable that users will be archiving mail as the
    initial sync goes on.

    We grab a new IMAP connection from the pool for this to isolate its
    actions from whatever the main greenlet may be doing.

    Runs until killed. (Intended to be run in a greenlet.)
    """
    with connection_pool(self.account_id).get() as crispin_client:
        crispin_client.select_folder(self.folder_name,
                                     uidvalidity_cb(self.account_id))
        while True:
            log.info('Checking for new/deleted messages during initial '
                     'sync.')
            remote_uids = set(crispin_client.all_uids())
            # We lock this section to make sure no messages are being
            # modified in the database while we make sure the queue is in a
            # good state.
            with self.syncmanager_lock:
                with mailsync_session_scope() as db_session:
                    # NOTE(review): `local_uids | stack_uids` below
                    # requires a set — presumably common.all_uids returns
                    # one; confirm against its definition.
                    local_uids = common.all_uids(self.account_id,
                                                 db_session,
                                                 self.folder_name)
                    stack_uids = {
                        gm.uid for gm in message_download_stack.queue
                    }
                    local_with_pending_uids = local_uids | stack_uids
                    # Purge ImapUids for messages no longer on the remote.
                    deleted_uids = self.remove_deleted_uids(
                        db_session, local_uids, remote_uids)
                    log.info(deleted_uid_count=len(deleted_uids))

                # filter out messages that have disappeared on the remote
                # side
                new_message_download_stack = [
                    gm for gm in message_download_stack.queue
                    if gm.uid in remote_uids
                ]

                # add in any new uids from the remote
                new_uids = [
                    uid for uid in remote_uids
                    if uid not in local_with_pending_uids
                ]
                flags = crispin_client.flags(new_uids)
                g_metadata = crispin_client.g_metadata(new_uids)
                log.info('adding new messages to download queue',
                         count=min(len(flags), len(g_metadata)))
                for new_uid in new_uids:
                    # could have disappeared from the folder in the
                    # meantime
                    if new_uid in flags and new_uid in g_metadata:
                        new_message_download_stack.append(
                            GMessage(new_uid, g_metadata[new_uid],
                                     flags[new_uid].flags,
                                     flags[new_uid].labels))
                # Rebuild the stack sorted by UID so the newest messages
                # end up on top of the LIFO stack.
                message_download_stack.queue = sorted(
                    new_message_download_stack, key=lambda m: m.uid)

                with mailsync_session_scope() as db_session:
                    self.update_uid_counts(
                        db_session,
                        remote_uid_count=len(remote_uids),
                        download_uid_count=message_download_stack.qsize(),
                        delete_uid_count=len(deleted_uids))

            # Block on IMAP IDLE until the server reports changes or the
            # poll interval elapses, then loop and re-check.
            log.info('idling', timeout=self.poll_frequency)
            crispin_client.conn.idle()
            crispin_client.conn.idle_check(timeout=self.poll_frequency)
            crispin_client.conn.idle_done()
            log.info('IDLE detected changes or timeout reached')