def receive_checkout(dbapi_connection, connection_record, connection_proxy):
    """Log checkedout and overflow when a connection is checked out"""
    hostname = gethostname().replace(".", "-")
    process_name = str(config.get("PROCESS_NAME", "main_process"))

    if config.get("ENABLE_DB_TXN_METRICS", False):
        statsd_client.gauge(
            ".".join(
                ["dbconn", database_name, hostname, process_name, "checkedout"]
            ),
            connection_proxy._pool.checkedout(),
        )
        statsd_client.gauge(
            ".".join(["dbconn", database_name, hostname, process_name, "overflow"]),
            connection_proxy._pool.overflow(),
        )

    # Keep track of where and why this connection was checked out.
    log = get_logger()
    context = log._context._dict.copy()
    f, name = find_first_app_frame_and_name(
        ignores=["sqlalchemy", "inbox.ignition", "inbox.logging"]
    )
    source = "{}:{}".format(name, f.f_lineno)

    pool_tracker[dbapi_connection] = {
        "source": source,
        "context": context,
        "checkedout_at": time.time(),
    }
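For context, a listener with this signature is wired up through SQLAlchemy's pool event system. A minimal sketch of the registration, assuming an engine like the one inbox.ignition builds (the URL here is a placeholder, not the project's actual configuration):

from sqlalchemy import create_engine, event

# Hypothetical engine URL; the real one comes from the application's config.
engine = create_engine("mysql+pymysql://user:pass@localhost/inbox")

# SQLAlchemy invokes "checkout" listeners with exactly the
# (dbapi_connection, connection_record, connection_proxy) signature above,
# each time a connection is checked out of the pool.
event.listen(engine, "checkout", receive_checkout)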
def receive_checkout(dbapi_connection, connection_record, connection_proxy):
    '''Log checkedout and overflow when a connection is checked out'''
    hostname = gethostname().replace(".", "-")
    process_name = str(config.get("PROCESS_NAME", "unknown"))

    statsd_client.gauge(
        ".".join(
            ["dbconn", database_name, hostname, process_name, "checkedout"]),
        connection_proxy._pool.checkedout())
    statsd_client.gauge(
        ".".join(
            ["dbconn", database_name, hostname, process_name, "overflow"]),
        connection_proxy._pool.overflow())

    # Keep track of where and why this connection was checked out.
    log = get_logger()
    context = log._context._dict.copy()
    f, name = find_first_app_frame_and_name(
        ignores=['sqlalchemy', 'inbox.ignition', 'nylas.logging'])
    source = '{}:{}'.format(name, f.f_lineno)

    pool_tracker[dbapi_connection] = {
        'source': source,
        'context': context,
        'checkedout_at': time.time()
    }
def poll(self, event):
    # Determine which accounts to sync
    start_accounts = self.account_ids_to_sync()
    statsd_client.gauge(
        "mailsync.account_counts.{}.mailsync-{}.count".format(
            self.host, self.process_number),
        len(start_accounts),
    )

    # Perform the appropriate action on each account
    for account_id in start_accounts:
        if account_id not in self.syncing_accounts:
            try:
                self.start_sync(account_id)
            except OperationalError:
                self.log.error("Database error starting account sync",
                               exc_info=True)
                log_uncaught_errors()

    stop_accounts = self.account_ids_owned() - set(start_accounts)
    for account_id in stop_accounts:
        self.log.info("sync service stopping sync", account_id=account_id)
        try:
            self.stop_sync(account_id)
        except OperationalError:
            self.log.error("Database error stopping account sync",
                           exc_info=True)
            log_uncaught_errors()
def poll(self):
    if self.stealing_enabled:
        r = self.queue_client.claim_next(self.process_identifier)
        if r:
            self.log.info('Claimed new account sync', account_id=r)

    # Determine which accounts to sync
    start_accounts = self.accounts_to_sync()
    statsd_client.gauge(
        'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
        len(start_accounts))

    # Perform the appropriate action on each account
    for account_id in start_accounts:
        if account_id not in self.syncing_accounts:
            try:
                self.start_sync(account_id)
            except OperationalError:
                self.log.error('Database error starting account sync',
                               exc_info=True)
                log_uncaught_errors()

    stop_accounts = self.syncing_accounts - set(start_accounts)
    for account_id in stop_accounts:
        self.log.info('sync service stopping sync', account_id=account_id)
        try:
            self.stop_sync(account_id)
        except OperationalError:
            self.log.error('Database error stopping account sync',
                           exc_info=True)
            log_uncaught_errors()
def poll(self):
    # We really don't want to take on more load than we can bear, so we
    # need to check the CPU usage before accepting new accounts.
    # Note that we can't check this for the current core because the kernel
    # transparently moves programs across cores.
    usage_per_cpu = psutil.cpu_percent(percpu=True)
    self.rolling_cpu_counts.append(usage_per_cpu)

    cpu_averages = self._compute_cpu_average()

    cpus_over_nominal = all(
        [cpu_usage > NOMINAL_THRESHOLD for cpu_usage in cpu_averages])

    # Conservatively, stop accepting accounts if the CPU usage is over
    # NOMINAL_THRESHOLD for every core, or if the total # of accounts
    # being synced by a single process exceeds the threshold. Excessive
    # concurrency per process can result in lowered database throughput
    # or availability problems, since many transactions may be held open
    # at the same time.
    if self.stealing_enabled and not cpus_over_nominal and \
            len(self.syncing_accounts) < MAX_ACCOUNTS_PER_PROCESS:
        r = self.queue_client.claim_next(self.process_identifier)
        if r:
            self.log.info('Claimed new account sync', account_id=r)
    else:
        if not self.stealing_enabled:
            reason = 'stealing disabled'
        elif cpus_over_nominal:
            reason = 'CPU too high'
        else:
            reason = 'reached max accounts for process'
        self.log.info('Not claiming new account sync', reason=reason)

    # Determine which accounts to sync
    start_accounts = self.accounts_to_sync()
    statsd_client.gauge(
        'accounts.{}.mailsync-{}.count'.format(
            self.host, self.process_number),
        len(start_accounts))

    # Perform the appropriate action on each account
    for account_id in start_accounts:
        if account_id not in self.syncing_accounts:
            try:
                self.start_sync(account_id)
            except OperationalError:
                self.log.error('Database error starting account sync',
                               exc_info=True)
                log_uncaught_errors()

    stop_accounts = self.syncing_accounts - set(start_accounts)
    for account_id in stop_accounts:
        self.log.info('sync service stopping sync', account_id=account_id)
        try:
            self.stop_sync(account_id)
        except OperationalError:
            self.log.error('Database error stopping account sync',
                           exc_info=True)
            log_uncaught_errors()
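The rolling-average helper referenced above isn't part of this excerpt. A plausible sketch of what it does — the names NUM_CPU_SAMPLES and compute_cpu_average are assumptions, not the project's actual code — keeps a fixed window of per-CPU samples and averages each core's column:

from collections import deque

NUM_CPU_SAMPLES = 10  # assumed window size; the real constant isn't shown here

# In __init__ (sketch): a bounded deque drops the oldest sample automatically.
rolling_cpu_counts = deque(maxlen=NUM_CPU_SAMPLES)

def compute_cpu_average(rolling_cpu_counts):
    # Each sample is a list with one usage figure per core. zip(*samples)
    # regroups them per core, so one momentary spike on a core doesn't
    # flip the claim/no-claim decision on its own.
    return [sum(core_samples) / len(core_samples)
            for core_samples in zip(*rolling_cpu_counts)]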
def _run_impl(self):
    self.enqueue_new_accounts()
    self.unassign_disabled_accounts()
    statsd_client.gauge('syncqueue.queue.{}.length'.format(self.zone),
                        self.queue_client.qsize())
    statsd_client.incr('syncqueue.service.{}.heartbeat'.format(self.zone))
    gevent.sleep(self.poll_interval)
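The statsd_client used throughout these snippets matches the statsd package's StatsClient interface. A minimal sketch of how such a client might be constructed — host, port, and prefix are placeholders, not the project's real settings:

import statsd

# Placeholder connection details; in practice these would come from config.
statsd_client = statsd.StatsClient(
    host="localhost",
    port=8125,
    prefix="nylas",  # prepended to every metric name sent from this client
)

# gauge() records a point-in-time value; incr() bumps a counter.
statsd_client.gauge("syncqueue.queue.us-east-1.length", 42)
statsd_client.incr("syncqueue.service.us-east-1.heartbeat")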
def poll(self):
    # We really don't want to take on more load than we can bear, so we
    # need to check the CPU usage before accepting new accounts.
    # Note that we can't check this for the current core because the kernel
    # transparently moves programs across cores.
    usage_per_cpu = psutil.cpu_percent(percpu=True)
    self.rolling_cpu_counts.append(usage_per_cpu)

    cpu_averages = self._compute_cpu_average()

    has_overloaded_cpus = all(
        [cpu_usage > 90.0 for cpu_usage in cpu_averages])
    cpus_over_nominal = all(
        [cpu_usage > 85.0 for cpu_usage in cpu_averages])

    # Conservatively, stop accepting accounts if the CPU usage is over 85%
    # for every core.
    if self.stealing_enabled and not cpus_over_nominal:
        r = self.queue_client.claim_next(self.process_identifier)
        if r:
            self.log.info('Claimed new account sync', account_id=r)

    if has_overloaded_cpus:
        # Unload a single account.
        acc = self._pick_account()
        if acc is not None:
            self.log.info('Overloaded CPU, unloading account',
                          account_id=acc)
            self.stop_sync(acc)
        else:
            self.log.error("Couldn't find an account to unload!")

    # Determine which accounts to sync
    start_accounts = self.accounts_to_sync()
    statsd_client.gauge(
        'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
        len(start_accounts))

    # Perform the appropriate action on each account
    for account_id in start_accounts:
        if account_id not in self.syncing_accounts:
            try:
                self.start_sync(account_id)
            except OperationalError:
                self.log.error('Database error starting account sync',
                               exc_info=True)
                log_uncaught_errors()

    stop_accounts = self.syncing_accounts - set(start_accounts)
    for account_id in stop_accounts:
        self.log.info('sync service stopping sync', account_id=account_id)
        try:
            self.stop_sync(account_id)
        except OperationalError:
            self.log.error('Database error stopping account sync',
                           exc_info=True)
            log_uncaught_errors()
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # queue_size = length of queue
        # deleted_count = number of accounts deleted during loop iteration
        # this is necessary because the length of ids_to_delete doesn't
        # change during loop iteration
        statsd_client.gauge(
            'mailsync.{}.account_deletion.queue.length'.format(shard_id),
            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()

            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id,
                                 throttle=throttle, dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)

            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
def _run_impl(self):
    log.info('Queueing accounts', zone=self.zone, shards=self.shards)
    while True:
        self.enqueue_new_accounts()
        self.unassign_disabled_accounts()
        statsd_client.gauge('syncqueue.queue.{}.length'.format(self.zone),
                            self.queue_client.qsize())
        statsd_client.incr('syncqueue.service.{}.heartbeat'.format(
            self.zone))
        gevent.sleep(self.poll_interval)
def receive_checkout(dbapi_connection, connection_record, connection_proxy):
    """Log checkedout and overflow when a connection is checked out"""
    hostname = gethostname().replace(".", "-")
    process_name = str(config.get("PROCESS_NAME", "unknown"))

    statsd_client.gauge(
        ".".join(["dbconn", database_name, hostname, process_name, "checkedout"]),
        connection_proxy._pool.checkedout(),
    )
    statsd_client.gauge(
        ".".join(["dbconn", database_name, hostname, process_name, "overflow"]),
        connection_proxy._pool.overflow(),
    )
def poll(self):
    # We really don't want to take on more load than we can bear, so we need
    # to check the CPU usage before accepting new accounts.
    # Note that we can't check this for the current core because the kernel
    # transparently moves programs across cores.
    usage_per_cpu = psutil.cpu_percent(percpu=True)

    # Conservatively, stop accepting accounts if the CPU usage is over 90%
    # for every core.
    overloaded_cpus = all(
        [cpu_usage > 90.0 for cpu_usage in usage_per_cpu])
    if self.stealing_enabled and not overloaded_cpus:
        r = self.queue_client.claim_next(self.process_identifier)
        if r:
            self.log.info('Claimed new account sync', account_id=r)

    # Determine which accounts to sync
    start_accounts = self.accounts_to_sync()
    statsd_client.gauge(
        'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
        len(start_accounts))

    # Perform the appropriate action on each account
    for account_id in start_accounts:
        if account_id not in self.syncing_accounts:
            try:
                self.start_sync(account_id)
            except OperationalError:
                self.log.error('Database error starting account sync',
                               exc_info=True)
                log_uncaught_errors()

    stop_accounts = self.syncing_accounts - set(start_accounts)
    for account_id in stop_accounts:
        self.log.info('sync service stopping sync', account_id=account_id)
        try:
            self.stop_sync(account_id)
        except OperationalError:
            self.log.error('Database error stopping account sync',
                           exc_info=True)
            log_uncaught_errors()
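One subtlety with psutil here: cpu_percent(percpu=True) returns one float per logical core, measured since the previous call, and the very first call with no interval returns a meaningless 0.0 per core. A quick illustration (the numbers are made up):

import psutil

# First call primes the measurement and should be ignored.
psutil.cpu_percent(percpu=True)   # e.g. [0.0, 0.0, 0.0, 0.0]

# Subsequent calls report usage per core since the previous call.
usage_per_cpu = psutil.cpu_percent(percpu=True)  # e.g. [12.5, 91.0, 88.3, 95.1]
overloaded = all(cpu > 90.0 for cpu in usage_per_cpu)  # False for this sample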
def report_progress(account_id, folder_name, downloaded_uid_count,
                    num_remaining_messages):
    """Inform listeners of sync progress."""
    with mailsync_session_scope() as db_session:
        saved_status = db_session.query(ImapFolderSyncStatus).join(Folder)\
            .filter(
                ImapFolderSyncStatus.account_id == account_id,
                Folder.name == folder_name).one()

        previous_count = saved_status.metrics.get(
            'num_downloaded_since_timestamp', 0)

        metrics = dict(num_downloaded_since_timestamp=(
                           previous_count + downloaded_uid_count),
                       download_uid_count=num_remaining_messages,
                       queue_checked_at=datetime.utcnow())

        saved_status.update_metrics(metrics)
        db_session.commit()

    statsd_client.gauge(
        ".".join(["accounts", str(account_id), "messages_downloaded"]),
        metrics.get("num_downloaded_since_timestamp"))
def initial_sync_impl(self, crispin_client):
    assert crispin_client.selected_folder_name == self.folder_name

    remote_uids = crispin_client.all_uids()
    uids = sorted(remote_uids, reverse=True)

    starting_uid = None
    with session_scope(self.namespace_id) as db_session:
        account = db_session.query(Account).get(self.account_id)
        s3_resync_status = account._sync_status.get('s3_resync_status', {})
        folder_id = str(self.folder_id)
        if folder_id in s3_resync_status:
            folder_status = s3_resync_status[folder_id]
            resync_status = folder_status.get('status')

            # We've synced everything we had to sync.
            if resync_status == 'done':
                raise MailsyncDone()

            starting_uid = s3_resync_status[folder_id].get(
                'last_synced_uid')

    if starting_uid is not None:
        # We're not starting from zero
        try:
            i = uids.index(starting_uid)
            uids = uids[i:]
        except ValueError:
            pass

    # We need the provider and account id to ship per-account
    # data to statsd.
    with session_scope(self.namespace_id) as db_session:
        account = db_session.query(Account).get(self.account_id)
        statsd_prefix = '.'.join(['s3_resync', account.provider,
                                  str(account.id), str(self.folder_id)])

    statsd_client.gauge(statsd_prefix + '.messages_total',
                        len(remote_uids))

    remaining_messages = len(uids)
    statsd_client.gauge(statsd_prefix + '.remaining_messages',
                        remaining_messages)

    if len(uids) == 0:
        log.info('Done syncing to S3', account_id=self.account_id)
        self._update_uid_resync_status(status='done')
        raise MailsyncDone()

    for chnk in chunk(uids, BATCH_SIZE):
        to_download = [uid for uid in chnk
                       if _message_missing_s3_object(
                           self.account_id, self.folder_id, uid)]
        self.download_and_commit_uids(crispin_client, to_download)

        # FIXME: publish some heartbeats.
        log.info('Resynced another batch of uids. Updating position.',
                 batch_size=BATCH_SIZE, position=chnk[-1])
        self._update_uid_resync_status(uid=chnk[-1])

        remaining_messages -= BATCH_SIZE
        statsd_client.gauge(statsd_prefix + '.remaining_messages',
                            remaining_messages)

        sleep(S3_RESYNC_FREQUENCY)

    self._update_uid_resync_status(status='done')
    raise MailsyncDone()
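The chunk helper isn't shown in this excerpt. A simple sketch with the behavior the loop relies on — consecutive, ordered, non-empty batches, the last possibly short — under the assumption that it is a plain batching generator:

def chunk(iterable, size):
    # Yield consecutive slices of at most `size` items; the loop above
    # indexes chnk[-1], so each yielded value must be a non-empty sequence.
    items = list(iterable)
    for i in range(0, len(items), size):
        yield items[i:i + size]

So chunk(range(5), 2) would yield [0, 1], [2, 3], and finally [4].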