Example #1
    def receive_checkout(dbapi_connection, connection_record, connection_proxy):
        """Log checkedout and overflow when a connection is checked out"""
        hostname = gethostname().replace(".", "-")
        process_name = str(config.get("PROCESS_NAME", "main_process"))

        if config.get("ENABLE_DB_TXN_METRICS", False):
            statsd_client.gauge(
                ".".join(
                    ["dbconn", database_name, hostname, process_name, "checkedout"]
                ),
                connection_proxy._pool.checkedout(),
            )

            statsd_client.gauge(
                ".".join(["dbconn", database_name, hostname, process_name, "overflow"]),
                connection_proxy._pool.overflow(),
            )

        # Keep track of where and why this connection was checked out.
        log = get_logger()
        context = log._context._dict.copy()
        f, name = find_first_app_frame_and_name(
            ignores=["sqlalchemy", "inbox.ignition", "inbox.logging"]
        )
        source = "{}:{}".format(name, f.f_lineno)

        pool_tracker[dbapi_connection] = {
            "source": source,
            "context": context,
            "checkedout_at": time.time(),
        }
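
The listener above has the signature of SQLAlchemy's pool "checkout" event. A minimal sketch of how it would typically be registered, assuming a SQLAlchemy engine (the DSN below is hypothetical; in the source project the engine is presumably built in inbox.ignition, which the frame filter above ignores):

    from sqlalchemy import create_engine, event

    # Hypothetical engine and DSN for illustration only.
    engine = create_engine("mysql+pymysql://user:password@localhost/inbox")

    # Pool events accept an Engine as target; "checkout" fires whenever a
    # connection is retrieved from the pool, invoking receive_checkout above.
    event.listen(engine, "checkout", receive_checkout)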
Example #2
    def receive_checkout(dbapi_connection, connection_record,
                         connection_proxy):
        '''Log checkedout and overflow when a connection is checked out'''
        hostname = gethostname().replace(".", "-")
        process_name = str(config.get("PROCESS_NAME", "unknown"))

        statsd_client.gauge(
            ".".join([
                "dbconn", database_name, hostname, process_name, "checkedout"
            ]), connection_proxy._pool.checkedout())

        statsd_client.gauge(
            ".".join(
                ["dbconn", database_name, hostname, process_name, "overflow"]),
            connection_proxy._pool.overflow())

        # Keep track of where and why this connection was checked out.
        log = get_logger()
        context = log._context._dict.copy()
        f, name = find_first_app_frame_and_name(
            ignores=['sqlalchemy', 'inbox.ignition', 'nylas.logging'])
        source = '{}:{}'.format(name, f.f_lineno)

        pool_tracker[dbapi_connection] = {
            'source': source,
            'context': context,
            'checkedout_at': time.time()
        }
Example #3
    def poll(self, event):
        # Determine which accounts to sync
        start_accounts = self.account_ids_to_sync()
        statsd_client.gauge(
            "mailsync.account_counts.{}.mailsync-{}.count".format(
                self.host, self.process_number),
            len(start_accounts),
        )

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error("Database error starting account sync",
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.account_ids_owned() - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info("sync service stopping sync", account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error("Database error stopping account sync",
                               exc_info=True)
                log_uncaught_errors()
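
None of these examples show where `statsd_client` comes from; its interface (`gauge`, `incr`, `timing`) matches the community `statsd` package. A minimal sketch, with hypothetical host, port, and prefix values that would normally come from configuration:

    import statsd

    # Hypothetical configuration for illustration only.
    statsd_client = statsd.StatsClient(host="localhost", port=8125,
                                       prefix="mailsync")

    statsd_client.gauge("accounts.example.count", 42)  # set a gauge to a value
    statsd_client.incr("service.example.heartbeat")    # bump a counter by 1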
Example #4
    def poll(self, event):
        # Determine which accounts to sync
        start_accounts = self.account_ids_to_sync()
        statsd_client.gauge(
            'mailsync.account_counts.{}.mailsync-{}.count'.format(
                self.host, self.process_number), len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.account_ids_owned() - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync',
                          account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
Example #5
    def poll(self):
        if self.stealing_enabled:
            r = self.queue_client.claim_next(self.process_identifier)
            if r:
                self.log.info('Claimed new account sync', account_id=r)

        # Determine which accounts to sync
        start_accounts = self.accounts_to_sync()
        statsd_client.gauge(
            'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
            len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.syncing_accounts - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync', account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
Example #6
    def poll(self):
        if self.stealing_enabled:
            r = self.queue_client.claim_next(self.process_identifier)
            if r:
                self.log.info('Claimed new account sync', account_id=r)

        # Determine which accounts to sync
        start_accounts = self.accounts_to_sync()
        statsd_client.gauge(
            'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
            len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.syncing_accounts - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync',
                          account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
Example #7
    def receive_checkout(dbapi_connection, connection_record,
                         connection_proxy):
        '''Log checkedout and overflow when a connection is checked out'''
        hostname = gethostname().replace(".", "-")
        process_name = str(config.get("PROCESS_NAME", "unknown"))

        statsd_client.gauge(".".join(
            ["dbconn", database_name, hostname, process_name,
             "checkedout"]),
            connection_proxy._pool.checkedout())

        statsd_client.gauge(".".join(
            ["dbconn", database_name, hostname, process_name,
             "overflow"]),
            connection_proxy._pool.overflow())

        # Keep track of where and why this connection was checked out.
        log = get_logger()
        context = log._context._dict.copy()
        f, name = find_first_app_frame_and_name(ignores=['sqlalchemy',
                                                         'inbox.ignition',
                                                         'nylas.logging'])
        source = '{}:{}'.format(name, f.f_lineno)

        pool_tracker[dbapi_connection] = {
            'source': source,
            'context': context,
            'checkedout_at': time.time()
        }
Example #8
    def poll(self):
        # We really don't want to take on more load than we can bear, so we
        # need to check the CPU usage before accepting new accounts.
        # Note that we can't check this for the current core because the kernel
        # transparently moves programs across cores.
        usage_per_cpu = psutil.cpu_percent(percpu=True)
        self.rolling_cpu_counts.append(usage_per_cpu)

        cpu_averages = self._compute_cpu_average()

        cpus_over_nominal = all(
            [cpu_usage > NOMINAL_THRESHOLD for cpu_usage in cpu_averages])

        # Conservatively, stop accepting accounts if the CPU usage is over
        # NOMINAL_THRESHOLD for every core, or if the total # of accounts
        # being synced by a single process exceeds the threshold. Excessive
        # concurrency per process can result in lowered database throughput
        # or availability problems, since many transactions may be held open
        # at the same time.
        if self.stealing_enabled and not cpus_over_nominal and \
                len(self.syncing_accounts) < MAX_ACCOUNTS_PER_PROCESS:
            r = self.queue_client.claim_next(self.process_identifier)
            if r:
                self.log.info('Claimed new account sync', account_id=r)
        else:
            if not self.stealing_enabled:
                reason = 'stealing disabled'
            elif cpus_over_nominal:
                reason = 'CPU too high'
            else:
                reason = 'reached max accounts for process'
            self.log.info('Not claiming new account sync', reason=reason)

        # Determine which accounts to sync
        start_accounts = self.accounts_to_sync()
        statsd_client.gauge(
            'accounts.{}.mailsync-{}.count'.format(self.host,
                                                   self.process_number),
            len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.syncing_accounts - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync', account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
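
`_compute_cpu_average` is not included in the excerpt. Since `rolling_cpu_counts` collects successive `psutil.cpu_percent(percpu=True)` samples, a plausible sketch averages each core across the rolling window (the window size and the real implementation may differ):

    # Hypothetical setup, done once in __init__ with collections.deque so only
    # the last N samples are kept:
    # self.rolling_cpu_counts = deque(maxlen=10)

    def _compute_cpu_average(self):
        # zip(*window) regroups the per-sample lists by core, so each `core`
        # tuple is that core's recent usage history.
        return [sum(core) / len(core) for core in zip(*self.rolling_cpu_counts)]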
Example #9
    def _run_impl(self):
        self.enqueue_new_accounts()
        self.unassign_disabled_accounts()
        statsd_client.gauge('syncqueue.queue.{}.length'.format(self.zone),
                            self.queue_client.qsize())
        statsd_client.incr(
            'syncqueue.service.{}.heartbeat'.format(self.zone))
        gevent.sleep(self.poll_interval)
Example #10
    def poll(self):
        # We really don't want to take on more load than we can bear, so we
        # need to check the CPU usage before accepting new accounts.
        # Note that we can't check this for the current core because the kernel
        # transparently moves programs across cores.
        usage_per_cpu = psutil.cpu_percent(percpu=True)
        self.rolling_cpu_counts.append(usage_per_cpu)

        cpu_averages = self._compute_cpu_average()

        has_overloaded_cpus = all(
            [cpu_usage > 90.0 for cpu_usage in cpu_averages])
        cpus_over_nominal = all(
            [cpu_usage > 85.0 for cpu_usage in cpu_averages])

        # Conservatively, stop accepting accounts if the CPU usage is over 85%
        # for every core.
        if self.stealing_enabled and not cpus_over_nominal:
            r = self.queue_client.claim_next(self.process_identifier)
            if r:
                self.log.info('Claimed new account sync', account_id=r)

        if has_overloaded_cpus:
            # Unload a single account.
            acc = self._pick_account()

            if acc is not None:
                self.log.info('Overloaded CPU, unloading account',
                              account_id=acc)
                self.stop_sync(acc)
            else:
                self.log.error("Couldn't find an account to unload!")

        # Determine which accounts to sync
        start_accounts = self.accounts_to_sync()
        statsd_client.gauge(
            'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
            len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.syncing_accounts - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync', account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
Example #11
    def poll(self):
        # We really don't want to take on more load than we can bear, so we
        # need to check the CPU usage before accepting new accounts.
        # Note that we can't check this for the current core because the kernel
        # transparently moves programs across cores.
        usage_per_cpu = psutil.cpu_percent(percpu=True)
        self.rolling_cpu_counts.append(usage_per_cpu)

        cpu_averages = self._compute_cpu_average()

        cpus_over_nominal = all([cpu_usage > NOMINAL_THRESHOLD for cpu_usage in cpu_averages])

        # Conservatively, stop accepting accounts if the CPU usage is over
        # NOMINAL_THRESHOLD for every core, or if the total # of accounts
        # being synced by a single process exceeds the threshold. Excessive
        # concurrency per process can result in lowered database throughput
        # or availability problems, since many transactions may be held open
        # at the same time.
        if self.stealing_enabled and not cpus_over_nominal and \
                len(self.syncing_accounts) < MAX_ACCOUNTS_PER_PROCESS:
            r = self.queue_client.claim_next(self.process_identifier)
            if r:
                self.log.info('Claimed new account sync', account_id=r)
        else:
            if not self.stealing_enabled:
                reason = 'stealing disabled'
            elif cpus_over_nominal:
                reason = 'CPU too high'
            else:
                reason = 'reached max accounts for process'
            self.log.info('Not claiming new account sync', reason=reason)

        # Determine which accounts to sync
        start_accounts = self.accounts_to_sync()
        statsd_client.gauge(
            'accounts.{}.mailsync-{}.count'.format(
                self.host, self.process_number), len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.syncing_accounts - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync',
                          account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
Example #12
    def poll(self):
        # We really don't want to take on more load than we can bear, so we
        # need to check the CPU usage before accepting new accounts.
        # Note that we can't check this for the current core because the kernel
        # transparently moves programs across cores.
        usage_per_cpu = psutil.cpu_percent(percpu=True)
        self.rolling_cpu_counts.append(usage_per_cpu)

        cpu_averages = self._compute_cpu_average()

        has_overloaded_cpus = all([cpu_usage > 90.0 for cpu_usage in cpu_averages])
        cpus_over_nominal = all([cpu_usage > 85.0 for cpu_usage in cpu_averages])

        # Conservatively, stop accepting accounts if the CPU usage is over 85%
        # for every core.
        if self.stealing_enabled and not cpus_over_nominal:
            r = self.queue_client.claim_next(self.process_identifier)
            if r:
                self.log.info('Claimed new account sync', account_id=r)

        if has_overloaded_cpus:
            # Unload a single account.
            acc = self._pick_account()

            if acc is not None:
                self.log.info('Overloaded CPU, unloading account',
                              account_id=acc)
                self.stop_sync(acc)
            else:
                self.log.error("Couldn't find an account to unload!")

        # Determine which accounts to sync
        start_accounts = self.accounts_to_sync()
        statsd_client.gauge(
            'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
            len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.syncing_accounts - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync',
                          account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
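
`_pick_account` is likewise not shown; any policy that returns the id of a currently syncing account (or None) would fit. A deliberately simple hypothetical version sheds a random account:

    import random

    def _pick_account(self):
        # Hypothetical policy: unload a random syncing account. The real
        # implementation might instead prefer the heaviest or least active one.
        if not self.syncing_accounts:
            return None
        return random.choice(sorted(self.syncing_accounts))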
Example #13
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id) for acc
                         in db_session.query(Account) if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # Report how many accounts remain queued for deletion. The length of
        # ids_to_delete doesn't change while we iterate, so subtract the
        # number deleted so far to get the remaining queue length.
        statsd_client.gauge('mailsync.{}.account_deletion.queue.length'
                            .format(shard_id),
                            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn('Account NOT marked for deletion. '
                             'Will not delete', account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id, namespace_id, throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed', error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully', shard_id=shard_id,
             time=end - start, count=deleted_count)
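
One caveat in this example: with the common `statsd` package, `timing()` treats a plain number as milliseconds, so passing `time.time() - start_time` (seconds) under-reports durations by a factor of 1000. If that client is in use, the call would need a conversion:

    statsd_client.timing('mailsync.account_deletion.queue.deleted',
                         (time.time() - start_time) * 1000)  # statsd expects ms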
Example #14
    def _run_impl(self):
        log.info('Queueing accounts', zone=self.zone, shards=self.shards)
        while True:
            self.enqueue_new_accounts()
            self.unassign_disabled_accounts()
            statsd_client.gauge('syncqueue.queue.{}.length'.format(self.zone),
                                self.queue_client.qsize())
            statsd_client.incr(
                'syncqueue.service.{}.heartbeat'.format(self.zone))
            gevent.sleep(self.poll_interval)
Example #15
    def _run_impl(self):
        log.info('Queueing accounts', zone=self.zone, shards=self.shards)
        while True:
            self.enqueue_new_accounts()
            self.unassign_disabled_accounts()
            statsd_client.gauge('syncqueue.queue.{}.length'.format(self.zone),
                                self.queue_client.qsize())
            statsd_client.incr('syncqueue.service.{}.heartbeat'.format(
                self.zone))
            gevent.sleep(self.poll_interval)
Example #16
    def receive_checkout(dbapi_connection, connection_record, connection_proxy):
        """Log checkedout and overflow when a connection is checked out"""
        hostname = gethostname().replace(".", "-")
        process_name = str(config.get("PROCESS_NAME", "unknown"))

        statsd_client.gauge(
            ".".join(["dbconn", database_name, hostname, process_name, "checkedout"]),
            connection_proxy._pool.checkedout(),
        )

        statsd_client.gauge(
            ".".join(["dbconn", database_name, hostname, process_name, "overflow"]),
            connection_proxy._pool.overflow(),
        )
Example #17
    def receive_checkout(dbapi_connection, connection_record,
                         connection_proxy):
        '''Log checkedout and overflow when a connection is checked out'''
        hostname = gethostname().replace(".", "-")
        process_name = str(config.get("PROCESS_NAME", "unknown"))

        statsd_client.gauge(
            ".".join([
                "dbconn", database_name, hostname, process_name, "checkedout"
            ]), connection_proxy._pool.checkedout())

        statsd_client.gauge(
            ".".join(
                ["dbconn", database_name, hostname, process_name, "overflow"]),
            connection_proxy._pool.overflow())
Example #18
    def poll(self):
        # We really don't want to take on more load than we can bear, so we need
        # to check the CPU usage before accepting new accounts.
        # Note that we can't check this for the current core because the kernel
        # transparently moves programs across cores.
        usage_per_cpu = psutil.cpu_percent(percpu=True)

        # Conservatively, stop accepting accounts if the CPU usage is over 90%
        # for every core.
        overloaded_cpus = all(
            [cpu_usage > 90.0 for cpu_usage in usage_per_cpu])

        if self.stealing_enabled and not overloaded_cpus:
            r = self.queue_client.claim_next(self.process_identifier)
            if r:
                self.log.info('Claimed new account sync', account_id=r)

        # Determine which accounts to sync
        start_accounts = self.accounts_to_sync()
        statsd_client.gauge(
            'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
            len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.syncing_accounts - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync', account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
Example #19
    def poll(self):
        # We really don't want to take on more load than we can bear, so we need
        # to check the CPU usage before accepting new accounts.
        # Note that we can't check this for the current core because the kernel
        # transparently moves programs across cores.
        usage_per_cpu = psutil.cpu_percent(percpu=True)

        # Conservatively, stop accepting accounts if the CPU usage is over 90%
        # for every core.
        overloaded_cpus = all([cpu_usage > 90.0 for cpu_usage in usage_per_cpu])

        if self.stealing_enabled and not overloaded_cpus:
            r = self.queue_client.claim_next(self.process_identifier)
            if r:
                self.log.info('Claimed new account sync', account_id=r)

        # Determine which accounts to sync
        start_accounts = self.accounts_to_sync()
        statsd_client.gauge(
            'accounts.{}.mailsync-{}.count'.format(self.host, self.cpu_id),
            len(start_accounts))

        # Perform the appropriate action on each account
        for account_id in start_accounts:
            if account_id not in self.syncing_accounts:
                try:
                    self.start_sync(account_id)
                except OperationalError:
                    self.log.error('Database error starting account sync',
                                   exc_info=True)
                    log_uncaught_errors()

        stop_accounts = self.syncing_accounts - set(start_accounts)
        for account_id in stop_accounts:
            self.log.info('sync service stopping sync',
                          account_id=account_id)
            try:
                self.stop_sync(account_id)
            except OperationalError:
                self.log.error('Database error stopping account sync',
                               exc_info=True)
                log_uncaught_errors()
Example #20
def report_progress(account_id, folder_name, downloaded_uid_count,
                    num_remaining_messages):
    """ Inform listeners of sync progress. """
    with mailsync_session_scope() as db_session:
        saved_status = db_session.query(ImapFolderSyncStatus).join(Folder)\
            .filter(
                ImapFolderSyncStatus.account_id == account_id,
                Folder.name == folder_name).one()

        previous_count = saved_status.metrics.get(
            'num_downloaded_since_timestamp', 0)

        metrics = dict(num_downloaded_since_timestamp=(previous_count +
                                                       downloaded_uid_count),
                       download_uid_count=num_remaining_messages,
                       queue_checked_at=datetime.utcnow())

        saved_status.update_metrics(metrics)
        db_session.commit()

    statsd_client.gauge(
        ".".join(["accounts", str(account_id), "messages_downloaded"]),
        metrics.get("num_downloaded_since_timestamp"))
Example #21
def report_progress(account_id, folder_name, downloaded_uid_count,
                    num_remaining_messages):
    """ Inform listeners of sync progress. """
    with mailsync_session_scope() as db_session:
        saved_status = db_session.query(ImapFolderSyncStatus).join(Folder)\
            .filter(
                ImapFolderSyncStatus.account_id == account_id,
                Folder.name == folder_name).one()

        previous_count = saved_status.metrics.get(
            'num_downloaded_since_timestamp', 0)

        metrics = dict(num_downloaded_since_timestamp=(previous_count +
                                                       downloaded_uid_count),
                       download_uid_count=num_remaining_messages,
                       queue_checked_at=datetime.utcnow())

        saved_status.update_metrics(metrics)
        db_session.commit()

    statsd_client.gauge(
        ".".join(["accounts",
                  str(account_id), "messages_downloaded"]),
        metrics.get("num_downloaded_since_timestamp"))
Example #22
    def _run_impl(self):
        self.enqueue_new_accounts()
        self.unassign_disabled_accounts()
        statsd_client.gauge("syncqueue.queue.{}.length".format(self.zone),
                            self.queue_client.qsize())
        statsd_client.incr("syncqueue.service.{}.heartbeat".format(self.zone))
        gevent.sleep(self.poll_interval)
Example #23
    def initial_sync_impl(self, crispin_client):
        assert crispin_client.selected_folder_name == self.folder_name
        remote_uids = crispin_client.all_uids()
        uids = sorted(remote_uids, reverse=True)

        starting_uid = None
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            s3_resync_status = account._sync_status.get(
                's3_resync_status', {})

            folder_id = str(self.folder_id)
            if folder_id in s3_resync_status:
                folder_status = s3_resync_status[folder_id]
                resync_status = folder_status.get('status')

                # We've synced everything we had to sync.
                if resync_status == 'done':
                    raise MailsyncDone()

                starting_uid = s3_resync_status[folder_id].get(
                    'last_synced_uid')

        if starting_uid is not None:
            # We're not starting from zero
            try:
                i = uids.index(starting_uid)
                uids = uids[i:]
            except ValueError:
                pass

        # We need the provider and account id to ship per-account
        # data to statsd.
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            statsd_prefix = '.'.join(['s3_resync', account.provider, str(account.id), str(self.folder_id)])

        statsd_client.gauge(statsd_prefix + '.messages_total', len(remote_uids))

        remaining_messages = len(uids)
        statsd_client.gauge(statsd_prefix + '.remaining_messages', remaining_messages)

        if len(uids) == 0:
            log.info('Done syncing to S3', account_id=self.account_id)
            self._update_uid_resync_status(status='done')
            raise MailsyncDone()

        for chnk in chunk(uids, BATCH_SIZE):
            to_download = [uid for uid in chnk if _message_missing_s3_object(
                            self.account_id, self.folder_id, uid)]
            self.download_and_commit_uids(crispin_client, to_download)

            # FIXME: publish some heartbeats.

            log.info('Resynced another batch of uids. Updating position.',
                     batch_size=BATCH_SIZE, position=chnk[-1])
            self._update_uid_resync_status(uid=chnk[-1])

            remaining_messages -= BATCH_SIZE
            statsd_client.gauge(statsd_prefix + '.remaining_messages',
                                remaining_messages)

            sleep(S3_RESYNC_FREQUENCY)

        self._update_uid_resync_status(status='done')
        raise MailsyncDone()
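
The `chunk` helper consumed above is not part of the excerpt; a typical implementation yields fixed-size slices, matching how `chunk(uids, BATCH_SIZE)` is iterated:

    def chunk(iterable, size):
        """Yield successive lists of at most `size` items (hypothetical helper)."""
        items = list(iterable)
        for i in range(0, len(items), size):
            yield items[i:i + size]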
Example #24
    def initial_sync_impl(self, crispin_client):
        assert crispin_client.selected_folder_name == self.folder_name
        remote_uids = crispin_client.all_uids()
        uids = sorted(remote_uids, reverse=True)

        starting_uid = None
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            s3_resync_status = account._sync_status.get(
                's3_resync_status', {})

            folder_id = str(self.folder_id)
            if folder_id in s3_resync_status:
                folder_status = s3_resync_status[folder_id]
                resync_status = folder_status.get('status')

                # We've synced everything we had to sync.
                if resync_status == 'done':
                    raise MailsyncDone()

                starting_uid = s3_resync_status[folder_id].get(
                    'last_synced_uid')

        if starting_uid is not None:
            # We're not starting from zero
            try:
                i = uids.index(starting_uid)
                uids = uids[i:]
            except ValueError:
                pass

        # We need the provider and account id to ship per-account
        # data to statsd.
        with session_scope(self.namespace_id) as db_session:
            account = db_session.query(Account).get(self.account_id)
            statsd_prefix = '.'.join(['s3_resync', account.provider,
                                      str(account.id), str(self.folder_id)])

        statsd_client.gauge(statsd_prefix + '.messages_total', len(remote_uids))

        remaining_messages = len(uids)
        statsd_client.gauge(statsd_prefix + '.remaining_messages',
                            remaining_messages)

        if len(uids) == 0:
            log.info('Done syncing to S3', account_id=self.account_id)
            self._update_uid_resync_status(status='done')
            raise MailsyncDone()

        for chnk in chunk(uids, BATCH_SIZE):
            to_download = [uid for uid in chnk if _message_missing_s3_object(
                            self.account_id, self.folder_id, uid)]
            self.download_and_commit_uids(crispin_client, to_download)

            # FIXME: publish some heartbeats.

            log.info('Resynced another batch of uids. Updating position.',
                     batch_size=BATCH_SIZE, position=chnk[-1])
            self._update_uid_resync_status(uid=chnk[-1])

            remaining_messages -= BATCH_SIZE
            statsd_client.gauge(statsd_prefix + '.remaining_messages',
                                remaining_messages)

            sleep(S3_RESYNC_FREQUENCY)

        self._update_uid_resync_status(status='done')
        raise MailsyncDone()
Example #25
def delete_marked_accounts(shard_id, throttle=False, dry_run=False):
    start = time.time()
    deleted_count = 0
    ids_to_delete = []

    with session_scope_by_shard_id(shard_id) as db_session:
        ids_to_delete = [(acc.id, acc.namespace.id)
                         for acc in db_session.query(Account)
                         if acc.is_deleted]

    queue_size = len(ids_to_delete)
    for account_id, namespace_id in ids_to_delete:
        # Report how many accounts remain queued for deletion. The length of
        # ids_to_delete doesn't change while we iterate, so subtract the
        # number deleted so far to get the remaining queue length.
        statsd_client.gauge(
            'mailsync.{}.account_deletion.queue.length'.format(shard_id),
            queue_size - deleted_count)
        try:
            with session_scope(namespace_id) as db_session:
                account = db_session.query(Account).get(account_id)
                if not account:
                    log.critical('Account does not exist',
                                 account_id=account_id)
                    continue

                if account.sync_should_run or not account.is_deleted:
                    log.warn(
                        'Account NOT marked for deletion. '
                        'Will not delete',
                        account_id=account_id)
                    continue

            log.info('Deleting account', account_id=account_id)
            start_time = time.time()
            # Delete data in database
            try:
                log.info('Deleting database data', account_id=account_id)
                delete_namespace(account_id,
                                 namespace_id,
                                 throttle=throttle,
                                 dry_run=dry_run)
            except Exception as e:
                log.critical('Database data deletion failed',
                             error=e,
                             account_id=account_id)
                continue

            # Delete liveness data
            log.debug('Deleting liveness data', account_id=account_id)
            clear_heartbeat_status(account_id)
            deleted_count += 1
            statsd_client.incr('mailsync.account_deletion.queue.deleted', 1)
            statsd_client.timing('mailsync.account_deletion.queue.deleted',
                                 time.time() - start_time)
        except Exception:
            log_uncaught_errors(log, account_id=account_id)

    end = time.time()
    log.info('All data deleted successfully',
             shard_id=shard_id,
             time=end - start,
             count=deleted_count)