Пример #1
0
    def condstore_refresh_flags(self, crispin_client):
        """Apply flag changes and expunges reported since the saved modseq.

        Asks the server for the folder's HIGHESTMODSEQ and compares it with
        ``self.highestmodseq``. Nothing is done when the value is unchanged
        or has gone backwards (the latter is logged). Otherwise the changed
        flags are written via ``common.update_metadata``, local UIDs missing
        from the remote UID list are removed, and the saved modseq is
        advanced to the server value.
        """
        status = crispin_client.conn.folder_status(
            self.folder_name, ['HIGHESTMODSEQ'])
        server_modseq = status['HIGHESTMODSEQ']

        # Seed the saved value on the first poll so the comparisons below
        # always have something to compare against.
        if self.highestmodseq is None:
            self.highestmodseq = server_modseq

        if server_modseq == self.highestmodseq:
            # No change on the server; nothing to refresh.
            return
        if server_modseq < self.highestmodseq:
            # Not expected to happen; log it and leave local state alone.
            log.warning(
                'got server highestmodseq less than saved highestmodseq',
                new_highestmodseq=server_modseq,
                saved_highestmodseq=self.highestmodseq)
            return

        # The server value moved forward: fetch what changed since our
        # saved modseq.
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        flag_changes = crispin_client.condstore_changed_flags(
            self.highestmodseq)
        server_uids = crispin_client.all_uids()
        with session_scope() as db_session:
            common.update_metadata(
                self.account_id, self.folder_id, flag_changes, db_session)
            stored_uids = common.local_uids(
                self.account_id, db_session, self.folder_id)
            # Anything stored locally but absent remotely has been expunged.
            vanished_uids = set(stored_uids) - set(server_uids)
            common.remove_deleted_uids(
                self.account_id, self.folder_id, vanished_uids, db_session)
            db_session.commit()
        self.highestmodseq = server_modseq
Пример #2
0
    def refresh_flags_impl(self, crispin_client, max_uids):
        """Refresh flags for up to ``max_uids`` locally stored UIDs.

        Fetches the current flags for the selected local UIDs and compares
        the response with the one cached in ``self.flags_fetch_results``
        under the same ``max_uids`` key. An identical response is a no-op;
        otherwise UIDs missing from the response are removed, the flags are
        persisted, and the cache entry is replaced.
        """
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        with session_scope(self.namespace_id) as db_session:
            stored_uids = common.local_uids(
                account_id=self.account_id,
                session=db_session,
                folder_id=self.folder_id,
                limit=max_uids)

        flags = crispin_client.flags(stored_uids)
        cache = self.flags_fetch_results
        if max_uids in cache and cache[max_uids] == (stored_uids, flags):
            # Same response as last time for this window: skip persisting.
            log.debug('Unchanged flags refresh response, '
                      'not persisting changes', max_uids=max_uids)
            return

        log.debug('Changed flags refresh response, persisting changes',
                  max_uids=max_uids)
        # UIDs we asked about that the server did not report are gone.
        vanished_uids = set(stored_uids) - set(flags.keys())
        common.remove_deleted_uids(
            self.account_id, self.folder_id, vanished_uids)
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(
                self.account_id, self.folder_id, self.folder_role, flags,
                db_session)
        self.flags_fetch_results[max_uids] = (stored_uids, flags)
Пример #3
0
    def initial_sync_impl(self, crispin_client):
        """Download all remote UIDs that are not yet stored locally.

        Removes local UIDs that no longer exist remotely, records the UID
        counts, starts a ``poll_for_changes`` greenlet, and then downloads
        the unknown UIDs in chunks of 1024, highest UID first. When
        ``is_all_mail`` is true for the folder, UIDs carrying the inbox
        label are downloaded before the rest. The poller greenlet is always
        killed on exit.
        """
        # We wrap the block in a try/finally because the greenlets like
        # change_poller need to be killed when this greenlet is interrupted
        change_poller = None
        try:
            remote_uids = sorted(crispin_client.all_uids(), key=int)
            remote_uid_set = set(remote_uids)
            with self.syncmanager_lock:
                with session_scope(self.namespace_id) as db_session:
                    # Normalise to a set once so both differences below work
                    # whatever collection type common.local_uids returns.
                    local_uid_set = set(common.local_uids(
                        self.account_id, db_session, self.folder_id
                    ))
                common.remove_deleted_uids(
                    self.account_id, self.folder_id,
                    local_uid_set - remote_uid_set
                )
                unknown_uids = remote_uid_set - local_uid_set
                with session_scope(self.namespace_id) as db_session:
                    self.update_uid_counts(
                        db_session,
                        remote_uid_count=len(remote_uids),
                        download_uid_count=len(unknown_uids),
                    )

            change_poller = gevent.spawn(self.poll_for_changes)
            bind_context(change_poller, "changepoller", self.account_id, self.folder_id)

            if self.is_all_mail(crispin_client):
                # Prioritize UIDs for messages in the inbox folder.
                if len(remote_uids) < 1e6:
                    inbox_uids = set(
                        crispin_client.search_uids(["X-GM-LABELS", "inbox"])
                    )
                else:
                    # The search above is really slow (times out) on really
                    # large mailboxes, so bound the search to messages within
                    # the past month in order to get anywhere.
                    since = datetime.utcnow() - timedelta(days=30)
                    inbox_uids = set(
                        crispin_client.search_uids(
                            ["X-GM-LABELS", "inbox", "SINCE", since]
                        )
                    )

                # Inbox UIDs go last here because the list is consumed in
                # reverse below, which puts them first in download order.
                uids_to_download = sorted(unknown_uids - inbox_uids) + sorted(
                    unknown_uids & inbox_uids
                )
            else:
                uids_to_download = sorted(unknown_uids)

            for uids in chunk(reversed(uids_to_download), 1024):
                g_metadata = crispin_client.g_metadata(uids)
                # UIDs might have been expunged since sync started, in which
                # case the g_metadata call above will return nothing.
                # They may also have been preemptively downloaded by thread
                # expansion. We can omit such UIDs.
                uids = [u for u in uids if u in g_metadata and u not in self.saved_uids]
                self.batch_download_uids(crispin_client, uids, g_metadata)
        finally:
            if change_poller is not None:
                # schedule change_poller to die
                gevent.kill(change_poller)
Пример #4
0
    def refresh_flags_impl(self, crispin_client, max_uids):
        """Remove expunged UIDs, then refresh flags for up to ``max_uids``.

        First compares all local UIDs with the full remote UID list and
        removes the ones that disappeared. Then fetches flags for the local
        UIDs selected with ``limit=max_uids``; if the response equals the
        one cached in ``self.flags_fetch_results`` nothing is persisted,
        otherwise missing UIDs are removed, flags are written, and the cache
        entry is replaced. Removals and the metadata update each run under
        ``self.syncmanager_lock``.
        """
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)

        # Pass 1: drop anything stored locally that the server no longer has.
        server_uids = crispin_client.all_uids()
        with session_scope(self.namespace_id) as db_session:
            stored_uids = common.local_uids(
                self.account_id, db_session, self.folder_id)
            vanished_uids = set(stored_uids) - set(server_uids)
        if vanished_uids:
            with self.syncmanager_lock:
                common.remove_deleted_uids(
                    self.account_id, self.folder_id, vanished_uids)

        # Pass 2: pick the UIDs whose flags we monitor for changes.
        with session_scope(self.namespace_id) as db_session:
            watched_uids = common.local_uids(
                account_id=self.account_id,
                session=db_session,
                folder_id=self.folder_id,
                limit=max_uids,
            )

        flags = crispin_client.flags(watched_uids)
        cache = self.flags_fetch_results
        if max_uids in cache and cache[max_uids] == (watched_uids, flags):
            # Identical to the previous response for this window, so there
            # is nothing to persist. Deliberately not logged, to keep the
            # overall logging volume down.
            return

        log.debug("Changed flags refresh response, persisting changes",
                  max_uids=max_uids)
        missing_uids = set(watched_uids) - set(flags.keys())
        with self.syncmanager_lock:
            common.remove_deleted_uids(
                self.account_id, self.folder_id, missing_uids)
        # The lock is released and re-acquired between the two steps.
        with self.syncmanager_lock:
            with session_scope(self.namespace_id) as db_session:
                common.update_metadata(
                    self.account_id, self.folder_id, self.folder_role,
                    flags, db_session)
        self.flags_fetch_results[max_uids] = (watched_uids, flags)
Пример #5
0
    def initial_sync_impl(self, crispin_client):
        """Download all remote UIDs that are not yet stored locally.

        Removes local UIDs that no longer exist remotely, records the UID
        counts, starts a ``poll_for_changes`` greenlet, and then downloads
        the unknown UIDs in chunks of 1024, highest UID first. When
        ``is_all_mail`` is true for the folder, UIDs carrying the inbox
        label are downloaded before the rest. The poller greenlet is always
        killed on exit.
        """
        # We wrap the block in a try/finally because the greenlets like
        # change_poller need to be killed when this greenlet is interrupted
        change_poller = None
        try:
            remote_uids = sorted(crispin_client.all_uids(), key=int)
            remote_uid_set = set(remote_uids)
            with self.syncmanager_lock:
                with session_scope(self.namespace_id) as db_session:
                    # Normalise to a set once so both differences below work
                    # whatever collection type common.local_uids returns.
                    local_uid_set = set(common.local_uids(
                        self.account_id, db_session, self.folder_id))
                common.remove_deleted_uids(
                    self.account_id, self.folder_id,
                    local_uid_set - remote_uid_set)
                unknown_uids = remote_uid_set - local_uid_set
                with session_scope(self.namespace_id) as db_session:
                    self.update_uid_counts(
                        db_session, remote_uid_count=len(remote_uids),
                        download_uid_count=len(unknown_uids))

            change_poller = spawn(self.poll_for_changes)
            bind_context(change_poller, 'changepoller', self.account_id,
                         self.folder_id)

            if self.is_all_mail(crispin_client):
                # Prioritize UIDs for messages in the inbox folder.
                if len(remote_uids) < 1e6:
                    inbox_uids = set(
                        crispin_client.search_uids(['X-GM-LABELS', 'inbox']))
                else:
                    # The search above is really slow (times out) on really
                    # large mailboxes, so bound the search to messages within
                    # the past month in order to get anywhere.
                    since = datetime.utcnow() - timedelta(days=30)
                    inbox_uids = set(crispin_client.search_uids([
                        'X-GM-LABELS', 'inbox',
                        'SINCE', since]))

                # Inbox UIDs go last here because the list is consumed in
                # reverse below, which puts them first in download order.
                uids_to_download = (sorted(unknown_uids - inbox_uids) +
                                    sorted(unknown_uids & inbox_uids))
            else:
                uids_to_download = sorted(unknown_uids)

            for uids in chunk(reversed(uids_to_download), 1024):
                g_metadata = crispin_client.g_metadata(uids)
                # UIDs might have been expunged since sync started, in which
                # case the g_metadata call above will return nothing.
                # They may also have been preemptively downloaded by thread
                # expansion. We can omit such UIDs.
                uids = [u for u in uids if u in g_metadata and u not in
                        self.saved_uids]
                self.batch_download_uids(crispin_client, uids, g_metadata)
        finally:
            if change_poller is not None:
                # schedule change_poller to die
                kill(change_poller)
Пример #6
0
    def refresh_flags_impl(self, crispin_client, max_uids):
        """Refresh flags for up to ``max_uids`` locally stored UIDs.

        Fetches flags for the selected local UIDs, removes the UIDs the
        server did not report back, and persists the returned flags.
        """
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        with session_scope(self.namespace_id) as db_session:
            stored_uids = common.local_uids(
                account_id=self.account_id,
                session=db_session,
                folder_id=self.folder_id,
                limit=max_uids,
            )

        flags = crispin_client.flags(stored_uids)
        # UIDs we asked about that are missing from the response are gone.
        vanished_uids = set(stored_uids) - set(flags.keys())
        common.remove_deleted_uids(
            self.account_id, self.folder_id, vanished_uids)
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(
                self.account_id, self.folder_id, flags, db_session)
Пример #7
0
    def initial_sync_impl(self, crispin_client):
        """Download every remote UID that is not yet stored locally.

        Removes local UIDs that vanished remotely, records the UID counts,
        starts a ``poll_for_changes`` greenlet, and downloads the new UIDs
        one at a time from highest to lowest. For throttled accounts, each
        download from the THROTTLE_COUNT-th onward is followed by a
        THROTTLE_WAIT sleep. The poller greenlet is always killed on exit.
        """
        # try/finally so the poller greenlet is killed even when this
        # greenlet is interrupted.
        poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            server_uids = crispin_client.all_uids()
            with self.syncmanager_lock:
                with session_scope(self.namespace_id) as db_session:
                    stored_uids = common.local_uids(
                        self.account_id, db_session, self.folder_id)
                stale_uids = set(stored_uids) - set(server_uids)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id, stale_uids)

            missing_uids = set(server_uids) - set(stored_uids)
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                throttled = account.throttled
                # download_uid_count is the initial size of the download
                # queue.
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(server_uids),
                    download_uid_count=len(missing_uids),
                )

            poller = gevent.spawn(self.poll_for_changes)
            bind_context(poller, "changepoller", self.account_id,
                         self.folder_id)

            newest_first = sorted(missing_uids, reverse=True)
            for done, uid in enumerate(newest_first, 1):
                # Batching shows a less clear speedup for non-Gmail
                # accounts, so UIDs are downloaded one at a time for now.
                self.download_and_commit_uids(crispin_client, [uid])
                self.heartbeat_status.publish()
                if throttled and done >= THROTTLE_COUNT:
                    # Throttled accounts slow down after roughly the first
                    # THROTTLE_COUNT messages per folder. The limit is
                    # approximate because it counts UIDs, not messages.
                    gevent.sleep(THROTTLE_WAIT)
        finally:
            if poller is not None:
                # Schedule the poller greenlet to die.
                gevent.kill(poller)
Пример #8
0
    def condstore_refresh_flags(self, crispin_client):
        """Apply flag changes and expunges reported since the saved modseq.

        Asks the server for the folder's HIGHESTMODSEQ and compares it with
        ``self.highestmodseq``. Nothing is done when the value is unchanged
        or has gone backwards (the latter is logged). Otherwise changed
        flags are persisted and local UIDs missing remotely are removed,
        after first fetching any new UIDs that appeared in the meantime.
        Finally the saved modseq is advanced to the server value.
        """
        status = crispin_client.conn.folder_status(
            self.folder_name, ['HIGHESTMODSEQ'])
        server_modseq = status['HIGHESTMODSEQ']

        # Seed the saved value on the first poll so the comparisons below
        # always have something to compare against.
        if self.highestmodseq is None:
            self.highestmodseq = server_modseq

        if server_modseq == self.highestmodseq:
            # No change on the server; nothing to refresh.
            return
        if server_modseq < self.highestmodseq:
            # Not expected to happen; log it and leave local state alone.
            log.warning(
                'got server highestmodseq less than saved highestmodseq',
                new_highestmodseq=server_modseq,
                saved_highestmodseq=self.highestmodseq)
            return

        # The server value moved forward: fetch what changed since our
        # saved modseq.
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        flag_changes = crispin_client.condstore_changed_flags(
            self.highestmodseq)
        server_uids = crispin_client.all_uids()
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(
                self.account_id, self.folder_id, flag_changes, db_session)
            stored_uids = common.local_uids(
                self.account_id, db_session, self.folder_id)
            vanished_uids = set(stored_uids) - set(server_uids)

        if not vanished_uids:
            self.highestmodseq = server_modseq
            return

        # Before expunging, save any UIDs that appeared since the last
        # get_new_uids run; having the latest UIDs first is what lets draft
        # revisions be captured properly.
        with session_scope(self.namespace_id) as db_session:
            lastseenuid = common.lastseenuid(
                self.account_id, db_session, self.folder_id)
        if server_uids and lastseenuid < max(server_uids):
            log.info('Downloading new UIDs before expunging')
            self.get_new_uids(crispin_client)
        with session_scope(self.namespace_id) as db_session:
            common.remove_deleted_uids(
                self.account_id, self.folder_id, vanished_uids, db_session)
            db_session.commit()
        self.highestmodseq = server_modseq
Пример #9
0
    def refresh_flags_impl(self, crispin_client, max_uids):
        """Refresh flags for up to ``max_uids`` locally stored UIDs.

        Fetches flags for the selected local UIDs, then, in one database
        session, removes the UIDs the server did not report back and
        persists the returned flags.
        """
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        with session_scope(self.namespace_id) as db_session:
            stored_uids = common.local_uids(
                account_id=self.account_id,
                session=db_session,
                folder_id=self.folder_id,
                limit=max_uids)

        flags = crispin_client.flags(stored_uids)
        # UIDs we asked about that are missing from the response are gone.
        vanished_uids = set(stored_uids) - set(flags.keys())
        with session_scope(self.namespace_id) as db_session:
            common.remove_deleted_uids(
                self.account_id, self.folder_id, vanished_uids, db_session)
            common.update_metadata(
                self.account_id, self.folder_id, flags, db_session)
Пример #10
0
    def condstore_refresh_flags(self, crispin_client):
        """Sync flag changes and expunges when HIGHESTMODSEQ has advanced.

        Compares the server's HIGHESTMODSEQ for the folder with
        ``self.highestmodseq``. Returns early when it is unchanged, and
        logs a warning and returns when it is lower. Otherwise persists the
        changed flags, removes local UIDs that are missing remotely (after
        fetching any newly appeared UIDs), and stores the new modseq.
        """
        folder_status = crispin_client.conn.folder_status(
            self.folder_name, ['HIGHESTMODSEQ'])
        latest_modseq = folder_status['HIGHESTMODSEQ']

        # Make sure a saved value exists before polling for changes.
        if self.highestmodseq is None:
            self.highestmodseq = latest_modseq

        if latest_modseq == self.highestmodseq:
            # Unchanged since the last poll.
            return
        if latest_modseq < self.highestmodseq:
            # Should never really happen, but handle it if it does.
            log.warning(
                'got server highestmodseq less than saved highestmodseq',
                new_highestmodseq=latest_modseq,
                saved_highestmodseq=self.highestmodseq)
            return

        # HIGHESTMODSEQ advanced; pull the changes.
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        updated_flags = crispin_client.condstore_changed_flags(
            self.highestmodseq)
        uids_on_server = crispin_client.all_uids()
        with session_scope(self.namespace_id) as db_session:
            common.update_metadata(
                self.account_id, self.folder_id, updated_flags, db_session)
            uids_in_db = common.local_uids(
                self.account_id, db_session, self.folder_id)
            gone_uids = set(uids_in_db) - set(uids_on_server)

        if not gone_uids:
            self.highestmodseq = latest_modseq
            return

        # New UIDs may have appeared since get_new_uids last ran. Save them
        # before expunging anything so draft revisions are captured
        # properly.
        with session_scope(self.namespace_id) as db_session:
            lastseenuid = common.lastseenuid(
                self.account_id, db_session, self.folder_id)
        if uids_on_server and lastseenuid < max(uids_on_server):
            log.info('Downloading new UIDs before expunging')
            self.get_new_uids(crispin_client)
        with session_scope(self.namespace_id) as db_session:
            common.remove_deleted_uids(
                self.account_id, self.folder_id, gone_uids, db_session)
            db_session.commit()
        self.highestmodseq = latest_modseq
Пример #11
0
    def initial_sync_impl(self, crispin_client):
        """Download every remote UID that is not yet stored locally.

        Removes local UIDs that vanished remotely, records the UID counts,
        starts a ``poll_for_changes`` greenlet, and downloads the new UIDs
        one at a time from highest to lowest. For throttled accounts, each
        download from the THROTTLE_COUNT-th onward is followed by a
        THROTTLE_WAIT sleep. The poller greenlet is always killed on exit.
        """
        # try/finally so the poller greenlet is killed even when this
        # greenlet is interrupted.
        poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            server_uids = crispin_client.all_uids()
            with self.syncmanager_lock, session_scope(
                    self.namespace_id) as db_session:
                stored_uids = common.local_uids(
                    self.account_id, db_session, self.folder_id)
                stale_uids = set(stored_uids) - set(server_uids)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id, stale_uids, db_session)

            missing_uids = set(server_uids) - set(stored_uids)
            with session_scope(self.namespace_id) as db_session:
                account = db_session.query(Account).get(self.account_id)
                throttled = account.throttled
                # download_uid_count is the initial size of the download
                # queue.
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(server_uids),
                    download_uid_count=len(missing_uids))

            poller = spawn(self.poll_for_changes)
            bind_context(poller, 'changepoller', self.account_id,
                         self.folder_id)

            newest_first = sorted(missing_uids, reverse=True)
            for done, uid in enumerate(newest_first, 1):
                # Batching shows a less clear speedup for non-Gmail
                # accounts, so UIDs are downloaded one at a time for now.
                self.download_and_commit_uids(crispin_client, [uid])
                self.heartbeat_status.publish()
                if throttled and done >= THROTTLE_COUNT:
                    # Throttled accounts slow down after roughly the first
                    # THROTTLE_COUNT messages per folder. The limit is
                    # approximate because it counts UIDs, not messages.
                    sleep(THROTTLE_WAIT)
        finally:
            if poller is not None:
                # Schedule the poller greenlet to die.
                kill(poller)
Пример #12
0
    def initial_sync_impl(self, crispin_client):
        """Download every remote UID that is not yet stored locally.

        Removes local UIDs that vanished remotely, records the UID counts,
        starts a ``poll_for_changes`` greenlet, and downloads the new UIDs
        one at a time from highest to lowest, publishing a heartbeat after
        each. The poller greenlet is always killed on exit.
        """
        # try/finally so the poller greenlet is killed even when this
        # greenlet is interrupted.
        poller = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            server_uids = crispin_client.all_uids()
            with self.syncmanager_lock, session_scope() as db_session:
                stored_uids = common.local_uids(
                    self.account_id, db_session, self.folder_id)
                stale_uids = set(stored_uids) - set(server_uids)
                common.remove_deleted_uids(
                    self.account_id, self.folder_id, stale_uids, db_session)

            missing_uids = set(server_uids) - set(stored_uids)
            with session_scope() as db_session:
                # download_uid_count is the initial size of the download
                # queue.
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(server_uids),
                    download_uid_count=len(missing_uids))

            poller = spawn(self.poll_for_changes)
            bind_context(poller, 'changepoller', self.account_id,
                         self.folder_id)

            for uid in sorted(missing_uids, reverse=True):
                # Batching shows a less clear speedup for non-Gmail
                # accounts, so UIDs are downloaded one at a time for now.
                self.download_and_commit_uids(crispin_client, [uid])
                self.heartbeat_status.publish()
        finally:
            if poller is not None:
                # Schedule the poller greenlet to die.
                kill(poller)
Пример #13
0
    def initial_sync_impl(self, crispin_client):
        """Bring the local folder up to date with the remote UID list.

        Under ``self.syncmanager_lock``, local UIDs absent from the remote
        list are removed. The remote and to-download counts are then
        recorded, a ``poll_for_changes`` greenlet is started, and each new
        UID is downloaded individually in descending order with a heartbeat
        published after every one. The poller is killed when this method
        exits for any reason.
        """
        # The finally clause is what guarantees the poller is cleaned up if
        # this greenlet gets interrupted.
        poll_greenlet = None
        try:
            assert crispin_client.selected_folder_name == self.folder_name
            uids_on_server = crispin_client.all_uids()
            with self.syncmanager_lock, session_scope() as db_session:
                uids_in_db = common.local_uids(
                    self.account_id, db_session, self.folder_id)
                common.remove_deleted_uids(
                    self.account_id,
                    self.folder_id,
                    set(uids_in_db) - set(uids_on_server),
                    db_session)

            to_download = set(uids_on_server) - set(uids_in_db)
            with session_scope() as db_session:
                # download_uid_count starts out as the size of the download
                # queue.
                self.update_uid_counts(
                    db_session,
                    remote_uid_count=len(uids_on_server),
                    download_uid_count=len(to_download))

            poll_greenlet = spawn(self.poll_for_changes)
            bind_context(poll_greenlet, 'changepoller', self.account_id,
                         self.folder_id)

            for uid in sorted(to_download, reverse=True):
                # One UID per call: the benefit of batching is less clear
                # for non-Gmail accounts.
                self.download_and_commit_uids(crispin_client, [uid])
                self.heartbeat_status.publish()
        finally:
            if poll_greenlet is not None:
                # Schedule the poller greenlet to die.
                kill(poll_greenlet)
Пример #14
0
    def condstore_refresh_flags(self, crispin_client):
        """Apply flag changes and expunges reported since the saved modseq.

        Asks the server for the folder's HIGHESTMODSEQ and compares it with
        ``self.highestmodseq``. Nothing is done when the value is unchanged
        or has gone backwards (the latter is logged). Otherwise the changed
        flags are persisted in batches ordered by modseq, local UIDs missing
        remotely are removed (after fetching any newly appeared UIDs), and
        the saved modseq is advanced to the server value.
        """
        new_highestmodseq = crispin_client.conn.folder_status(
            self.folder_name, ['HIGHESTMODSEQ'])['HIGHESTMODSEQ']
        # Ensure that we have an initial highestmodseq value stored before we
        # begin polling for changes.
        if self.highestmodseq is None:
            self.highestmodseq = new_highestmodseq

        if new_highestmodseq == self.highestmodseq:
            # Don't need to do anything if the highestmodseq hasn't
            # changed.
            return
        elif new_highestmodseq < self.highestmodseq:
            # This should really never happen, but if it does, handle it.
            log.warning('got server highestmodseq less than saved '
                        'highestmodseq',
                        new_highestmodseq=new_highestmodseq,
                        saved_highestmodseq=self.highestmodseq)
            return

        log.info('HIGHESTMODSEQ has changed, getting changed UIDs',
                 new_highestmodseq=new_highestmodseq,
                 saved_highestmodseq=self.highestmodseq)
        crispin_client.select_folder(self.folder_name, self.uidvalidity_cb)
        changed_flags = crispin_client.condstore_changed_flags(
            self.highestmodseq)
        remote_uids = crispin_client.all_uids()

        # In order to be able to sync changes to tens of thousands of flags at
        # once, we commit updates in batches. We do this in ascending order by
        # modseq and periodically "checkpoint" our saved highestmodseq. (It's
        # safe to checkpoint *because* we go in ascending order by modseq.)
        # That way if the process gets restarted halfway through this refresh,
        # we don't have to completely start over. It's also slow to load many
        # objects into the SQLAlchemy session and then issue lots of commits;
        # we avoid that by batching.
        #
        # The sort key indexes into the (uid, flags) pair instead of using a
        # tuple-unpacking lambda, which is a syntax error on Python 3.
        flag_batches = chunk(
            sorted(changed_flags.items(), key=lambda item: item[1].modseq),
            CONDSTORE_FLAGS_REFRESH_BATCH_SIZE)
        for flag_batch in flag_batches:
            with session_scope(self.namespace_id) as db_session:
                common.update_metadata(self.account_id, self.folder_id,
                                       self.folder_role, dict(flag_batch),
                                       db_session)
            # Only full batches checkpoint; the final partial batch is
            # covered by the assignment at the end of the method.
            if len(flag_batch) == CONDSTORE_FLAGS_REFRESH_BATCH_SIZE:
                interim_highestmodseq = max(v.modseq for _, v in flag_batch)
                self.highestmodseq = interim_highestmodseq

        with session_scope(self.namespace_id) as db_session:
            local_uids = common.local_uids(self.account_id, db_session,
                                           self.folder_id)
            expunged_uids = set(local_uids).difference(remote_uids)

        if expunged_uids:
            # If new UIDs have appeared since we last checked in
            # get_new_uids, save them first. We want to always have the
            # latest UIDs before expunging anything, in order to properly
            # capture draft revisions.
            with session_scope(self.namespace_id) as db_session:
                lastseenuid = common.lastseenuid(self.account_id, db_session,
                                                 self.folder_id)
            if remote_uids and lastseenuid < max(remote_uids):
                log.info('Downloading new UIDs before expunging')
                self.get_new_uids(crispin_client)
            common.remove_deleted_uids(self.account_id, self.folder_id,
                                       expunged_uids)
        self.highestmodseq = new_highestmodseq