def download_queued_threads(crispin_client, db_session, log, folder_name,
                            message_download_stack, status_cb,
                            syncmanager_lock):
    """
    Drain `message_download_stack`, downloading one whole thread per entry.

    Entries popped from `message_download_stack` (their UIDs and
    g_metadata) belong to the _folder that threads are being expanded in_,
    i.e. `folder_name`. Threads are handled in the order the stack yields
    them, which _ought_ to be newest first; within a thread, messages are
    downloaded newest-to-oldest. Each thread is expanded to every message in
    the email archive (All Mail) that belongs to it.

    After a thread has been downloaded, an ImapUid row is recorded for
    `folder_name` for every queued message of that thread, and progress is
    reported through `status_cb`.
    """
    # NOTE(review): this file contains a second definition of
    # download_queued_threads with an extra `c` parameter; if both live in
    # the same module the later one shadows this one -- confirm.
    initial_count = message_download_stack.qsize()
    log.info("{} messages found initially (unsorted by thread)"
             .format(initial_count))
    log.info("Expanding threads and downloading messages.")

    # The caller's crispin connection is still needed for progress
    # reporting, so with the current pooling setup the simplest approach is
    # to take a separate client from the pool for querying All Mail.
    pool = connection_pool(crispin_client.account_id)
    with pool.get() as all_mail_crispin_client:
        all_mail_folder = crispin_client.folder_names()['all']
        validity_cb = uidvalidity_cb(db_session, crispin_client.account_id)
        all_mail_crispin_client.select_folder(all_mail_folder, validity_cb)

        # Because threads are expanded, already having the UID of a given
        # GMessage does not mean we have _every_ message of its thread, so
        # each thread is always expanded and checked in full.
        account = db_session.query(ImapAccount).get(crispin_client.account_id)

        while not message_download_stack.empty():
            head = message_download_stack.get_nowait()
            thrid = head.g_metadata.thrid

            # Pull every other queued message of this thread off the stack
            # so the thread is not downloaded again. This is done _before_
            # the download so that no context switch can happen in between;
            # a message arriving on the thread later _should_ trigger a
            # re-download.
            same_thread = []
            other_threads = []
            for queued in message_download_stack.queue:
                if queued.g_metadata.thrid == thrid:
                    same_thread.append(queued)
                else:
                    other_threads.append(queued)
            same_thread.append(head)
            message_download_stack.queue = other_threads

            uids = all_mail_crispin_client.expand_threads([thrid])
            metadata = all_mail_crispin_client.g_metadata(uids)
            download_thread(all_mail_crispin_client, db_session, log,
                            syncmanager_lock, metadata, thrid, uids)

            # In theory only one Greenlet ever modifies ImapUid entries for
            # a non-All Mail folder, but take the lock anyway to be safe.
            with syncmanager_lock:
                log.debug("download_queued_threads acquired syncmanager_lock")
                # Messages were downloaded from All Mail, so the ImapUids
                # for this folder (used in progress tracking, queuing, and
                # delete detection) have to be recorded separately.
                log.debug("Adding {} imapuid rows for {} processed messages"
                          .format(folder_name, len(same_thread)))
                for processed in same_thread:
                    add_new_imapuid(db_session, log, processed, folder_name,
                                    account)

            remaining = message_download_stack.qsize()
            report_progress(crispin_client, db_session, log, folder_name,
                            remaining, status_cb)

        log.info("Message download queue emptied")
def download_queued_threads(crispin_client, db_session, log, folder_name,
                            message_download_stack, status_cb,
                            syncmanager_lock, c):
    """
    Drain `message_download_stack`, downloading one whole thread per entry.

    Entries popped from `message_download_stack` (their UIDs and
    g_metadata) belong to the _folder that threads are being expanded in_,
    i.e. `folder_name`. Threads are handled in the order the stack yields
    them, which _ought_ to be newest first; within a thread, messages are
    downloaded newest-to-oldest. Each thread is expanded to every message in
    the email archive (All Mail) that belongs to it.

    `c` is handed to the calls made on `crispin_client` (folder lookup and
    progress reporting); All Mail queries use a separate connection obtained
    from a dedicated client created here.
    """
    # NOTE(review): this file contains another definition of
    # download_queued_threads without the `c` parameter; if both live in the
    # same module this later one shadows it -- confirm.
    initial_count = message_download_stack.qsize()
    log.info("{} messages found initially (unsorted by thread)"
             .format(initial_count))

    # The caller's crispin connection is still needed for progress
    # reporting, so with the current pooling setup the simplest approach is
    # to build a new single-connection crispin client for querying All Mail.
    all_mail_crispin_client = new_crispin(crispin_client.account_id,
                                          crispin_client.PROVIDER,
                                          conn_pool_size=1)
    log.info("Expanding threads and downloading messages.")

    with all_mail_crispin_client.pool.get() as all_mail_c:
        all_mail_folder = crispin_client.folder_names(c)['all']
        validity_cb = uidvalidity_cb(db_session, crispin_client.account_id)
        all_mail_crispin_client.select_folder(all_mail_folder, validity_cb,
                                              all_mail_c)

        # Because threads are expanded, already having the UID of a given
        # GMessage does not mean we have _every_ message of its thread, so
        # each thread is always expanded and checked in full.
        account_query = db_session.query(ImapAccount).join(Namespace)
        account = account_query.filter_by(
            id=crispin_client.account_id).one()

        while not message_download_stack.empty():
            head = message_download_stack.get_nowait()
            thrid = head.g_metadata.thrid

            # Pull every other queued message of this thread off the stack
            # so the thread is not downloaded again. This is done _before_
            # the download so that no context switch can happen in between;
            # a message arriving on the thread later _should_ trigger a
            # re-download.
            same_thread = []
            other_threads = []
            for queued in message_download_stack.queue:
                if queued.g_metadata.thrid == thrid:
                    same_thread.append(queued)
                else:
                    other_threads.append(queued)
            same_thread.append(head)
            message_download_stack.queue = other_threads

            uids = all_mail_crispin_client.expand_threads([thrid],
                                                          all_mail_c)
            metadata = all_mail_crispin_client.g_metadata(uids, all_mail_c)
            download_thread(all_mail_crispin_client, db_session, log,
                            syncmanager_lock, metadata, thrid, uids,
                            all_mail_c)

            # Messages were downloaded from All Mail, so the ImapUids for
            # this folder (used in progress tracking and delete detection)
            # have to be recorded separately.
            for processed in same_thread:
                add_new_imapuid(db_session, processed, folder_name, account)

            remaining = message_download_stack.qsize()
            report_progress(crispin_client, db_session, log, folder_name,
                            remaining, status_cb, c)

        log.info("Message download queue emptied")