def do_import_content(mbox, only_new=True):
    """Fetch messages from the IMAP account described by ``mbox`` and
    import them into the database, then re-thread the discussion.

    :param mbox: mailbox model object; expected to provide the connection
        settings (``use_ssl``, ``host``, ``port``, ``username``,
        ``password``, ``folder``), a DB session as ``db``, import helpers
        (``message_ok_to_import``, ``parse_email``) and the bookkeeping
        attributes ``last_imported_email_uid`` and ``discussion_id``.
    :param only_new: when True and a previous run recorded a UID, only
        messages with a UID at or after that value are fetched; otherwise
        all messages are fetched and duplicate detection is relied upon.
    """
    # Re-attach the (possibly detached) mailbox object to its session.
    mbox = mbox.db.merge(mbox)
    session = mbox.db
    session.add(mbox)
    if mbox.use_ssl:
        mailbox = IMAP4_SSL(host=mbox.host.encode('utf-8'), port=mbox.port)
    else:
        mailbox = IMAP4(host=mbox.host.encode('utf-8'), port=mbox.port)
        # Always use STARTTLS if the server supports it.  Only attempted
        # on the plaintext branch: calling starttls() on an IMAP4_SSL
        # connection (already TLS) would fail.
        if 'STARTTLS' in mailbox.capabilities:
            mailbox.starttls()

    def import_email(mailbox_obj, email_id):
        # Fetch one message by UID, parse it and add it to the session.
        session = mailbox_obj.db
        status, message_data = mailbox.uid('fetch', email_id, "(RFC822)")
        assert status == 'OK'
        # The tuple response part carries the raw RFC822 message body.
        message_string = None
        for response_part in message_data:
            if isinstance(response_part, tuple):
                message_string = response_part[1]
        assert message_string
        if mailbox_obj.message_ok_to_import(message_string):
            (email_object, dummy, error) = mailbox_obj.parse_email(
                message_string)
            if error:
                raise Exception(error)
            session.add(email_object)
            translate_content(email_object)  # should delay
        else:
            print(
                "Skipped message with imap id %s (bounce or vacation message)"
                % (email_id))
        # Remember progress so a later only_new import can resume here.
        mailbox_obj.last_imported_email_uid = email_id

    try:
        mailbox.login(mbox.username, mbox.password)
        mailbox.select(mbox.folder)

        command = "ALL"
        search_status = None
        email_ids = None
        if only_new and mbox.last_imported_email_uid:
            # Resume from the last imported UID (range is inclusive).
            command = "(UID %s:*)" % mbox.last_imported_email_uid
            search_status, search_result = mailbox.uid(
                'search', None, command)
            email_ids = search_result[0].split()

        if (only_new and search_status == 'OK' and email_ids
                and email_ids[0] == mbox.last_imported_email_uid):
            # Note: the email_ids[0] == mbox.last_imported_email_uid test
            # is necessary because according to
            # https://tools.ietf.org/html/rfc3501 a seq-range like "3291:*
            # includes the UID of the last message in the mailbox, even if
            # that value is less than 3291."
            # Discard the first message, it should be the last imported
            # email.
            del email_ids[0]
        else:
            # Either:
            # a) we don't import only new messages, or
            # b) the message with mbox.last_imported_email_uid wasn't found
            #    (it may have been deleted).
            # In that case request all messages and rely on duplicate
            # detection.
            command = "ALL"
            search_status, search_result = mailbox.uid(
                'search', None, command)
            assert search_status == 'OK'
            email_ids = search_result[0].split()

        if email_ids:
            print("Processing messages from IMAP: %d " % (len(email_ids)))
            # One transaction per message so progress survives a failure
            # partway through the batch.
            for email_id in email_ids:
                with transaction.manager:
                    import_email(mbox, email_id)
        else:
            print("No IMAP messages to process")
        discussion_id = mbox.discussion_id
    finally:
        # Always release the IMAP connection, even when the import fails.
        try:
            mailbox.close()
        except Exception:
            # close() fails if no folder was selected; logout() still runs.
            pass
        mailbox.logout()

    with transaction.manager:
        if email_ids:
            # We imported mails, we need to re-thread.
            emails = session.query(Email).filter(
                Email.discussion_id == discussion_id,
            ).options(
                joinedload_all(Email.parent))
            AbstractMailbox.thread_mails(emails)
def do_import_content(mbox, only_new=True):
    """Fetch messages from the IMAP account described by ``mbox`` and
    import them into the database, then re-thread the discussion.

    Legacy (Python 2) variant: commits after every imported message and
    re-reads the mailbox object from the database between messages.

    :param mbox: mailbox model object; provides connection settings
        (use_ssl, host, port, username, password, folder), a DB session
        as ``db``, import helpers (message_ok_to_import, parse_email) and
        the bookkeeping attributes last_imported_email_uid and
        discussion_id.
    :param only_new: when True and a previous run recorded a UID, only
        messages with a UID at or after that value are fetched; otherwise
        all messages are fetched and duplicate detection is relied upon.
    """
    # Re-attach the (possibly detached) mailbox object to its session.
    mbox = mbox.db.merge(mbox)
    session = mbox.db
    session.add(mbox)
    if mbox.use_ssl:
        mailbox = IMAP4_SSL(host=mbox.host.encode('utf-8'), port=mbox.port)
    else:
        mailbox = IMAP4(host=mbox.host.encode('utf-8'), port=mbox.port)
    if 'STARTTLS' in mailbox.capabilities:
        # Always use starttls if server supports it.
        # NOTE(review): this is also reached on the IMAP4_SSL branch if
        # the server were to advertise STARTTLS there — presumably never
        # the case in practice; confirm.
        mailbox.starttls()
    mailbox.login(mbox.username, mbox.password)
    mailbox.select(mbox.folder)

    command = "ALL"
    search_status = None
    email_ids = None
    if only_new and mbox.last_imported_email_uid:
        # Resume from the last imported UID (range is inclusive).
        command = "(UID %s:*)" % mbox.last_imported_email_uid
        search_status, search_result = mailbox.uid('search', None, command)
        #print "UID searched with: "+ command + ", got result "+repr(search_status)+" and found "+repr(search_result)
        email_ids = search_result[0].split()
        #print email_ids

    if (only_new and search_status == 'OK' and email_ids
            and email_ids[0] == mbox.last_imported_email_uid):
        # Note: the email_ids[0]==mbox.last_imported_email_uid test is
        # necessary because according to https://tools.ietf.org/html/rfc3501
        # seq-range like "3291:* includes the UID of the last message in
        # the mailbox, even if that value is less than 3291."
        # Discard the first message, it should be the last imported email.
        del email_ids[0]
    else:
        # Either:
        # a) we don't import only new messages or
        # b) the message with mbox.last_imported_email_uid hasn't been found
        #    (may have been deleted)
        # In this case we request all messages and rely on duplicate
        # detection.
        command = "ALL"
        search_status, search_result = mailbox.uid('search', None, command)
        #print "UID searched with: "+ command + ", got result "+repr(search_status)+" and found "+repr(search_result)
        assert search_status == 'OK'
        email_ids = search_result[0].split()

    def import_email(mailbox_obj, email_id):
        # Fetch one message by UID, parse it, store it and record progress.
        session = mailbox_obj.db
        #print "running fetch for message: "+email_id
        status, message_data = mailbox.uid('fetch', email_id, "(RFC822)")
        assert status == 'OK'
        #print repr(message_data)
        for response_part in message_data:
            if isinstance(response_part, tuple):
                # The tuple response part carries the raw RFC822 body.
                message_string = response_part[1]
        assert message_string
        if mailbox_obj.message_ok_to_import(message_string):
            (email_object, dummy, error) = mailbox_obj.parse_email(
                message_string)
            if error:
                raise Exception(error)
            session.add(email_object)
        else:
            print "Skipped message with imap id %s (bounce or vacation message)"% (email_id)
        #print "Setting mailbox_obj.last_imported_email_uid to "+email_id
        mailbox_obj.last_imported_email_uid = email_id
        # Commit after every message so progress survives a crash, then
        # re-fetch the mailbox object in the fresh transaction.
        transaction.commit()
        mailbox_obj = AbstractMailbox.get(mailbox_obj.id)

    if len(email_ids):
        print "Processing messages from IMAP: %d "% (len(email_ids))
        # import_email is called for its side effects only; the
        # comprehension result is a list of Nones.
        new_emails = [import_email(mbox, email_id)
                      for email_id in email_ids]
    else:
        print "No IMAP messages to process"
    discussion_id = mbox.discussion_id
    mailbox.close()
    mailbox.logout()
    mark_changed()
    transaction.commit()
    with transaction.manager:
        if len(email_ids):
            # We imported mails, we need to re-thread.
            emails = session.query(Email).filter(
                Email.discussion_id == discussion_id,
            ).options(joinedload_all(Email.parent))
            AbstractMailbox.thread_mails(emails)
            mark_changed()
import networkx as nx from Crypto.Hash import SHA256 import re if __name__ == "__main__": """ take as input a string, use it to hash the "From" fields as read from a mailing list, then save the graph in graphml format """ if len(sys.argv) > 3: crypto_key = sys.argv[3] else: crypto_key = None mbox_file = sys.argv[1] dictionary = sys.argv[2] mailbox = mailbox.mbox(mbox_file) g = mbparse.parse_mbox_fragment(mailbox, dictionary) mailbox.close() gg = g.copy() #if crypto_key: # for node in g.nodes(): # email = re.search(r'[\w\.-]+@[\w\.-]+', # node).group(0) # h = SHA256.new(crypto_key) # h.update(str(email)) # g = nx.relabel_nodes(g, {str(node): h.hexdigest()}, copy=False) ggg = nx.convert_node_labels_to_integers(g) import code code.interact(local=locals()) c_g = mbparse.get_communities(g) c_ggg = mbparse.get_communities(ggg) c_g_s = [ len(x) for x in