Exemplo n.º 1
0
    def do_import_content(mbox, only_new=True):
        """Import messages from the IMAP folder described by ``mbox``.

        Connects to ``mbox.host``/``mbox.port`` (using SSL when
        ``mbox.use_ssl`` is set, and STARTTLS whenever the server
        advertises it), logs in, selects ``mbox.folder`` and imports every
        message returned by a UID search, one transaction per message.
        If the search returned at least one UID, the emails of the
        mailbox's discussion are re-threaded afterwards.

        :param mbox: mailbox object; this function reads its ``db``,
            ``use_ssl``, ``host``, ``port``, ``username``, ``password``,
            ``folder``, ``last_imported_email_uid`` and ``discussion_id``
            attributes and calls its ``message_ok_to_import`` and
            ``parse_email`` methods.
        :param only_new: when true and ``mbox.last_imported_email_uid`` is
            set, search only from that UID onwards; otherwise, or when
            that UID is not the first search hit, search ``ALL``.
        :raises AssertionError: if the ``ALL`` search or a fetch does not
            answer ``OK``, or a fetch response carries no message body.
        :raises Exception: if ``parse_email`` reports an error.
        """
        mbox = mbox.db.merge(mbox)
        session = mbox.db
        session.add(mbox)
        if mbox.use_ssl:
            mailbox = IMAP4_SSL(host=mbox.host.encode('utf-8'), port=mbox.port)
        else:
            mailbox = IMAP4(host=mbox.host.encode('utf-8'), port=mbox.port)

        def import_email(mailbox_obj, email_id):
            """Fetch one message by UID and store it if it is importable."""
            session = mailbox_obj.db
            status, message_data = mailbox.uid('fetch', email_id, "(RFC822)")
            assert status == 'OK'

            # Start from None so that a response without any tuple part
            # trips the assertion below instead of an UnboundLocalError.
            message_string = None
            for response_part in message_data:
                if isinstance(response_part, tuple):
                    message_string = response_part[1]
            assert message_string, (
                "No message body in FETCH response for imap id %s"
                % (email_id,))
            if mailbox_obj.message_ok_to_import(message_string):
                (email_object, dummy,
                 error) = mailbox_obj.parse_email(message_string)
                if error:
                    raise Exception(error)
                session.add(email_object)
                translate_content(email_object)  # should delay
            else:
                print(
                    "Skipped message with imap id %s (bounce or vacation message)"
                    % (email_id))
            # Record progress even for skipped messages so they are not
            # fetched again by the next "only new" run.
            mailbox_obj.last_imported_email_uid = email_id

        # Everything that talks to the server runs inside try/finally so
        # the connection is logged out even when an import step raises.
        try:
            if 'STARTTLS' in mailbox.capabilities:
                # Always use starttls if server supports it
                mailbox.starttls()
            mailbox.login(mbox.username, mbox.password)
            mailbox.select(mbox.folder)

            search_status = None
            email_ids = None
            if only_new and mbox.last_imported_email_uid:
                command = "(UID %s:*)" % mbox.last_imported_email_uid
                search_status, search_result = mailbox.uid(
                    'search', None, command)
                email_ids = search_result[0].split()

            if (only_new and search_status == 'OK' and email_ids
                    and email_ids[0] == mbox.last_imported_email_uid):
                # Note:  the email_ids[0]==mbox.last_imported_email_uid test
                # is necessary because according to
                # https://tools.ietf.org/html/rfc3501 a seq-range like
                # "3291:* includes the UID of the last message in the
                # mailbox, even if that value is less than 3291."

                # Discard the first message, it should be the last
                # imported email.
                del email_ids[0]
            else:
                # Either:
                # a) we don't import only new messages or
                # b) the message with mbox.last_imported_email_uid hasn't
                #    been found (may have been deleted)
                # In this case we request all messages and rely on
                # duplicate detection
                command = "ALL"
                search_status, search_result = mailbox.uid(
                    'search', None, command)
                assert search_status == 'OK'
                email_ids = search_result[0].split()

            if email_ids:
                print("Processing messages from IMAP: %d " % (len(email_ids)))
                for email_id in email_ids:
                    # One transaction per message.
                    with transaction.manager:
                        import_email(mbox, email_id)
            else:
                print("No IMAP messages to process")

            discussion_id = mbox.discussion_id
            mailbox.close()
        finally:
            mailbox.logout()

        with transaction.manager:
            if email_ids:
                # We imported mails, we need to re-thread
                emails = session.query(Email).filter(
                    Email.discussion_id == discussion_id, ).options(
                        joinedload_all(Email.parent))

                AbstractMailbox.thread_mails(emails)
Exemplo n.º 2
0
    def do_import_content(mbox, only_new=True):
        """Import messages from the IMAP folder described by ``mbox``.

        Connects to ``mbox.host``/``mbox.port`` (using SSL when
        ``mbox.use_ssl`` is set, and STARTTLS whenever the server
        advertises it), logs in, selects ``mbox.folder`` and imports every
        message returned by a UID search, committing the transaction after
        each message.  If the search returned at least one UID, the emails
        of the mailbox's discussion are re-threaded afterwards.

        :param mbox: mailbox object; this function reads its ``db``,
            ``use_ssl``, ``host``, ``port``, ``username``, ``password``,
            ``folder``, ``last_imported_email_uid`` and ``discussion_id``
            attributes and calls its ``message_ok_to_import`` and
            ``parse_email`` methods.
        :param only_new: when true and ``mbox.last_imported_email_uid`` is
            set, search only from that UID onwards; otherwise, or when
            that UID is not the first search hit, search ``ALL``.
        :raises AssertionError: if the ``ALL`` search or a fetch does not
            answer ``OK``, or a fetch response carries no message body.
        :raises Exception: if ``parse_email`` reports an error.
        """
        mbox = mbox.db.merge(mbox)
        session = mbox.db
        session.add(mbox)
        if mbox.use_ssl:
            mailbox = IMAP4_SSL(host=mbox.host.encode('utf-8'), port=mbox.port)
        else:
            mailbox = IMAP4(host=mbox.host.encode('utf-8'), port=mbox.port)

        def import_email(mailbox_obj, email_id):
            """Fetch one message by UID, store it if importable, commit."""
            session = mailbox_obj.db
            status, message_data = mailbox.uid('fetch', email_id, "(RFC822)")
            assert status == 'OK'

            # Start from None so that a response without any tuple part
            # trips the assertion below instead of an UnboundLocalError.
            message_string = None
            for response_part in message_data:
                if isinstance(response_part, tuple):
                    message_string = response_part[1]
            assert message_string, (
                "No message body in FETCH response for imap id %s"
                % (email_id,))
            if mailbox_obj.message_ok_to_import(message_string):
                (email_object, dummy, error) = mailbox_obj.parse_email(message_string)
                if error:
                    raise Exception(error)
                session.add(email_object)
            else:
                # Single-argument parenthesised print: same output on
                # Python 2, and also valid syntax on Python 3.
                print("Skipped message with imap id %s (bounce or vacation message)"% (email_id))
            # Record progress even for skipped messages so they are not
            # fetched again by the next "only new" run.
            mailbox_obj.last_imported_email_uid = email_id
            transaction.commit()
            # NOTE(review): this only rebinds the local name; it is kept
            # in case AbstractMailbox.get has a needed side effect after
            # the commit -- confirm before removing.
            mailbox_obj = AbstractMailbox.get(mailbox_obj.id)

        # Everything that talks to the server runs inside try/finally so
        # the connection is logged out even when an import step raises.
        try:
            if 'STARTTLS' in mailbox.capabilities:
                # Always use starttls if server supports it
                mailbox.starttls()
            mailbox.login(mbox.username, mbox.password)
            mailbox.select(mbox.folder)

            search_status = None
            email_ids = None
            if only_new and mbox.last_imported_email_uid:
                command = "(UID %s:*)" % mbox.last_imported_email_uid
                search_status, search_result = mailbox.uid(
                    'search', None, command)
                email_ids = search_result[0].split()

            if (only_new and search_status == 'OK' and email_ids
                    and email_ids[0] == mbox.last_imported_email_uid):
                # Note:  the email_ids[0]==mbox.last_imported_email_uid test
                # is necessary because according to
                # https://tools.ietf.org/html/rfc3501 a seq-range like
                # "3291:* includes the UID of the last message in the
                # mailbox, even if that value is less than 3291."

                # Discard the first message, it should be the last
                # imported email.
                del email_ids[0]
            else:
                # Either:
                # a) we don't import only new messages or
                # b) the message with mbox.last_imported_email_uid hasn't
                #    been found (may have been deleted)
                # In this case we request all messages and rely on
                # duplicate detection
                command = "ALL"
                search_status, search_result = mailbox.uid(
                    'search', None, command)
                assert search_status == 'OK'
                email_ids = search_result[0].split()

            if email_ids:
                print("Processing messages from IMAP: %d "% (len(email_ids)))
                # Plain loop: import_email is called for its side effects
                # and returns nothing worth collecting.
                for email_id in email_ids:
                    import_email(mbox, email_id)
            else:
                print("No IMAP messages to process")

            discussion_id = mbox.discussion_id
            mailbox.close()
        finally:
            mailbox.logout()
        mark_changed()
        transaction.commit()

        with transaction.manager:
            if email_ids:
                # We imported mails, we need to re-thread
                emails = session.query(Email).filter(
                    Email.discussion_id == discussion_id,
                    ).options(joinedload_all(Email.parent))

                AbstractMailbox.thread_mails(emails)
                mark_changed()
Exemplo n.º 3
0
import networkx as nx
from Crypto.Hash import SHA256
import re

if __name__ == "__main__":
    """ take as input a string, use it to hash the "From" fields
    as read from a mailing list, then save the graph in graphml format """
    if len(sys.argv) > 3:
        crypto_key = sys.argv[3]
    else:
        crypto_key = None
    mbox_file = sys.argv[1]
    dictionary = sys.argv[2]
    mailbox = mailbox.mbox(mbox_file)
    g = mbparse.parse_mbox_fragment(mailbox, dictionary)
    mailbox.close()
    gg = g.copy()
    #if crypto_key:
    #    for node in g.nodes():
    #        email = re.search(r'[\w\.-]+@[\w\.-]+',
    #                          node).group(0)
    #        h = SHA256.new(crypto_key)
    #        h.update(str(email))
    #        g = nx.relabel_nodes(g, {str(node): h.hexdigest()}, copy=False)
    ggg = nx.convert_node_labels_to_integers(g)
    import code
    code.interact(local=locals())
    c_g = mbparse.get_communities(g)
    c_ggg = mbparse.get_communities(ggg)
    c_g_s = [
        len(x) for x in