Exemplo n.º 1
0
def check_email():
    """Classify every inbox email sent since the last recorded check.

    Reads the previous check time from ``last_check_time.pkl``, fetches
    emails from ``'INBOX'`` through ``cpm.get_emails`` and, for each email
    whose ``sent_at`` is later than that time, cleans the subject and body,
    vectorizes them with the pickled feature model, predicts with the
    pickled classifier and passes the prediction to ``eval_email``.
    Afterwards the time at which this run started is pickled as the new
    last-check time.

    Returns:
        None. When no emails are fetched the function returns early and the
        stored last-check time is left unchanged.
    """
    # TODO: set script to run at intervals (time.sleep?) - morn, mid-day, afternoon

    # Taken before the fetch; this is the value saved as the next run's cut-off.
    current_time = datetime.now()
    last_check_path = os.path.join(pkl_dir, 'last_check_time.pkl')
    previous_check = cpm.unpickle(last_check_path)
    print(previous_check)
    emails = cpm.get_emails('INBOX', previous_check)

    if not emails:  # avoid unpickling the models if nothing was fetched
        return

    feature_model = cpm.unpickle(os.path.join(pkl_dir, 'final_vec.pkl'))
    classifier_model = cpm.unpickle(os.path.join(pkl_dir, 'final_model.pkl'))

    # Single pre-formatted argument keeps these prints valid on Python 2 and 3.
    print('classifier model %s' % (classifier_model,))

    for email in emails:
        if not (email.sent_at > previous_check):
            continue

        clean_b = cpm.clean_raw_txt(email.body)
        clean_s = cpm.clean_raw_txt(email.subject)
        print('clean_email %s %s' % (clean_s, clean_b))

        # Join with a space so the last word of one part is not fused with the
        # first word of the other; same "subject + ' ' + body" layout that
        # store_email uses for sub_body.
        # NOTE(review): confirm this matches the text the vectorizer was fitted on.
        email_text = clean_s + ' ' + clean_b
        email_features = feature_model.transform([email_text])
        print('email_bag %s' % (email_features.shape,))
        classifier_result = classifier_model.predict(email_features)

        eval_email(classifier_result, email)

    cpm.pickle(current_time, last_check_path)
Exemplo n.º 2
0
def store_email(email, box, email_owner):
    """Insert one email into the ``raw_data_2`` table.

    The subject and body are cleaned with ``cpm.clean_raw_txt`` and stored
    both separately and combined as ``sub_body`` (subject, a space, body).
    ``starred`` is set when the email carries the ``\\Starred`` label and
    ``target`` when it carries both ``\\Starred`` and ``Jeeves``.

    Args:
        email: object exposing ``message_id``, ``thread_id``, ``to``, ``fr``,
            ``cc``, ``sent_at``, ``subject``, ``body`` and ``labels``.
        box: value stored in the ``box`` column.
        email_owner: value stored in the ``email_owner`` column.

    Returns:
        None. If the insert raises ``psycopg2.IntegrityError`` (e.g. the row
        already exists) the email is skipped and a line describing it is
        appended to ``../not_needed/load_errors.txt``.
    """
    # Use nvarchar in case storing non-english data

    # TODO: make table name a variable
    store_e = '''
        insert into raw_data_2 
        (message_id, thread_id, to_email, from_email, cc, date, starred, subject, body, sub_body, email_owner, box, target)
        values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    '''
    # in psycopg - var placeholder must be %s even with int or dates or other types

    # prep text for storage
    body = cpm.clean_raw_txt(email.body)
    subject = cpm.clean_raw_txt(email.subject)
    sub_body = subject + ' ' + body

    # Mark the starred and target columns
    starred = '\\Starred' in email.labels
    target = starred and 'Jeeves' in email.labels

    values = (
        email.message_id, email.thread_id, email.to, email.fr, email.cc,
        email.sent_at, starred, subject, body, sub_body, email_owner, box,
        target,
    )

    # Could apply executemany with query and list of values but still need to do fetch first and apply changes
    with connect_db() as db:
        try:
            db.execute(store_e, values)
        except psycopg2.IntegrityError:
            # if exists then skip loading it
            print("Problem loading email")
            with open('../not_needed/load_errors.txt', 'a') as f:
                # write() takes a single string. %r keeps each record on one
                # line (newlines are escaped) and, on Python 2, ASCII-safe.
                f.write('%r\t%r\t%r\n' % (email.message_id, email.fr, body))