"""Write a per-sender corpus for every .eml file found under a directory.

Usage:
    <script> WORK_DIR CORPUS_FOLDER

WORK_DIR is handed to ``locate_eml`` to obtain the list of files to process.
For each file a ``Gchat`` object is built and its ``corpus_writer_yearly``
method is called with ``CORPUS_FOLDER`` + the message's from-address.
Files that fail are reported and skipped; progress is printed in 20% steps.
"""
import os
import sys

import pandas as pd

from gchat_eml import Gchat, locate_eml

# Fail with a usage message instead of a bare IndexError traceback when
# arguments are missing (or the corpus folder is an empty string, which the
# original indexed with [-1]).
if len(sys.argv) < 3 or not sys.argv[2]:
    sys.exit('usage: %s WORK_DIR CORPUS_FOLDER' % sys.argv[0])

work_dir = sys.argv[1]
corpus_folder = sys.argv[2]

files = locate_eml(work_dir)

# Output paths are built by plain concatenation below, so the folder must
# end with a separator.
if not corpus_folder.endswith('/'):
    corpus_folder += '/'
print(corpus_folder)

total = len(files)
# Index (0-5) of the last 20% bucket that has already been announced.
reported_bucket = 0

for ii, f in enumerate(files):
    try:
        gc = Gchat(f)
        gc.corpus_writer_yearly(corpus_folder + gc.msg_from_address)
    except Exception as exc:
        # Best-effort: one bad file must not abort the whole run, but
        # Ctrl-C / SystemExit are no longer swallowed, and the failing file
        # and reason are surfaced on stderr alongside the original marker.
        print('uwotmate')
        sys.stderr.write('failed to process %r: %r\n' % (f, exc))

    # Progress in 20% buckets based on files completed so far. Integer
    # arithmetic only, so it behaves the same on Python 2 and 3, never
    # divides by zero for short file lists, and never exceeds 100.
    bucket = (ii + 1) * 5 // total
    if bucket > reported_bucket:
        reported_bucket = bucket
        print('%d percent finished..' % (bucket * 20))