def main():
    """Ask which operation to run on standard input and dispatch it.

    Accepted answers (surrounding whitespace is ignored):

    * ``1`` -- build a ``feature_manager``, call ``calculate_feature`` and
      hand its ``word_storage`` / ``median_arr`` to ``output_feature1`` /
      ``output_feature2``.
    * ``2`` -- build a ``multi_worker``, call ``multi_calculate_feature``,
      output its ``total_word_storage`` / ``total_median_arr`` the same
      way, then call ``clean_tmp_file()``.
    * ``3`` -- after a ``y``/``Y`` confirmation, call ``__delete_all__()``
      on a ``feature_manager`` and then ``clean_tmp_file()``.

    Any other answer is reported and nothing else happens.

    Reads ``s_para`` and ``f_para`` from module scope; they are defined
    outside this function.
    """
    # select operations
    # The prompt lists three options, so it names all three.
    tmp = raw_input(
        'Select 1, 2 or 3 to continue: \n'
        '[1] Generate feature 1 and 2 using single thread\n'
        '[2] Generate feature 1 and 2 using multiple thread (for large data size)\n'
        '[3] Clear all data (DANGEROUS !!!!)\n'
        'P.S. I assume the new data will be appended into the end of tweets.txt. '
        'It will not take into account changes in previous tweets in tweets.txt\n'
    ).strip()

    # for generating features
    if tmp == '1':
        f_manager = feature_manager(s_para)
        f_manager.calculate_feature(f_para)
        # output features
        output_feature1(f_manager.word_storage)
        output_feature2(f_manager.median_arr)
    elif tmp == '2':
        m_worker = multi_worker(s_para)
        m_worker.multi_calculate_feature(f_para)
        # output features
        output_feature1(m_worker.total_word_storage)
        output_feature2(m_worker.total_median_arr)
        # clean temporary file
        clean_tmp_file()
    # for delete all features
    elif tmp == '3':
        confirm = raw_input('[WARNING] It will deleted all data. Are you sure that you want to continue(y/n)??')
        if confirm.lower() == 'y':
            f_manager = feature_manager(s_para)
            f_manager.__delete_all__()
            clean_tmp_file()
            # Single-argument parenthesised form prints the same text
            # with the Python 2 print statement.
            print('data are deleted')
    else:
        # Previously an unknown answer fell through without any feedback.
        print('Unrecognized selection %r; nothing was done' % tmp)
def __init__(self,s_para):
    """Set up the main feature manager and the aggregate attributes.

    ``s_para`` is passed to ``feature_manager`` unchanged and must contain
    the key ``'storage_filename'``.

    The ``total_*`` attributes are bound to the very same objects held by
    the main manager (no copies are made here).  ``par_f_manager`` starts
    as an empty dict; it is filled in later (see ``process_data_par``).
    """
    manager = feature_manager(s_para)
    self.main_f_manager = manager

    # Position reported by the main manager at construction time.
    self.last_pos = manager.get_last_pos()
    self.total_storage_name = s_para['storage_filename']

    # Additional managers, keyed elsewhere; empty for now.
    self.par_f_manager = {}

    # Aggregated views start out as aliases of the main manager's state.
    self.total_word_storage = manager.word_storage
    self.total_median_arr = manager.median_arr
    self.total_indiv_word_arr = manager.indiv_word_arr
    self.total_unsorted_indiv_word_arr = manager.unsorted_indiv_word_arr
def process_data_par(self):
    """Create one feature_manager per file index and run them on a thread pool.

    For every ``i`` in ``range(self.num_of_file)`` a ``feature_manager`` is
    built with a storage file named ``data_storage_<i>.p`` (``i`` zero-padded
    to ``self.suffix_length`` digits) under the module-level
    ``storage_dir_p`` directory, and stored in ``self.par_f_manager`` under
    the padded index string.  ``self.let_worker_run`` is then mapped over
    the same indices on a ``ThreadPool`` of ``num_of_worker`` threads.

    The pool is always closed and joined, also when a worker raises; the
    exception from ``map`` still propagates to the caller.
    """
    logging.info ('start to calculate feature in parallel')

    for i in range(self.num_of_file):
        suffix = str(i).zfill(self.suffix_length)
        # Each manager gets its own settings dict.  Reusing one dict and
        # mutating it per iteration would leave every manager pointing at
        # the last worker's values if feature_manager keeps the reference.
        worker_para = {
            'storage_filename': os.path.join(
                storage_dir_p, 'data_storage_%s.p' % suffix),
            'worker_no': 'worker %d' % i,
        }
        self.par_f_manager[suffix] = feature_manager(worker_para)

    # process data in parallel
    pool = ThreadPool(num_of_worker)
    try:
        # Return values of the workers are not used; map() is called for
        # its blocking behaviour and to surface worker exceptions.
        pool.map(self.let_worker_run, range(self.num_of_file))
    finally:
        # Release the worker threads even if a task failed.
        pool.close()
        pool.join()