def execute():
    """Preprocess every JSON data file that get_files_in_dir finds in TEMP_PATH.

    Each file is run through extract_data and process, the result is handed
    to insert_many together with the module-level ``collection``, and the
    file is then passed to remove. Progress is reported through
    display_percentage before the first file and after each one. When all
    files are handled the module-level ``client`` is closed and timing is
    stopped.
    """
    pending_files = get_files_in_dir(TEMP_PATH, JSON)
    total = len(pending_files)
    print('Started Preprocessing ' + str(total) + ' files... ')
    start_timing()

    step = 1  # increment for the completion percent display
    display_percentage(0, total, step)

    for done, file_name in enumerate(pending_files, 1):
        path = join(TEMP_PATH, file_name)
        insert_many(collection, process(extract_data(path)))
        remove(path)
        # updating completion status
        display_percentage(done, total, step)

    client.close()
    # Bare Python 2 print statement: emits only a newline.
    print
    print('Finished')
    stop_timing()
def remove_previous_data():
    """Delete every TSV file that get_files_in_dir reports for TSV_DIR_PATH."""
    for name in get_files_in_dir(TSV_DIR_PATH, TSV):
        stale_path = join(TSV_DIR_PATH, name)
        os.remove(stale_path)