示例#1
0
def execute():
    """Run the preprocessing pass over the files listed for TEMP_PATH.

    For each name returned by ``get_files_in_dir(TEMP_PATH, JSON)`` the file
    is passed through ``extract_data`` and ``process``, the result is handed
    to ``insert_many(collection, ...)``, and the source file is then removed.
    Progress is reported through ``display_percentage`` and the whole run is
    bracketed by ``start_timing`` / ``stop_timing``.

    The module-level ``client`` is closed when the loop ends, whether it
    finished normally or an exception escaped from one of the per-file
    steps. Any such exception still propagates to the caller.
    """
    data_files = get_files_in_dir(TEMP_PATH, JSON)
    total = len(data_files)
    # Single-argument parenthesised print parses on both Python 2 and 3.
    print('Started Preprocessing ' + str(total) + ' files... ')
    start_timing()

    percent_interval = 1  # increment for the completion percent display
    display_percentage(0, total, percent_interval)

    try:
        for done, data_file in enumerate(data_files, 1):
            data_file_path = join(TEMP_PATH, data_file)
            processed_tweets = process(extract_data(data_file_path))

            insert_many(collection, processed_tweets)
            # The source file is deleted only after insert_many returned
            # without raising, so a failed file stays on disk for a retry.
            remove(data_file_path)

            # updating completion status
            display_percentage(done, total, percent_interval)
    finally:
        # Release the client even if a per-file step raised.
        client.close()

    # Line break after the progress display, then the completion message.
    print('')
    print('Finished')

    stop_timing()
示例#2
0
def execute():
    """Run the preprocessing pass over the files listed for TEMP_PATH.

    For each name returned by ``get_files_in_dir(TEMP_PATH, JSON)`` the file
    is passed through ``extract_data`` and ``process``, the result is handed
    to ``insert_many(collection, ...)``, and the source file is then removed.
    Progress is reported through ``display_percentage`` and the whole run is
    bracketed by ``start_timing`` / ``stop_timing``.

    The module-level ``client`` is closed when the loop ends, whether it
    finished normally or an exception escaped from one of the per-file
    steps. Any such exception still propagates to the caller.
    """
    data_files = get_files_in_dir(TEMP_PATH, JSON)
    total = len(data_files)
    # Single-argument parenthesised print parses on both Python 2 and 3.
    print('Started Preprocessing ' + str(total) + ' files... ')
    start_timing()

    percent_interval = 1  # increment for the completion percent display
    display_percentage(0, total, percent_interval)

    try:
        for done, data_file in enumerate(data_files, 1):
            data_file_path = join(TEMP_PATH, data_file)
            processed_tweets = process(extract_data(data_file_path))

            insert_many(collection, processed_tweets)
            # The source file is deleted only after insert_many returned
            # without raising, so a failed file stays on disk for a retry.
            remove(data_file_path)

            # updating completion status
            display_percentage(done, total, percent_interval)
    finally:
        # Release the client even if a per-file step raised.
        client.close()

    # Line break after the progress display, then the completion message.
    print('')
    print('Finished')

    stop_timing()
示例#3
0
def remove_previous_data():
    """Delete each file that get_files_in_dir(TSV_DIR_PATH, TSV) reports.

    Every returned name is joined onto TSV_DIR_PATH and passed to os.remove.
    """
    for stale_name in get_files_in_dir(TSV_DIR_PATH, TSV):
        stale_path = join(TSV_DIR_PATH, stale_name)
        os.remove(stale_path)
def remove_previous_data():
    """Delete each file that get_files_in_dir(TSV_DIR_PATH, TSV) reports.

    Every returned name is joined onto TSV_DIR_PATH and passed to os.remove.
    """
    for stale_name in get_files_in_dir(TSV_DIR_PATH, TSV):
        stale_path = join(TSV_DIR_PATH, stale_name)
        os.remove(stale_path)