示例#1
0
def weekly2all(data_overall_processed_file, data_overall_raw_file, data_weekly_json, data_weekly_raw_file):
    """Fold one weekly JSON export into the overall raw and processed CSV files.

    The weekly JSON is first converted to CSV, then combined with the existing
    overall raw CSV via ``process_overlap``. Exact duplicate rows are dropped,
    the result is run through ``preprocess_data``, and both the raw and the
    processed frames are written back to disk.

    Args:
        data_overall_processed_file: Path the processed CSV is written to.
        data_overall_raw_file: Path of the overall raw CSV. It is read as
            input and then overwritten with the combined, de-duplicated data.
        data_weekly_json: Path of the weekly JSON export handed to
            ``parse_json_2_csv.json2csv``.
        data_weekly_raw_file: Path the weekly CSV is written to by
            ``json2csv`` and subsequently read from.

    Side effects:
        Prints the shape of the combined frame before and after
        de-duplication, and writes three files (weekly raw CSV, overall raw
        CSV, overall processed CSV).
    """
    # Convert the weekly JSON export to CSV so pandas can load it.
    parse_json_2_csv.json2csv(data_weekly_json, data_weekly_raw_file)
    data_weekly_raw = pd.read_csv(data_weekly_raw_file, encoding='utf8')
    data_overall_raw = pd.read_csv(data_overall_raw_file, encoding='utf8')

    data_raw = process_overlap(data_overall_raw, data_weekly_raw)
    print(data_raw.shape)
    # Drop exact duplicate rows and renumber the index from 0. Rebuilding the
    # frame from ``.values`` would coerce every column to one common dtype
    # (typically ``object``); reset_index keeps each column's own dtype.
    data_raw = data_raw.drop_duplicates().reset_index(drop=True)
    print(data_raw.shape)

    # Compute the processed frame before touching any output file, so a
    # failure in preprocessing leaves the existing overall raw CSV untouched.
    data_processed = preprocess_data(data_raw)

    data_raw.to_csv(data_overall_raw_file, index=False, encoding='utf8')
    data_processed.to_csv(data_overall_processed_file, index=False, encoding='utf8')
示例#2
0
def main_old():
    """Rebuild the teacher and student CSVs from their full JSON event exports.

    For each dataset, in order (teacher first, then student): convert the JSON
    export to a raw CSV, load that CSV, run it through ``preprocess_data`` and
    write the result to the processed CSV.
    """
    # The incremental path (adding weekly data to the current overall data)
    # is disabled here; the calls it used were:
    #     weekly2all('teacher.csv','raw/teacher_raw.csv','data/anonymous-teacher-events_1-12.json', 'raw/teacher_1-12_raw.csv')
    #     weekly2all('student.csv','raw/student_raw.csv','data/anonymous-student-events_1-12.json', 'raw/student_1-12_raw.csv')

    # (JSON export, raw CSV, processed CSV) for every dataset to rebuild.
    datasets = (
        ('data/anonymous-teacher-events.json', 'raw/teacher_raw.csv', 'teacher.csv'),
        ('data/anonymous-student-events.json', 'raw/student_raw.csv', 'student.csv'),
    )

    for events_json, raw_csv, processed_csv in datasets:
        parse_json_2_csv.json2csv(events_json, raw_csv)
        raw_frame = pd.read_csv(raw_csv)
        processed_frame = preprocess_data(raw_frame)
        processed_frame.to_csv(processed_csv, index=False, encoding='utf8')