def _data_clean_core(types, day, hour, minute, types_choice):
    """Move the rows matching one error type out of a single data file.

    The file for (day, hour, minute) is loaded through ``rd.read_txt``. The
    ``search`` expression is evaluated to select the offending rows; when any
    are found they are tagged with the source file name, appended to
    ``<path_error_data>/<types>_data.txt``, the ``drop`` expression is
    evaluated, and ``data`` is written back to
    ``rd.path_name(day, hour, minute)``.

    Any exception is caught and appended to ``./log/<types>_log.txt`` rather
    than propagated, so one bad file does not stop the caller.

    :param types: error-type label, used in the output and log file names
    :param day: day component passed to the ``rd`` helpers
    :param hour: hour component passed to the ``rd`` helpers
    :param minute: minute component passed to the ``rd`` helpers
    :param types_choice: mapping whose ``'search'`` and ``'drop'`` values are
        Python expression strings
    :return: None
    """
    try:
        data = rd.read_txt(day, hour, minute)
        # NOTE(review): both expressions are run with eval() in this scope and
        # presumably refer to the locals `data` / `types_error` -- do not
        # rename those, and never feed these strings from untrusted input.
        types_error = eval(types_choice['search'])
        if not types_error.empty:
            types_error['txt_name'] = rd.txt_name(day, hour, minute)
            types_error.to_csv(
                '%s/%s_data.txt' % (path_error_data, types),
                mode='a', header=0, index=0, sep="|")
            # Presumably removes the matched rows from `data` in place;
            # the return value of the expression is ignored.
            eval(types_choice['drop'])
            data.to_csv(rd.path_name(day, hour, minute),
                        header=0, index=0, sep="|")
    except Exception as result:
        # `with` guarantees the log handle is closed even if the write fails.
        with open('./log/%s_log.txt' % types, 'a') as file:
            file.write('%s,%s,%s\n'
                       % (time0, rd.txt_name(day, hour, minute), result))
def _data_reduce_core(day, hour, minute, pt_name02):
    """Slim one data file down by removing the unused columns.

    Loads the file for (day, hour, minute) through ``rd.read_txt``, drops the
    listed columns (columns that are absent are ignored) and writes the
    result to ``pt_name02`` as a ``|``-separated file without header or index.

    Any exception is caught, appended to ``./log/reduce_log.txt`` and echoed
    to stdout instead of being propagated.

    :param day: day component passed to the ``rd`` helpers
    :param hour: hour component passed to the ``rd`` helpers
    :param minute: minute component passed to the ``rd`` helpers
    :param pt_name02: destination path for the reduced file
    :return: None
    """
    try:
        data = rd.read_txt(day, hour, minute)
        data.drop([
            'control', 'police', 'viaduct', 'brake',
            'P1', 'direction', 'numS', 'P2'
        ], axis=1, inplace=True, errors='ignore')
        data.to_csv(pt_name02, header=0, index=0, sep="|")
    except Exception as result:
        message = '%s :%s' % (rd.txt_name(day, hour, minute), result)
        # `with` guarantees the log handle is closed even if the write fails.
        with open('./log/reduce_log.txt', 'a') as file:
            file.write(message + '\n')
        print(message)
def _drift_delete(df0):
    """Remove the flagged rows from the data file they were found in.

    ``df0['txt_name']`` is resolved through ``fo.error_data_path`` to an
    object exposing ``day`` / ``hour`` / ``minute``; that file is loaded, every
    row whose index value appears in ``df0['index_1']`` is removed, and the
    remainder is written back to ``rd.path_name(day, hour, minute)``.

    :param df0: frame-like object providing ``'txt_name'`` and ``'index_1'``
    :return: None
    """
    df1 = fo.error_data_path(df0['txt_name'])
    df2 = rd.read_txt(df1.day, df1.hour, df1.minute)

    # Expose the row index as a helper column so it can be matched against
    # the recorded indices of the flagged rows.
    df2['index_1'] = df2.index
    keep = ~df2['index_1'].isin(df0['index_1'])

    # Non-in-place drop: an in-place drop on a filtered slice raises pandas'
    # SettingWithCopyWarning; this yields the same frame without it.
    df3 = df2.loc[keep].drop(columns=['index_1'])
    df3.to_csv(rd.path_name(df1.day, df1.hour, df1.minute),
               header=0, index=0, sep="|")
def _drift_error(day, hour, minute, step_size):
    """Feed the data files to ``_drift_error_core`` in batches of ``step_size``.

    Iterates over ``range(day, max_day)`` x ``range(hour, max_hour)`` x
    ``range(minute, max_minute)`` (day 17 is skipped). Each file is loaded,
    stripped of the columns not needed here, and tagged with its original row
    index (``index_0``) and file name (``txt_time``). Once ``step_size`` files
    have been collected they are combined, ``gps_time`` is parsed to
    datetimes, and the batch is handed to ``_drift_error_core``.

    Any exception for a file or batch is appended to ``./log/drift_log.txt``
    and the loop carries on.

    :param day: first day to process
    :param hour: first hour to process within each day
    :param minute: first minute to process within each hour
    :param step_size: number of files combined into one batch
    :return: None
    """
    frames = []
    for x in range(day, max_day):
        if x == 17:
            # Day 17 is deliberately skipped; the reason is not visible here.
            continue
        for y in range(hour, max_hour):
            for z in range(minute, max_minute):
                try:
                    raw = rd.read_txt(x, y, z)
                    frame = raw.drop([
                        'control', 'police', 'empty', 'state', 'viaduct',
                        'brake', 'P1', 'receipt_time', 'speed', 'direction',
                        'numS', 'P2'
                    ], axis=1).copy()
                    del raw
                    frame['index_0'] = frame.index
                    frame['txt_time'] = rd.txt_name(x, y, z)
                    frames.append(frame)

                    if len(frames) == step_size:
                        # Reset the pending batch *before* processing so a
                        # failure below cannot leave the counter stuck past
                        # step_size (which would stop all later batches).
                        batch, frames = frames, []
                        # DataFrame.append was removed in pandas 2.0; a single
                        # concat is equivalent and avoids quadratic copying.
                        # A one-file batch keeps its original index, as before.
                        if len(batch) == 1:
                            data = batch[0]
                        else:
                            data = pd.concat(batch, ignore_index=True)
                        data['gps_time'] = pd.to_datetime(
                            data['gps_time'], format='%Y-%m-%d %H:%M:%S')
                        _drift_error_core(data)
                        del data, batch
                except Exception as result:
                    # `with` guarantees the log handle is closed.
                    with open('./log/drift_log.txt', 'a') as file:
                        file.write('%s,%s\n' % (time0, result))
    # NOTE(review): files left in `frames` here (fewer than step_size) are
    # never processed -- this matches the previous behaviour; confirm whether
    # the trailing partial batch should be flushed.
def data_error_delete(types):
    """Re-scan the files recorded for an error type and drop matching rows.

    The search/drop expressions for ``types`` are looked up in
    ``types_search`` / ``types_drop``. For every row returned by
    ``fo.error_data_path(types)`` the corresponding data file is loaded, the
    search expression is evaluated and printed, and -- when it selects any
    rows -- the drop expression is evaluated and ``data`` is written back to
    ``rd.path_name(...)`` for that file.

    :param types: error-type key into ``types_search`` and ``types_drop``
    :return: None
    """
    # The expression strings are run with eval() in this function's scope and
    # presumably refer to the locals `data` / `types_error`; keep those names.
    types_choice = {'search': types_search[types], 'drop': types_drop[types]}
    targets = fo.error_data_path(types)

    for label in targets.index:
        row = targets.loc[label]
        when = {'day': row.day, 'hour': row.hour, 'minute': row.minute}

        data = rd.read_txt(**when)
        types_error = eval(types_choice['search'])
        print(types_error)

        if not types_error.empty:
            eval(types_choice['drop'])
            data.to_csv(rd.path_name(**when), header=0, index=0, sep="|")

        # Release the frame before the next file is loaded.
        del data
def demand(day=1, hour=0, minute=0):
    """Build the per-day demand files from the per-minute data files.

    For every day in ``range(day, max_day)`` (day 17 is skipped) and every
    hour in ``range(hour, max_hour)``, the minute files are loaded with
    ``rd.read_txt(..., types=1)`` and combined. The ``state``,
    ``receipt_time`` and ``speed`` columns are dropped, duplicate
    (``id``, ``gps_time``) rows are removed, rows are sorted by ``gps_time``,
    and ``_demand_state`` is applied per ``id``. The result is appended to
    ``./data/demand_data_0/<day>.csv``.

    Files that fail to load are reported on stdout and skipped; an hour with
    no readable file at all is reported and skipped as well.

    :param day: first day to process
    :param hour: first hour to process within each day
    :param minute: first minute to process within each hour
    :return: None
    """
    for x in range(day, max_day):
        if x == 17:
            # Day 17 is deliberately skipped; the reason is not visible here.
            continue
        for y in range(hour, max_hour):
            print('完成:%02d' % y)

            frames = []
            for z in range(minute, max_minute):
                try:
                    frames.append(rd.read_txt(x, y, z, types=1))
                except Exception as result:
                    print(result)

            if not frames:
                # Nothing could be read for this hour; dropping columns from
                # an empty frame would raise KeyError and abort the whole run.
                print('no data for day %02d hour %02d, skipped' % (x, y))
                continue

            # DataFrame.append was removed in pandas 2.0; one concat produces
            # the same rows without quadratic copying.
            data = pd.concat(frames, ignore_index=True)
            del frames

            data.drop(['state', 'receipt_time', 'speed'], axis=1, inplace=True)
            data.drop_duplicates(['id', 'gps_time'], inplace=True)
            data.sort_values('gps_time', inplace=True)

            da = data.groupby('id').apply(_demand_state)
            da.to_csv('./data/demand_data_0/%02d.csv' % x,
                      mode='a', index=0, header=0)
            del da