def main():
    # load data
    print("loading data...")
    ts = time.time()
    datapath = os.path.join(Paramater.DATAPATH, "2016", month)
    if is_mmn:
        fname = os.path.join(datapath, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_mmn_speed.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    else:
        fname = os.path.join(datapath, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_speed.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    pkl = fname + '.preprocessing_speed.pkl'
    fn = "48_48_20_LinearInterpolationFixed"
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = read_cache(fname, is_mmn, pkl)
        print("load %s successfully" % fname)
    else:
        datapaths = [os.path.join(datapath, fn)]
        noConditionRegionsPath = os.path.join(datapath, "48_48_20_noSpeedRegion_0.05")
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = Data.loadDataFromRaw(
            paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
            nb_flow=nb_flow, len_closeness=len_closeness,
            len_period=len_period, len_trend=len_trend,
            len_test=len_test, maxMinNormalization=is_mmn,
            preprocess_name=pkl, meta_data=hasExternal,
            meteorol_data=hasExternal, holiday_data=hasExternal,
            isComplete=False)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test, noConditionRegions,
                  is_mmn, x_num, y_num, nb_flow)
    z_num = nb_flow
    # print("\n days (test): ", [v[:8] for v in timestamp_test[0::72]])
    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("compiling model_train...")
    print("**at the first time, it takes a few minutes to compile if you use [Theano] as the backend**")
    ts = time.time()

    # window the data into length-`step` sequences for the error-RNN
    X_train, Y_train = Data.getSequenceXY(X_train, Y_train, step)
    Y_train_final = Y_train[:, -1]
    X_test, Y_test = Data.getSequenceXY(X_test, Y_test, step)
    Y_test_final = Y_test[:, -1]
    X_train.append(Y_train)  # targets are fed as an extra input (teacher forcing for eRNN)
    X_test.append(Y_test)
    timestamp_train = timestamp_train[step - 1:]
    timestamp_test = timestamp_test[step - 1:]

    if use_diff_test:
        X_test_old = X_test
        Y_test_old = Y_test
        import pandas as pd
        df_diff = pd.read_csv("./data/2016/all/" + fn + "_diff.csv", index_col=0)
        # the 335 timestamps with diff > 200 are used as the test set
        test_time = df_diff[df_diff["diff"] > 200]["time"].values
        timestamp_train_dict = dict(zip(timestamp_train, range(len(timestamp_train))))
        timestamp_test_dict = dict(zip(timestamp_test, range(len(timestamp_test))))
        new_X_test = []
        new_Y_test = []
        if isinstance(X_train, list):
            for _ in range(len(X_train)):
                new_X_test.append([])
        for _test_time in test_time:
            _test_time = str(_test_time)
            if _test_time in timestamp_train_dict:
                index = timestamp_train_dict[_test_time]
                if isinstance(X_train, list):
                    for i in range(len(X_train)):
                        new_X_test[i].append(X_train[i][index])
                else:
                    new_X_test.append(X_train[index])
                new_Y_test.append(Y_train[index])
            if _test_time in timestamp_test_dict:
                index = timestamp_test_dict[_test_time]
                if isinstance(X_test_old, list):
                    for i in range(len(X_test_old)):
                        new_X_test[i].append(X_test_old[i][index])
                else:
                    new_X_test.append(X_test_old[index])
                new_Y_test.append(Y_test_old[index])
            # if _test_time not in timestamp_train_dict and _test_time not in timestamp_test_dict:
            #     print(_test_time)
        # stack per-input when the model takes a list of inputs
        # (checking X_train, not new_X_test, which is always a list here)
        if isinstance(X_train, list):
            for i in range(len(new_X_test)):
                new_X_test[i] = np.stack(new_X_test[i], axis=0)
        else:
            new_X_test = np.stack(new_X_test, axis=0)
        new_Y_test = np.stack(new_Y_test, axis=0)
        # if isinstance(new_X_test, list):
        #     for i in range(len(new_X_test)):
        #         print(new_X_test[i].shape)
        # else:
        #     print(new_X_test.shape)
        # print(new_Y_test.shape)
        X_test = new_X_test
        Y_test = new_Y_test
        Y_test_final = Y_test[:, -1]

    # print("X_test len:", len(X_test))
    # for x in X_test:
    #     print(x.shape)
    # print(Y_test.shape)
    # print(z_num, x_num, y_num)
    print("start building model_train")
    outputs = []
    inputs = []
    resUnit_share_layers = []
    resUnit_share_layers2 = []
    resUnit_share_layers3 = []
    # shared layers, reused across all `step` time slices
    # (Keras 1.x argument names used consistently here)
    shared_conv1 = Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode="same")
    shared_conv2 = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")
    shared_conv3 = Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode="same")
    shared_conv4 = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")
    shared_conv5 = Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode="same")
    shared_conv6 = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")
    shared_convLSTM_period = ConvLSTM2D(nb_filter=32, nb_row=3, nb_col=3, border_mode="same")
    shared_conv_period = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")
    shared_convLSTM_trend = ConvLSTM2D(nb_filter=32, nb_row=3, nb_col=3, border_mode="same")
    shared_conv_trend = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")
    shared_ilayers = []
    shared_embedding = Dense(output_dim=10)
    shared_embedding2 = Dense(output_dim=nb_flow * x_num * y_num)
    assert l < step
    for _ in range(step):
        main_outputs = []
        if len_closeness > 0:
            input = Input(shape=(nb_flow * len_closeness, x_num, y_num))
            inputs.append(input)
            # Conv1
            conv1 = shared_conv1(input)
            # [nb_residual_unit] Residual Units
            resUnit_share_index = [0]
            residual_output = ResUnits(_residual_unit, nb_filter=64,
                                       repetations=nb_residual_unit,
                                       share=True, shareIndex=resUnit_share_index,
                                       shares=resUnit_share_layers)(conv1)
            # Conv2
            activation = Activation('relu')(residual_output)
            conv2 = shared_conv2(activation)
            main_outputs.append(conv2)
            # (alternative ConvLSTM closeness branch, kept for reference)
            # input = Input(shape=(nb_flow * len_closeness, x_num, y_num))
            # inputs.append(input)
            # reshape = Reshape((len_closeness, nb_flow, x_num, y_num))(input)
            # convLSTM = ConvLSTM2D(nb_filter=32, nb_row=3, nb_col=3, border_mode="same")(reshape)
            # act2 = Activation("relu")(convLSTM)
            # conv2 = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")(act2)
            # main_outputs.append(conv2)
        if len_period > 0:
            input = Input(shape=(nb_flow * len_period, x_num, y_num))
            inputs.append(input)
            # Conv1
            conv1 = shared_conv3(input)
            # [nb_residual_unit] Residual Units
            resUnit_share_index = [0]
            residual_output = ResUnits(_residual_unit, nb_filter=64,
                                       repetations=nb_residual_unit,
                                       share=True, shareIndex=resUnit_share_index,
                                       shares=resUnit_share_layers2)(conv1)
            # Conv2
            activation = Activation('relu')(residual_output)
            conv2 = shared_conv4(activation)
            main_outputs.append(conv2)
            # (alternative shared-ConvLSTM period branch, kept for reference)
            # input = Input(shape=(nb_flow * len_period, x_num, y_num))
            # inputs.append(input)
            # input = Reshape((len_period, nb_flow, x_num, y_num))(input)
            # convLSTM = shared_convLSTM_period(input)
            # act2 = Activation("relu")(convLSTM)
            # conv2 = shared_conv_period(act2)
            # main_outputs.append(conv2)
        if len_trend > 0:
            input = Input(shape=(nb_flow * len_trend, x_num, y_num))
            inputs.append(input)
            # Conv1
            conv1 = shared_conv5(input)
            # [nb_residual_unit] Residual Units
            resUnit_share_index = [0]
            residual_output = ResUnits(_residual_unit, nb_filter=64,
                                       repetations=nb_residual_unit,
                                       share=True, shareIndex=resUnit_share_index,
                                       shares=resUnit_share_layers3)(conv1)
            # Conv2
            activation = Activation('relu')(residual_output)
            conv2 = shared_conv6(activation)
            main_outputs.append(conv2)
            # (alternative shared-ConvLSTM trend branch, kept for reference)
            # reshape = Reshape((len_trend, nb_flow, x_num, y_num))(input)
            # convLSTM = shared_convLSTM_trend(reshape)
            # act2 = Activation("relu")(convLSTM)
            # conv2 = shared_conv_trend(act2)
            # main_outputs.append(conv2)
        if len(main_outputs) == 1:
            main_output = main_outputs[0]
        else:
            # fuse the branches with learned per-branch weights (iLayer)
            new_outputs = []
            for index, output in enumerate(main_outputs):
                if len(shared_ilayers) <= index:
                    shared_ilayers.append(iLayer())
                new_outputs.append(shared_ilayers[index](output))
            main_output = merge(new_outputs, mode='sum')
        if external_dim is not None and external_dim > 0:
            # external input (meta / meteorology / holiday features)
            external_input = Input(shape=(external_dim,))
            inputs.append(external_input)
            embedding = shared_embedding(external_input)
            embedding = Activation('relu')(embedding)
            h1 = shared_embedding2(embedding)
            activation = Activation('relu')(h1)
            external_output = Reshape((nb_flow, x_num, y_num))(activation)
            main_output = merge([main_output, external_output], mode='sum')
        main_output = Activation('tanh')(main_output)
        outputs.append(main_output)

    main_output = merge(outputs, mode="concat", concat_axis=1)
    predict_sequence = Reshape((step, z_num, x_num, y_num))(main_output)
    input_targets = Input(shape=(step, z_num, x_num, y_num), name="input_targets")
    inputs.append(input_targets)
    main_output = eRNN(error_hidden_dim, (z_num, x_num, y_num), l, False)([predict_sequence, input_targets])
    model_train = Model(input=inputs, output=[predict_sequence, main_output])
    adam = Adam(lr=lr)
    model_train.compile(loss=['mse', 'mse'], loss_weights=[0.2, 1],
                        optimizer=adam, metrics=[metrics.rmse])
    # model_train.compile(loss=lambda y_true, y_pred: K.mean(K.square(y_pred - y_true), axis=-1),
    #                     optimizer=adam, metrics=[metrics.rmse])
    # model_predict = Model(input=inputs, output=main_output)
    # model_predict.compile(optimizer=adam, loss="mse", metrics=[metrics.rmse])
    model_train.summary()
    print("finish building model_train")

    hyperparams_name = 'testMyModel3_speed.c{}.p{}.t{}.resunit{}.lr{}.{}.{}'.format(
        len_closeness, len_period, len_trend, nb_residual_unit, lr,
        "External" if hasExternal else "noExternal",
        "MMN" if is_mmn else "noMMN")
    fname_param = os.path.join(path_model, '{}.best.h5'.format(hyperparams_name))
    early_stopping = EarlyStopping(monitor='val_e_rnn_1_rmse', patience=4, mode='min')
    model_checkpoint = ModelCheckpoint(fname_param, monitor='val_e_rnn_1_rmse', verbose=0,
                                       save_best_only=True, mode='min', save_weights_only=True)
    print("\nelapsed time (compiling model_train): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("training model_train...")
    ts = time.time()
    history = model_train.fit(X_train, [Y_train, Y_train_final],
                              nb_epoch=nb_epoch,
                              batch_size=batch_size,
                              validation_split=0.1,
                              callbacks=[early_stopping, model_checkpoint],
                              verbose=1)
    model_train.save_weights(os.path.join(path_model, '{}.h5'.format(hyperparams_name)),
                             overwrite=True)
    pickle.dump(history.history,
                open(os.path.join(path_result, '{}.history.pkl'.format(hyperparams_name)), 'wb'))
    print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print('evaluating using the model_train that has the best loss on the valid set')
    ts = time.time()
    model_train.load_weights(fname_param)
    score = model_train.evaluate(X_train, [Y_train, Y_train_final],
                                 batch_size=Y_train.shape[0] // 48, verbose=0)
    if is_mmn:
        print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    else:
        print('Train score: %.6f rmse (real): %.6f' % (score[0], score[1]))

    score = model_train.evaluate(X_test, [Y_test, Y_test_final],
                                 batch_size=Y_test.shape[0] // 12, verbose=0)
    if is_mmn:
        print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    else:
        print('Test score: %.6f rmse (real): %.6f' % (score[0], score[1]))

    if not is_mmn:
        predict = model_train.predict(X_test)[1]
    else:
        predict = mmn.inverse_transform(model_train.predict(X_test)[1])
        Y_test_final = mmn.inverse_transform(Y_test_final)
    # predict = predict[:, -1]
    # Y_test = Y_test[:, -1]
    # print("predict", predict)
    # print("test", Y_test_final)
    rmse = round(Metric.RMSE(predict, Y_test_final, noConditionRegions), 5)
    save_result(predict, Y_test_final, timestamp_test,
                "./result/{}_predict_rmse{}".format(hyperparams_name, str(rmse)))
    print("RMSE:", rmse)
    # print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
    print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))
    exit(1)
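# ---------------------------------------------------------------------------
# NOTE (illustrative sketch, not this repo's verbatim code): the custom
# `metrics.rmse` passed to compile() above lives in a helper module that is
# not shown in this file. Assuming the standard Keras backend API, an
# equivalent metric could be written as:
def _rmse_metric_sketch(y_true, y_pred):
    """Root-mean-squared error as a Keras metric (hypothetical helper)."""
    from keras import backend as K
    return K.sqrt(K.mean(K.square(y_pred - y_true)))
# ---------------------------------------------------------------------------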
def main():
    # load data
    print("loading data...")
    ts = time.time()
    if is_mmn:
        fname = os.path.join(Paramater.DATAPATH, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_mmn.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    else:
        fname = os.path.join(Paramater.DATAPATH, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    x_num = y_num = 48
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = read_cache(fname, is_mmn)
        print("load %s successfully" % fname)
    else:
        datapaths = [Paramater.DATAPATH + "48_48_20_LinearInterpolationFixed_condition"]
        noConditionRegionsPath = Paramater.PROJECTPATH + "data/48_48_20_noSpeedRegion_0.05"
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = Data.loadDataFromRaw(
            paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
            nb_flow=nb_flow, len_closeness=len_closeness,
            len_period=len_period, len_trend=len_trend,
            len_test=len_test, maxMinNormalization=is_mmn,
            preprocess_name='preprocessing.pkl', meta_data=hasExternal,
            meteorol_data=hasExternal, holiday_data=hasExternal)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test, noConditionRegions,
                  is_mmn, x_num, y_num, Paramater.Z_NUM)
    # print("\n days (test): ", [v[:8] for v in timestamp_test[0::72]])
    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("compiling model...")
    print("**at the first time, it takes a few minutes to compile if you use [Theano] as the backend**")
    ts = time.time()
    model = build_model(external_dim, x_num=x_num, y_num=y_num)
    hyperparams_name = 'c{}.p{}.t{}.resunit{}.lr{}.{}.{}'.format(
        len_closeness, len_period, len_trend, nb_residual_unit, lr,
        "External" if hasExternal else "noExternal",
        "MMN" if is_mmn else "noMMN")
    fname_param = os.path.join(path_model, '{}.best.h5'.format(hyperparams_name))
    early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
    model_checkpoint = ModelCheckpoint(fname_param, monitor='val_rmse', verbose=0,
                                       save_best_only=True, mode='min')
    print("\nelapsed time (compiling model): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("training model...")
    ts = time.time()
    history = model.fit(X_train, Y_train,
                        nb_epoch=nb_epoch,
                        batch_size=batch_size,
                        validation_split=0.1,
                        callbacks=[early_stopping, model_checkpoint],
                        verbose=1)
    model.save_weights(os.path.join(path_model, '{}.h5'.format(hyperparams_name)),
                       overwrite=True)
    pickle.dump(history.history,
                open(os.path.join(path_result, '{}.history.pkl'.format(hyperparams_name)), 'wb'))
    print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print('evaluating using the model that has the best loss on the valid set')
    ts = time.time()
    model.load_weights(fname_param)
    score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[0] // 48, verbose=0)
    if mmn is not None:
        print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    else:
        print('Train score: %.6f rmse (real): %.6f' % (score[0], score[1]))
    score = model.evaluate(X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
    if mmn is not None:
        print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    else:
        print('Test score: %.6f rmse (real): %.6f' % (score[0], score[1]))
    if not is_mmn:
        predict = matrixsRounding(model.predict(X_test))
    else:
        predict = matrixsRounding(mmn.inverse_transform(model.predict(X_test)))
    print("RMSE:", Metric.RMSE(predict, Y_test, noConditionRegions))
    print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
    print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("training model (cont)...")
    ts = time.time()
    fname_param = os.path.join(path_model, '{}.cont.best.h5'.format(hyperparams_name))
    model_checkpoint = ModelCheckpoint(fname_param, monitor='rmse', verbose=0,
                                       save_best_only=True, mode='min')
    history = model.fit(X_train, Y_train, nb_epoch=nb_epoch_cont, verbose=2,
                        batch_size=batch_size, callbacks=[model_checkpoint])
    pickle.dump(history.history,
                open(os.path.join(path_result, '{}.cont.history.pkl'.format(hyperparams_name)), 'wb'))
    model.save_weights(os.path.join(path_model, '{}_cont.h5'.format(hyperparams_name)),
                       overwrite=True)
    print("\nelapsed time (training cont): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print('evaluating using the final model')
    score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[0] // 48, verbose=0)
    if mmn is not None:
        print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    else:
        print('Train score: %.6f rmse (real): %.6f' % (score[0], score[1]))
    ts = time.time()
    score = model.evaluate(X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
    if mmn is not None:
        print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    else:
        print('Test score: %.6f rmse (real): %.6f' % (score[0], score[1]))
    if not is_mmn:
        predict = matrixsRounding(model.predict(X_test))
    else:
        predict = matrixsRounding(mmn.inverse_transform(model.predict(X_test)))
    print("RMSE:", Metric.RMSE(predict, Y_test, noConditionRegions))
    print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
    print("\nelapsed time (eval cont): %.3f seconds\n" % (time.time() - ts))
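# ---------------------------------------------------------------------------
# NOTE (illustrative, assumption): the `mmn` object above exposes `_min`,
# `_max`, and `inverse_transform`, and the real-RMSE conversion multiplies by
# (max - min) / 2, which implies data scaled into [-1, 1]. A minimal
# normalizer consistent with that usage (hypothetical stand-in for the
# repo's preprocessing class):
class _MinMaxNormalizationSketch(object):
    """Scale data into [-1, 1]."""

    def fit(self, X):
        self._min, self._max = X.min(), X.max()

    def transform(self, X):
        return 2. * (X - self._min) / (self._max - self._min) - 1.

    def inverse_transform(self, X):
        return (X + 1.) / 2. * (self._max - self._min) + self._min
# ---------------------------------------------------------------------------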
def main():
    all_result = []
    # sweep over closeness / period / trend / external-feature settings
    for _c in c_list:
        for _p in p_list:
            for _t in t_list:
                for _ex in ex_list:
                    if _c == 0 and _p == 0 and _t == 0:
                        continue
                    len_period = _p
                    len_closeness = _c
                    len_trend = _t
                    hasExternal = _ex
                    # load data
                    print("loading data...")
                    ts = time.time()
                    datapath = os.path.join(Paramater.DATAPATH, "2016", "all")
                    if is_mmn:
                        fname = os.path.join(datapath, 'CACHE',
                                             'TaxiBJ_C{}_P{}_T{}_{}_mmn_speed.h5'.format(
                                                 len_closeness, len_period, len_trend,
                                                 "External" if hasExternal else "noExternal"))
                    else:
                        fname = os.path.join(datapath, 'CACHE',
                                             'TaxiBJ_C{}_P{}_T{}_{}_speed.h5'.format(
                                                 len_closeness, len_period, len_trend,
                                                 "External" if hasExternal else "noExternal"))
                    x_num = y_num = 48
                    pkl = fname + '.preprocessing_speed.pkl'
                    fn = "48_48_20_LinearInterpolationFixed"
                    if os.path.exists(fname) and CACHEDATA:
                        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
                        timestamp_train, timestamp_test, noConditionRegions, \
                        x_num, y_num, z_num = read_cache(fname, is_mmn, pkl)
                        print("load %s successfully" % fname)
                    else:
                        datapaths = [os.path.join(datapath, fn)]
                        noConditionRegionsPath = os.path.join(datapath, "48_48_20_noSpeedRegion_0.05")
                        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
                        timestamp_train, timestamp_test, noConditionRegions, \
                        x_num, y_num, z_num = Data.loadDataFromRaw(
                            paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
                            nb_flow=nb_flow, len_closeness=len_closeness,
                            len_period=len_period, len_trend=len_trend,
                            len_test=len_test, maxMinNormalization=is_mmn,
                            preprocess_name=pkl, meta_data=hasExternal,
                            meteorol_data=hasExternal, holiday_data=hasExternal,
                            isComplete=False)
                        if CACHEDATA:
                            cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                                  timestamp_train, timestamp_test, noConditionRegions,
                                  is_mmn, x_num, y_num, Paramater.Z_NUM)

                    if use_diff_test:
                        X_test_old = X_test
                        Y_test_old = Y_test
                        import pandas as pd
                        df_diff = pd.read_csv("./data/2016/all/" + fn + "_diff.csv", index_col=0)
                        # the 335 timestamps with diff > 200 are used as the test set
                        test_time = df_diff[df_diff["diff"] > 200]["time"].values
                        timestamp_train_dict = dict(zip(timestamp_train, range(len(timestamp_train))))
                        timestamp_test_dict = dict(zip(timestamp_test, range(len(timestamp_test))))
                        new_X_test = []
                        new_Y_test = []
                        if isinstance(X_train, list):
                            for _ in range(len(X_train)):
                                new_X_test.append([])
                        for _test_time in test_time:
                            _test_time = str(_test_time)
                            if _test_time in timestamp_train_dict:
                                index = timestamp_train_dict[_test_time]
                                if isinstance(X_train, list):
                                    for i in range(len(X_train)):
                                        new_X_test[i].append(X_train[i][index])
                                else:
                                    new_X_test.append(X_train[index])
                                new_Y_test.append(Y_train[index])
                            if _test_time in timestamp_test_dict:
                                index = timestamp_test_dict[_test_time]
                                if isinstance(X_test_old, list):
                                    for i in range(len(X_test_old)):
                                        new_X_test[i].append(X_test_old[i][index])
                                else:
                                    new_X_test.append(X_test_old[index])
                                new_Y_test.append(Y_test_old[index])
                            # if _test_time not in timestamp_train_dict and _test_time not in timestamp_test_dict:
                            #     print(_test_time)
                        # stack per-input when the model takes a list of inputs
                        # (checking X_train, not new_X_test, which is always a list here)
                        if isinstance(X_train, list):
                            for i in range(len(new_X_test)):
                                new_X_test[i] = np.stack(new_X_test[i], axis=0)
                        else:
                            new_X_test = np.stack(new_X_test, axis=0)
                        new_Y_test = np.stack(new_Y_test, axis=0)
                        # if isinstance(new_X_test, list):
                        #     for i in range(len(new_X_test)):
                        #         print(new_X_test[i].shape)
                        # else:
                        #     print(new_X_test.shape)
                        # print(new_Y_test.shape)
                        X_test = new_X_test
                        Y_test = new_Y_test

                    # print("\n days (test): ", [v[:8] for v in timestamp_test[0::72]])
                    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

                    print('=' * 10)
                    print("compiling model...")
                    print("**at the first time, it takes a few minutes to compile if you use [Theano] as the backend**")
                    ts = time.time()
                    model = build_model(external_dim, x_num, y_num,
                                        len_closeness, len_period, len_trend)
                    hyperparams_name = 'speed.c{}.p{}.t{}.resunit{}.lr{}.{}.{}'.format(
                        len_closeness, len_period, len_trend, nb_residual_unit, lr,
                        "External" if hasExternal else "noExternal",
                        "MMN" if is_mmn else "noMMN")
                    fname_param = os.path.join(path_model, '{}.best.h5'.format(hyperparams_name))
                    early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
                    model_checkpoint = ModelCheckpoint(fname_param, monitor='val_rmse', verbose=0,
                                                       save_best_only=True, mode='min',
                                                       save_weights_only=True)
                    print("\nelapsed time (compiling model): %.3f seconds\n" % (time.time() - ts))

                    print('=' * 10)
                    print("training model...")
                    ts = time.time()
                    history = model.fit(X_train, Y_train,
                                        nb_epoch=nb_epoch,
                                        batch_size=batch_size,
                                        validation_split=0.1,
                                        callbacks=[early_stopping, model_checkpoint],
                                        verbose=1)
                    model.save_weights(os.path.join(path_model, '{}.h5'.format(hyperparams_name)),
                                       overwrite=True)
                    pickle.dump(history.history,
                                open(os.path.join(path_result,
                                                  '{}.history.pkl'.format(hyperparams_name)), 'wb'))
                    print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))

                    print('=' * 10)
                    print('evaluating using the model that has the best loss on the valid set')
                    ts = time.time()
                    model.load_weights(fname_param)
                    score = model.evaluate(X_train, Y_train,
                                           batch_size=Y_train.shape[0] // 48, verbose=0)
                    if is_mmn:
                        print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
                              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
                    else:
                        print('Train score: %.6f rmse (real): %.6f' % (score[0], score[1]))
                    score = model.evaluate(X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
                    if is_mmn:
                        print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
                              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
                    else:
                        print('Test score: %.6f rmse (real): %.6f' % (score[0], score[1]))
                    if not is_mmn:
                        predict = model.predict(X_test)
                    else:
                        predict = mmn.inverse_transform(model.predict(X_test))
                        Y_test = mmn.inverse_transform(Y_test)
                    # print("predict", predict)
                    # print("test", Y_test)
                    # (a commented-out copy of the single-run script's continued-training and
                    #  final-evaluation steps was removed here; see the non-sweep main() above)
                    rmse = round(Metric.RMSE(predict, Y_test, noConditionRegions), 5)
                    # np.save("./result/{}_predict_rmse{}".format(hyperparams_name, str(rmse)),
                    #         np.stack([predict, Y_test], axis=0))
                    save_result(predict, Y_test, timestamp_test,
                                "./result/{}_predict_rmse{}".format(hyperparams_name, str(rmse)))
                    print("RMSE:", rmse)
                    # print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
                    all_result.append("{}c_{}p_{}t_{}External_{}rmse".format(
                        len_closeness, len_period, len_trend, hasExternal, rmse))
                    print("\nelapsed time (eval cont): %.3f seconds\n" % (time.time() - ts))
    for _v in all_result:
        print(_v)
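# ---------------------------------------------------------------------------
# NOTE (assumption): `save_result` above is a repo helper that is not shown in
# this file. Given how it is called (predictions, ground truth, timestamps,
# output path), a minimal plausible implementation is:
def _save_result_sketch(predict, y_true, timestamps, path):
    """Persist predictions next to ground truth for later analysis
    (hypothetical stand-in for the repo's save_result)."""
    import numpy as np
    np.savez(path,
             predict=predict,
             ground_truth=y_true,
             timestamps=np.asarray(timestamps))
# ---------------------------------------------------------------------------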
"""
@Create Date: 17-9-18, 09:56
@Description:
@Update Date: 17-9-18, 09:56
"""
import matplotlib

matplotlib.use("Agg")
import matplotlib.pyplot as plt
import os
import time

from jampredict.utils.Cache import *
from jampredict.utils import Paramater
from jampredict.feature import Data
import seaborn as ses

if __name__ == '__main__':
    datas, times, x_num, y_num, interval, startTime, endTime, nospeed_regions = \
        Data.loadRawData(os.path.join(Paramater.DATAPATH, "2016/all/48_48_20_LinearInterpolationFixed"),
                         os.path.join(Paramater.DATAPATH, "48_48_20_noSpeedRegion_0.05"),
                         False)
    x_index = 24
    y_index = 24
    datas = datas[:, 0]
    f, ax = plt.subplots(1, 1, figsize=(15, 7))
    print(datas[:500, x_index, y_index])
    ses.tsplot(datas[:500, x_index, y_index], ax=ax)
    plt.savefig(os.path.join(Paramater.PROJECTPATH, "fig/test2.jpg"))
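# NOTE: seaborn's tsplot API used above has been deprecated in newer seaborn
# releases. On a current install, the equivalent single-series plot would be
# something like (assumption about the intended visual):
#   ses.lineplot(x=list(range(500)), y=datas[:500, x_index, y_index], ax=ax)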
def main():
    # load data
    print("loading data...")
    ts = time.time()
    if is_mmn:
        fname = os.path.join(Paramater.DATAPATH, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_mmn.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    else:
        fname = os.path.join(Paramater.DATAPATH, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    x_num = y_num = 48
    z_num = Paramater.Z_NUM
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = read_cache(fname, is_mmn)
        print("load %s successfully" % fname)
    else:
        datapaths = [Paramater.DATAPATH + "48_48_20_LinearInterpolationFixed_condition"]
        noConditionRegionsPath = Paramater.PROJECTPATH + "data/48_48_20_noSpeedRegion_0.05"
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = Data.loadDataFromRaw(
            paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
            nb_flow=nb_flow, len_closeness=len_closeness,
            len_period=len_period, len_trend=len_trend,
            len_test=len_test, preprocess_name='preprocessing.pkl',
            meta_data=hasExternal, meteorol_data=hasExternal,
            holiday_data=hasExternal)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test, noConditionRegions,
                  is_mmn, x_num, y_num, Paramater.Z_NUM)
    # print("\n days (test): ", [v[:8] for v in timestamp_test[0::72]])
    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("compiling model...")
    print("**at the first time, it takes a few minutes to compile if you use [Theano] as the backend**")
    ts = time.time()
    model = build_model(external_dim, x_num=x_num, y_num=y_num)
    model.load_weights(Paramater.PROJECTPATH +
                       "/MODEL/c3.p1.t1.resunit6.lr0.0002.External.MMN.cont.best.h5")
    if not is_mmn:
        predict = matrixsRounding(model.predict(X_test))
    else:
        predict = mmn.inverse_transform(model.predict(X_test))
        # print(predict)
        predict = matrixsRounding(predict)
        # print(predict)
    print("RMSE:", Metric.RMSE(predict, Y_test, noConditionRegions))
    print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
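# ---------------------------------------------------------------------------
# NOTE (assumption): `matrixsRounding` is defined elsewhere in the repo. Since
# the condition labels here are discrete levels, a minimal stand-in that
# matches its usage is elementwise rounding:
def _matrixs_rounding_sketch(mats):
    """Round real-valued predictions to the nearest integer level
    (hypothetical stand-in for matrixsRounding)."""
    import numpy as np
    return np.rint(mats)
# ---------------------------------------------------------------------------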
def main():
    all_results = {}
    for _c in clossesness:
        len_closeness = _c
        print("closeness is %s" % len_closeness)
        # load data
        print("loading data...")
        ts = time.time()
        datapath = os.path.join(Paramater.DATAPATH, "2016", month)
        if is_mmn:
            fname = os.path.join(datapath, 'CACHE',
                                 'TaxiBJ_C{}_P{}_T{}_{}_mmn_speed.h5'.format(
                                     len_closeness, len_period, len_trend,
                                     "External" if hasExternal else "noExternal"))
        else:
            fname = os.path.join(datapath, 'CACHE',
                                 'TaxiBJ_C{}_P{}_T{}_{}_speed.h5'.format(
                                     len_closeness, len_period, len_trend,
                                     "External" if hasExternal else "noExternal"))
        x_num = y_num = 48
        pkl = fname + '.preprocessing_speed.pkl'
        if os.path.exists(fname) and CACHEDATA:
            X_train, Y_train, X_test, Y_test, mmn, external_dim, \
            timestamp_train, timestamp_test, noConditionRegions, \
            x_num, y_num, z_num = read_cache(fname, is_mmn, pkl)
            print("load %s successfully" % fname)
        else:
            datapaths = [os.path.join(datapath, "48_48_20_MaxSpeedFillingFixed_5")]
            noConditionRegionsPath = os.path.join(datapath, "48_48_20_noSpeedRegion_0.05")
            X_train, Y_train, X_test, Y_test, \
            mmn, external_dim, timestamp_train, \
            timestamp_test, noConditionRegions, \
            x_num, y_num, z_num = Data.loadDataFromRaw(
                paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
                nb_flow=nb_flow, len_closeness=len_closeness,
                len_period=len_period, len_trend=len_trend,
                len_test=len_test, maxMinNormalization=is_mmn,
                preprocess_name=pkl, meta_data=hasExternal,
                meteorol_data=hasExternal, holiday_data=hasExternal,
                isComplete=False)
            if CACHEDATA:
                cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                      timestamp_train, timestamp_test, noConditionRegions,
                      is_mmn, x_num, y_num, Paramater.Z_NUM)
        # print("\n days (test): ", [v[:8] for v in timestamp_test[0::72]])
        print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

        if isinstance(X_train, list):
            print("X_train len: %d" % len(X_train))
            for i, _x_train in enumerate(X_train):
                print("x_train_{} shape: {}".format(i, _x_train.shape))
        else:
            print("X_train shape: {}".format(X_train.shape))
        print("Y_train shape: {}".format(Y_train.shape))
        if isinstance(X_test, list):
            print("X_test len: %d" % len(X_test))
            for i, _x_test in enumerate(X_test):
                print("x_test_{} shape: {}".format(i, _x_test.shape))
        else:
            print("X_test shape: {}".format(X_test.shape))
        print("Y_test shape: {}".format(Y_test.shape))

        # (disabled experiment: re-windowing the full series for non-CNN models)
        # if not use_CNN_model:
        #     if isinstance(X_train, list) and len(X_train) == 1:
        #         X_train = X_train[0]
        #         X_test = X_test[0]
        #     X = np.vstack([X_train, X_test])
        #     Y = np.vstack([Y_train, Y_test])
        #     print("X", X.shape)
        #     print("Y", Y.shape)
        #     X, Y = Data.getSequenceXY(X, Y, len_period)
        #     Y = Y[:, -1]
        #     print("after sequence:")
        #     print("X", X.shape)
        #     print("Y", Y.shape)
        #     X_train = X[:-800]
        #     X_test = X[-800:]
        #     Y_train = Y[:-800]
        #     Y_test = Y[-800:]

        print('=' * 10)
        print("compiling model...")
        print("**at the first time, it takes a few minutes to compile if you use [Theano] as the backend**")

        # (disabled baseline: naive averaging of the input frames)
        # predict = mmn.inverse_transform(average_method(X_test))
        # Y_test = mmn.inverse_transform(Y_test)
        # print("predict", predict)
        # print("test", Y_test)
        # rmse = Metric.RMSE(predict, Y_test, noConditionRegions)
        # results["avg_method"] = {"rmse": rmse}
        # print(rmse)
        # exit(1)

        results = {}
        if isinstance(X_test, list) and len(X_test) == 1:
            X_test = X_test[0]
        if isinstance(X_train, list) and len(X_train) == 1:
            X_train = X_train[0]
        if isinstance(Y_train, list) and len(Y_train) == 1:
            Y_train = Y_train[0]
        if isinstance(Y_test, list) and len(Y_test) == 1:
            Y_test = Y_test[0]
        X_test_copy = X_test.copy()
        X_train_copy = X_train.copy()
        Y_train_copy = Y_train.copy()
        Y_test_copy = Y_test.copy()

        for model_method, name in model_methods:
            # start each method from a fresh copy of the data
            X_test = X_test_copy.copy()
            X_train = X_train_copy.copy()
            Y_train = Y_train_copy.copy()
            Y_test = Y_test_copy.copy()
            print(name)
            result = {}
            results[name] = result
            ts = time.time()
            # print(X_train)
            print("start building model")
            # (alternative hand-built ConvLSTM model, kept for reference)
            # input = Input(shape=(nb_flow * len_period, x_num, y_num))
            # reshape = Reshape((len_period, nb_flow, x_num, y_num))(input)
            # convLSTM = ConvLSTM2D(nb_filter=32, nb_row=3, nb_col=3, border_mode="same")(reshape)
            # act2 = Activation("relu")(convLSTM)
            # main_output = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")(act2)
            model = model_method(x_num, y_num, _c)
            adam = Adam(lr=lr)
            from keras.optimizers import SGD, RMSprop
            sgd = SGD(lr, clipvalue=0.01)
            rmsprop = RMSprop()
            model.compile(loss='mse', optimizer=adam, metrics=[metrics.rmse])
            # model.summary()
            # exit(1)
            print("finish building model")
            result["build_time"] = time.time() - ts
            print("\nelapsed time (compiling model): %.3f seconds\n" % (time.time() - ts))

            hyperparams_name = 'closenesstest_{}_speed.c{}.p{}.t{}.resunit{}.lr{}.{}.{}'.format(
                name, len_closeness, len_period, len_trend, nb_residual_unit, lr,
                "External" if hasExternal else "noExternal",
                "MMN" if is_mmn else "noMMN")
            fname_param = os.path.join(path_model, '{}.best.h5'.format(hyperparams_name))
            early_stopping = EarlyStopping(monitor='val_rmse', patience=4, mode='min')
            model_checkpoint = ModelCheckpoint(fname_param, monitor='val_rmse', verbose=1,
                                               save_best_only=True, mode='min',
                                               save_weights_only=True)

            print('=' * 10)
            time.sleep(20)  # brief pause between runs
            print("training model...")
            ts = time.time()
            history = model.fit(X_train, Y_train,
                                nb_epoch=nb_epoch,
                                batch_size=batch_size,
                                validation_split=0.1,
                                callbacks=[early_stopping, model_checkpoint],
                                verbose=1)
            result["train_time"] = time.time() - ts
            print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))
            model.save_weights(os.path.join(path_model, '{}.h5'.format(hyperparams_name)),
                               overwrite=True)
            pickle.dump(history.history,
                        open(os.path.join(path_result,
                                          '{}.history.pkl'.format(hyperparams_name)), 'wb'))

            print('=' * 10)
            print('evaluating using the model that has the best loss on the valid set')
            ts = time.time()
            model.load_weights(fname_param)
            score = model.evaluate(X_train, Y_train,
                                   batch_size=Y_train.shape[0] // 48, verbose=0)
            if is_mmn:
                print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
                      (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
            else:
                print('Train score: %.6f rmse (real): %.6f' % (score[0], score[1]))
            score = model.evaluate(X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
            if is_mmn:
                print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
                      (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
            else:
                print('Test score: %.6f rmse (real): %.6f' % (score[0], score[1]))
            if not is_mmn:
                predict = model.predict(X_test)
            else:
                predict = mmn.inverse_transform(model.predict(X_test))
                Y_test = mmn.inverse_transform(Y_test)
            # print("predict", predict)
            # print("test", Y_test)
            rmse = Metric.RMSE(predict, Y_test, noConditionRegions)
            save_result(predict, Y_test, timestamp_test,
                        "./result/{}_predict_rmse{}".format(hyperparams_name, str(rmse)))
            result["rmse"] = rmse
            print("RMSE:", rmse)
            # print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
            print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))

            # (disabled average baseline)
            # X_test = X_test_copy.copy()
            # Y_test = Y_test_copy.copy()
            # predict = mmn.inverse_transform(average_method(X_test))
            # Y_test = mmn.inverse_transform(Y_test)
            # print("predict", predict)
            # print("test", Y_test)
            # rmse = Metric.RMSE(predict, Y_test, noConditionRegions)
            # results["avg_method"] = {"rmse": rmse}

        all_results[_c] = results  # record this closeness setting for the summary below
        print("closeness is {} and the final result is:".format(_c))
        for method_name, vs in results.items():
            print("%s :" % method_name)
            for _m, _v in vs.items():
                print("  %s %s" % (_m, _v))
            print("")

    print("all finish")
    for _p, _rs in all_results.items():
        print("closeness is {} and the final result is:".format(_p))
        for method_name, vs in _rs.items():
            print("%s :" % method_name)
            for _m, _v in vs.items():
                print("  %s %s" % (_m, _v))
            print("")

    # collect RMSEs into a (closeness x method) table, keyed by method name
    d = {}
    for _model_method, _name in model_methods:
        d[_name] = {}
    d["avg_method"] = {}
    for _p, _rs in all_results.items():
        for method_name, vs in _rs.items():
            for _m, _v in vs.items():
                if _m == "rmse":
                    d[method_name][_p] = _v
    import pandas as pd  # local import, matching the style used elsewhere in this repo
    closeness_df = pd.DataFrame(d)
    closeness_df.to_csv("./result/clossness_rmse.csv", float_format="%.5f")
def main():
    # load data
    print("loading data...")
    ts = time.time()
    if is_mmn:
        fname = os.path.join(Paramater.DATAPATH, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_mmn.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    else:
        fname = os.path.join(Paramater.DATAPATH, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    f2name = fname.replace(".h5", "_cell.h5")
    if CACHEDATA and os.path.exists(f2name):
        print(f2name)
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = read_cache(f2name, is_mmn)
        print("load %s successfully" % f2name)
    else:
        if os.path.exists(fname) and CACHEDATA:
            X_train, Y_train, X_test, Y_test, mmn, external_dim, \
            timestamp_train, timestamp_test, noConditionRegions, \
            x_num, y_num, z_num = read_cache(fname, is_mmn)
            print("load %s successfully" % fname)
        else:
            datapaths = [Paramater.DATAPATH + "48_48_20_LinearInterpolationFixed_condition"]
            noConditionRegionsPath = Paramater.PROJECTPATH + "data/48_48_20_noSpeedRegion_0.05"
            X_train, Y_train, X_test, Y_test, mmn, external_dim, \
            timestamp_train, timestamp_test, noConditionRegions, \
            x_num, y_num, z_num = Data.loadDataFromRaw(
                paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
                nb_flow=nb_flow, len_closeness=len_closeness,
                len_period=len_period, len_trend=len_trend,
                len_test=len_test, maxMinNormalization=is_mmn,
                preprocess_name='preprocessing.pkl', meta_data=hasExternal,
                meteorol_data=hasExternal, holiday_data=hasExternal)
            if CACHEDATA:
                cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                      timestamp_train, timestamp_test, noConditionRegions,
                      is_mmn, x_num, y_num, z_num)
        # flatten grids into one sample per observed cell for scikit-learn
        X_train, Y_train = Data.transformMatrixToCell(X_train, Y_train,
                                                      noConditionRegions, hasExternal)
        X_test, Y_test = Data.transformMatrixToCell(X_test, Y_test,
                                                    noConditionRegions, hasExternal)
        if CACHEDATA:
            cache(f2name, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test, list(noConditionRegions),
                  is_mmn, x_num, y_num, z_num)

    # grid cv
    if grid_cv:
        max_depth = [None, 5, 10, 15]
        min_samples_split = [2, 4, 6]
        min_samples_leaf = [1, 2, 3]
        criterion = ["gini", "entropy"]
        param_grid = dict(max_depth=max_depth,
                          min_samples_split=min_samples_split,
                          min_samples_leaf=min_samples_leaf,
                          criterion=criterion)
        grid = GridSearchCV(estimator=DecisionTreeClassifier(random_state=random_state),
                            scoring="accuracy", param_grid=param_grid,
                            n_jobs=-1, verbose=1)
        grid.refit = False  # only the best parameters are needed; the final fit happens below
        grid_result = grid.fit(X_train, Y_train)
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        max_depth = grid_result.best_params_['max_depth']
        min_samples_split = grid_result.best_params_['min_samples_split']
        min_samples_leaf = grid_result.best_params_['min_samples_leaf']
        criterion = grid_result.best_params_["criterion"]
    else:
        max_depth = 10
        min_samples_split = 4
        min_samples_leaf = 1
        criterion = "gini"
    classifier = DecisionTreeClassifier(criterion=criterion,
                                        max_depth=max_depth,
                                        min_samples_leaf=min_samples_leaf,
                                        min_samples_split=min_samples_split,
                                        random_state=random_state)
    print("DT training...")
    classifier.fit(X_train, Y_train)
    print("training finished")
    score = classifier.score(X_test, Y_test)
    print(score)
    predict = classifier.predict(X_test)
    # scatter per-cell predictions back onto the grid for the masked metrics
    predict = Data.transformCellToMatrix(predict,
                                         Data.getMatrixSize(predict.shape[0], x_num, y_num,
                                                            z_num, len(noConditionRegions)),
                                         x_num, y_num, z_num, noConditionRegions)
    Y_test = Data.transformCellToMatrix(Y_test,
                                        Data.getMatrixSize(Y_test.shape[0], x_num, y_num,
                                                           z_num, len(noConditionRegions)),
                                        x_num, y_num, z_num, noConditionRegions)
    print("RMSE:", Metric.RMSE(predict, Y_test, noConditionRegions))
    print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
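# ---------------------------------------------------------------------------
# NOTE (assumption): `Data.transformMatrixToCell` flattens each grid frame
# into one training row per observed (x, y) cell so that scikit-learn
# estimators can consume the data. A rough sketch of that behavior
# (hypothetical; the real helper also handles targets and external features):
def _matrix_to_cells_sketch(X, no_condition_regions, x_num=48, y_num=48):
    """Turn frames of shape (channels, x_num, y_num) into per-cell rows."""
    import numpy as np
    rows = []
    for frame in X:
        for x in range(x_num):
            for y in range(y_num):
                if (x, y) in no_condition_regions:
                    continue  # skip cells with no observations
                rows.append(frame[:, x, y])
    return np.asarray(rows)
# ---------------------------------------------------------------------------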
def main():
    # load data
    print("loading data...")
    ts = time.time()
    if is_mmn:
        fname = os.path.join(Paramater.DATAPATH, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_mmn.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    else:
        fname = os.path.join(Paramater.DATAPATH, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    f2name = fname.replace(".h5", "_cell.h5")
    if CACHEDATA and os.path.exists(f2name):
        print(f2name)
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = read_cache(f2name, is_mmn)
        print("load %s successfully" % f2name)
    else:
        if os.path.exists(fname) and CACHEDATA:
            X_train, Y_train, X_test, Y_test, mmn, external_dim, \
            timestamp_train, timestamp_test, noConditionRegions, \
            x_num, y_num, z_num = read_cache(fname, is_mmn)
            print("load %s successfully" % fname)
        else:
            datapaths = [Paramater.DATAPATH + "48_48_20_LinearInterpolationFixed_condition"]
            noConditionRegionsPath = Paramater.PROJECTPATH + "data/48_48_20_noSpeedRegion_0.05"
            X_train, Y_train, X_test, Y_test, mmn, external_dim, \
            timestamp_train, timestamp_test, noConditionRegions, \
            x_num, y_num, z_num = Data.loadDataFromRaw(
                paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
                nb_flow=nb_flow, len_closeness=len_closeness,
                len_period=len_period, len_trend=len_trend,
                len_test=len_test, maxMinNormalization=is_mmn,
                preprocess_name='preprocessing.pkl', meta_data=hasExternal,
                meteorol_data=hasExternal, holiday_data=hasExternal)
            if CACHEDATA:
                cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                      timestamp_train, timestamp_test, noConditionRegions,
                      is_mmn, x_num, y_num, z_num)
        X_train, Y_train = Data.transformMatrixToCell(X_train, Y_train,
                                                      noConditionRegions, hasExternal)
        X_test, Y_test = Data.transformMatrixToCell(X_test, Y_test,
                                                    noConditionRegions, hasExternal)
        if CACHEDATA:
            cache(f2name, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test, list(noConditionRegions),
                  is_mmn, x_num, y_num, z_num)

    print("baseline training...")
    bl = BaseLine(maxC=3, maxD=1, maxW=1, minSupport=10, minConfidence=0.9)
    bl.fit(X_train, Y_train, len_closeness, len_period, len_trend)
    print("baseline training finished")
    predict = bl.predict(X_test)
    predict = Data.transformCellToMatrix(predict,
                                         Data.getMatrixSize(predict.shape[0], x_num, y_num,
                                                            z_num, len(noConditionRegions)),
                                         x_num, y_num, z_num, noConditionRegions)
    Y_test = Data.transformCellToMatrix(Y_test,
                                        Data.getMatrixSize(Y_test.shape[0], x_num, y_num,
                                                           z_num, len(noConditionRegions)),
                                        x_num, y_num, z_num, noConditionRegions)
    print("RMSE:", Metric.RMSE(predict, Y_test, noConditionRegions))
    print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
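# ---------------------------------------------------------------------------
# NOTE (assumption): `Metric.accuracy` compares predicted and true condition
# levels over observed cells only. A sketch consistent with how it is called
# (the (x, y) layout of `noConditionRegions` is an assumption):
def _masked_accuracy_sketch(predict, truth, no_condition_regions):
    """Fraction of observed cells whose predicted level matches the truth
    (hypothetical helper)."""
    import numpy as np
    mask = np.ones(predict.shape[-2:], dtype=bool)
    for x, y in no_condition_regions:
        mask[x, y] = False
    return float(np.mean(predict[..., mask] == truth[..., mask]))
# ---------------------------------------------------------------------------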
def main():
    # load data
    print("loading data...")
    ts = time.time()
    datapath = os.path.join(Paramater.DATAPATH, "2016", "all")
    if is_mmn:
        fname = os.path.join(datapath, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_mmn_speed.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    else:
        fname = os.path.join(datapath, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_speed.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    x_num = y_num = 48
    pkl = fname + '.preprocessing_speed.pkl'
    if os.path.exists(fname) and CACHEDATA:
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = read_cache(fname, is_mmn, pkl)
        print("load %s successfully" % fname)
    else:
        datapaths = [os.path.join(datapath, "48_48_20_MaxSpeedFillingFixed_5")]
        noConditionRegionsPath = os.path.join(datapath, "48_48_20_noSpeedRegion_0.05")
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = Data.loadDataFromRaw(
            paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
            nb_flow=nb_flow, len_closeness=len_closeness,
            len_period=len_period, len_trend=len_trend,
            len_test=len_test, maxMinNormalization=is_mmn,
            preprocess_name=pkl, meta_data=hasExternal,
            meteorol_data=hasExternal, holiday_data=hasExternal,
            isComplete=False)
        if CACHEDATA:
            cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test, noConditionRegions,
                  is_mmn, x_num, y_num, Paramater.Z_NUM)
    # print("\n days (test): ", [v[:8] for v in timestamp_test[0::72]])
    print("\nelapsed time (loading data): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("compiling model...")
    print("**at the first time, it takes a few minutes to compile if you use [Theano] as the backend**")
    ts = time.time()
    print(X_train)
    print("start building model")
    # (alternative ResNet branch, kept for reference)
    # input = Input(shape=(nb_flow * len_closeness, x_num, y_num))
    # # Conv1
    # conv1 = Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode="same")(input)
    # # [nb_residual_unit] Residual Units
    # residual_output = ResUnits(_residual_unit, nb_filter=64,
    #                            repetations=nb_residual_unit)(conv1)
    # # Conv2
    # activation = Activation('relu')(residual_output)
    # conv2 = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")(activation)
    # main_output = Activation('tanh')(conv2)
    input = Input(shape=(nb_flow * len_closeness, x_num, y_num))
    # conv1 = Convolution2D(nb_filter=64, nb_row=3, nb_col=3, border_mode="same")(input)
    # act1 = Activation("relu")(conv1)
    reshape = Reshape((len_closeness, nb_flow, x_num, y_num))(input)
    convLSTM = ConvLSTM2D(nb_filter=32, nb_row=3, nb_col=3, border_mode="same",
                          inner_activation="relu")(reshape)
    act2 = Activation("relu")(convLSTM)
    conv2 = Convolution2D(nb_filter=nb_flow, nb_row=3, nb_col=3, border_mode="same")(act2)
    main_output = Activation('tanh')(conv2)
    model = Model(input=input, output=main_output)
    adam = Adam(lr=lr)
    model.compile(loss='mse', optimizer=adam, metrics=[metrics.rmse])
    model.summary()
    print("finish building model")

    hyperparams_name = 'testMyModel_speed.c{}.p{}.t{}.resunit{}.lr{}.{}.{}'.format(
        len_closeness, len_period, len_trend, nb_residual_unit, lr,
        "External" if hasExternal else "noExternal",
        "MMN" if is_mmn else "noMMN")
    fname_param = os.path.join(path_model, '{}.best.h5'.format(hyperparams_name))
    early_stopping = EarlyStopping(monitor='val_rmse', patience=2, mode='min')
    model_checkpoint = ModelCheckpoint(fname_param, monitor='val_rmse', verbose=0,
                                       save_best_only=True, mode='min')
    print("\nelapsed time (compiling model): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print("training model...")
    ts = time.time()
    history = model.fit(X_train, Y_train,
                        nb_epoch=nb_epoch,
                        batch_size=batch_size,
                        validation_split=0.1,
                        callbacks=[early_stopping, model_checkpoint],
                        verbose=1)
    model.save_weights(os.path.join(path_model, '{}.h5'.format(hyperparams_name)),
                       overwrite=True)
    pickle.dump(history.history,
                open(os.path.join(path_result, '{}.history.pkl'.format(hyperparams_name)), 'wb'))
    print("\nelapsed time (training): %.3f seconds\n" % (time.time() - ts))

    print('=' * 10)
    print('evaluating using the model that has the best loss on the valid set')
    ts = time.time()
    model.load_weights(fname_param)
    score = model.evaluate(X_train, Y_train, batch_size=Y_train.shape[0] // 48, verbose=0)
    if is_mmn:
        print('Train score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    else:
        print('Train score: %.6f rmse (real): %.6f' % (score[0], score[1]))
    score = model.evaluate(X_test, Y_test, batch_size=Y_test.shape[0], verbose=0)
    if is_mmn:
        print('Test score: %.6f rmse (norm): %.6f rmse (real): %.6f' %
              (score[0], score[1], score[1] * (mmn._max - mmn._min) / 2.))
    else:
        print('Test score: %.6f rmse (real): %.6f' % (score[0], score[1]))
    if not is_mmn:
        predict = model.predict(X_test)
    else:
        predict = mmn.inverse_transform(model.predict(X_test))
        Y_test = mmn.inverse_transform(Y_test)
    print("predict", predict)
    print("test", Y_test)
    print("RMSE:", Metric.RMSE(predict, Y_test, noConditionRegions))
    # print("accuracy", Metric.accuracy(predict, Y_test, noConditionRegions))
    print("\nelapsed time (eval): %.3f seconds\n" % (time.time() - ts))
    exit(1)
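# ---------------------------------------------------------------------------
# NOTE (illustrative): the Reshape above turns the stacked closeness frames
# into a time sequence for ConvLSTM2D. A quick shape check with dummy data
# (batch of 8, nb_flow=1, len_closeness=3 on a 48x48 grid; the numbers are
# assumptions for the demo):
def _reshape_shape_check():
    import numpy as np
    x = np.zeros((8, 3 * 1, 48, 48))    # (batch, nb_flow * len_closeness, x, y)
    seq = x.reshape((8, 3, 1, 48, 48))  # (batch, time, channels, x, y)
    return seq.shape                    # -> (8, 3, 1, 48, 48)
# ---------------------------------------------------------------------------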
def main():
    # load data
    print("loading data...")
    datapath = os.path.join(Paramater.DATAPATH, "2016", "all")
    ts = time.time()
    if is_mmn:
        fname = os.path.join(datapath, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_mmn_speed.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    else:
        fname = os.path.join(datapath, 'CACHE',
                             'TaxiBJ_C{}_P{}_T{}_{}_speed.h5'.format(
                                 len_closeness, len_period, len_trend,
                                 "External" if hasExternal else "noExternal"))
    f2name = fname.replace(".h5", "_cell.h5")
    pkl_fname = fname + '.preprocessing_speed.pkl'
    if CACHEDATA and os.path.exists(f2name):
        # print(f2name)
        print("load %s successfully" % f2name)
        X_train, Y_train, X_test, Y_test, mmn, external_dim, \
        timestamp_train, timestamp_test, noConditionRegions, \
        x_num, y_num, z_num = read_cache(f2name, is_mmn, pkl_fname)
    else:
        if os.path.exists(fname) and CACHEDATA:
            X_train, Y_train, X_test, Y_test, mmn, external_dim, \
            timestamp_train, timestamp_test, noConditionRegions, \
            x_num, y_num, z_num = read_cache(fname, is_mmn, pkl_fname)
            print("load %s successfully" % fname)
        else:
            datapaths = [os.path.join(datapath, "48_48_20_MaxSpeedFillingFixed_5")]
            noConditionRegionsPath = os.path.join(datapath, "48_48_20_noSpeedRegion_0.05")
            X_train, Y_train, X_test, Y_test, mmn, external_dim, \
            timestamp_train, timestamp_test, noConditionRegions, \
            x_num, y_num, z_num = Data.loadDataFromRaw(
                paths=datapaths, noSpeedRegionPath=noConditionRegionsPath,
                nb_flow=nb_flow, len_closeness=len_closeness,
                len_period=len_period, len_trend=len_trend,
                len_test=len_test, maxMinNormalization=is_mmn,
                preprocess_name=pkl_fname, meta_data=hasExternal,
                meteorol_data=hasExternal, holiday_data=hasExternal,
                isComplete=False)
            if CACHEDATA:
                cache(fname, X_train, Y_train, X_test, Y_test, external_dim,
                      timestamp_train, timestamp_test, noConditionRegions,
                      is_mmn, x_num, y_num, z_num)
        X_train, Y_train = Data.transformMatrixToCell(X_train, Y_train,
                                                      noConditionRegions, hasExternal)
        X_test, Y_test = Data.transformMatrixToCell(X_test, Y_test,
                                                    noConditionRegions, hasExternal)
        if CACHEDATA:
            cache(f2name, X_train, Y_train, X_test, Y_test, external_dim,
                  timestamp_train, timestamp_test, list(noConditionRegions),
                  is_mmn, x_num, y_num, z_num)

    # print("X_train", X_train)
    # print("Y_train", Y_train)
    # print("X_test", X_test)
    # print("Y_test", Y_test)

    # grid cv
    if grid_cv:
        max_depth = [None, 5, 10, 15]
        min_samples_split = [2, 4, 6]
        min_samples_leaf = [1, 2, 3]
        criterion = ["mse", "mae"]
        param_grid = dict(max_depth=max_depth,
                          min_samples_split=min_samples_split,
                          min_samples_leaf=min_samples_leaf,
                          criterion=criterion)
        grid = GridSearchCV(estimator=DecisionTreeRegressor(random_state=random_state),
                            scoring="neg_mean_squared_error",
                            param_grid=param_grid, n_jobs=-1, verbose=1)
        grid.refit = False  # only the best parameters are needed; the final fit happens below
        grid_result = grid.fit(X_train, Y_train)
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        max_depth = grid_result.best_params_['max_depth']
        min_samples_split = grid_result.best_params_['min_samples_split']
        min_samples_leaf = grid_result.best_params_['min_samples_leaf']
        criterion = grid_result.best_params_["criterion"]
    else:
        max_depth = 10
        min_samples_split = 4
        min_samples_leaf = 1
        criterion = "mse"
    regressor = DecisionTreeRegressor(criterion=criterion,
                                      max_depth=max_depth,
                                      min_samples_leaf=min_samples_leaf,
                                      min_samples_split=min_samples_split,
                                      random_state=random_state)
    print("DT training...")
    regressor.fit(X_train, Y_train)
    print("training finished")
    score = regressor.score(X_test, Y_test)
    print(score)
    predict = regressor.predict(X_test)
    # print("p", predict, x_num, y_num, z_num, noConditionRegions)
    # scatter per-cell predictions back onto the grid, filling unobserved
    # cells with the minimum test value
    predict = Data.transformCellToMatrix(predict,
                                         Data.getMatrixSize(predict.shape[0], x_num, y_num,
                                                            z_num, len(noConditionRegions)),
                                         x_num, y_num, z_num, noConditionRegions, Y_test.min())
    Y_test = Data.transformCellToMatrix(Y_test,
                                        Data.getMatrixSize(Y_test.shape[0], x_num, y_num,
                                                           z_num, len(noConditionRegions)),
                                        x_num, y_num, z_num, noConditionRegions, Y_test.min())
    # print(predict)
    # print(Y_test)
    if is_mmn:
        mmn.printMinMax()
        predict = mmn.inverse_transform(predict)
        Y_test = mmn.inverse_transform(Y_test)
    print("predict", predict)
    print("Y_test", Y_test)
    print("RMSE:", Metric.RMSE(predict, Y_test, noConditionRegions))
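# ---------------------------------------------------------------------------
# NOTE (assumption): `Data.transformCellToMatrix` is the inverse of the cell
# transform: it scatters flattened per-cell values back onto the grid and
# fills the no-condition cells with a default value (here Y_test.min()).
# A rough single-channel sketch of that behavior (hypothetical; the real
# helper also handles z_num channels):
def _cells_to_matrix_sketch(cells, n_frames, x_num, y_num, no_condition_regions, fill):
    """Rebuild (n_frames, x_num, y_num) grids from flattened cell values."""
    import numpy as np
    out = np.full((n_frames, x_num, y_num), fill, dtype=float)
    coords = [(x, y) for x in range(x_num) for y in range(y_num)
              if (x, y) not in no_condition_regions]
    idx = 0
    for t in range(n_frames):
        for x, y in coords:
            out[t, x, y] = cells[idx]
            idx += 1
    return out
# ---------------------------------------------------------------------------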