def joint_learning():
    """Joint multi-task training loop.

    Fits a single model that simultaneously (a) forecasts the next
    temperature window (temp_2_11 from temp_1_10) and (b) classifies the
    outcome category from the parameter features, printing loss/mse/acc on
    the test split after every epoch.

    Relies on module-level globals: data_version, in_file_length,
    nb_epochs, batch_size.
    """
    X2 = fun2('number_age_col85tran_v2.csv')
    print(X2.shape)
    y = fun2('number_category.csv')
    # Saved case-number files define the train/test split for this version.
    nb_x_train = fun3(f_in='nb_x_train_%d.dat' % (data_version))
    nb_x_test = fun3(f_in='nb_x_test_%d.dat' % (data_version))
    X_train, X_test, y_train, y_test, = get_train_test_data(
        X2, y, nb_x_train, nb_x_test, )
    # Temperature windows: steps 1-10 are the model input, steps 2-11 the
    # regression target (one-step-ahead forecasting).
    temp_1_10 = fun2('front_1_10_temp.csv')
    temp_2_11 = fun2('front_2_11_temp.csv')
    temp_1_10_train, temp_1_10_test, temp_2_11_train, temp_2_11_test = get_train_test_data(
        temp_1_10, temp_2_11, nb_x_train, nb_x_test)
    temp_1_10_train = reshape_dataset(temp_1_10_train)
    temp_2_11_train = reshape_dataset(temp_2_11_train)
    temp_1_10_test = reshape_dataset(temp_1_10_test)
    temp_2_11_test = reshape_dataset(temp_2_11_test)
    print(temp_1_10_train.shape)
    # x_train represents list temperature
    # x2_train represents test parameter
    # NOTE(review): the upper slice starts at in_file_length + 1 while the
    # lower slice ends at in_file_length, so column `in_file_length` is
    # dropped entirely -- confirm this off-by-one is intentional.
    x_train = X_train[:, in_file_length + 1:]
    x2_train = X_train[:, 0:in_file_length]
    x_test = X_test[:, in_file_length + 1:]
    x2_test = X_test[:, 0:in_file_length]
    # Positive-class rate of the raw labels (presumably coded 1/2 -- verify
    # against category_to_target).
    print((sum(y_test) - len(y_test)) / len(y_test))
    y_train = category_to_target(y_train)
    y_test = category_to_target(y_test)
    model = joint_learning_model()
    mse_list = []
    acc_list = []
    for epoch in range(nb_epochs):
        print('Train...')
        # One training pass per outer epoch so we can evaluate in between;
        # nb_epoch= is the Keras 1 keyword used throughout this file.
        model.fit([temp_1_10_train, x_train, x2_train],
                  [temp_2_11_train, y_train],
                  batch_size=batch_size, nb_epoch=1, shuffle=True)
        loss, mse, acc = model.evaluate([temp_1_10_test, x_test, x2_test],
                                        [temp_2_11_test, y_test],
                                        batch_size=batch_size)
        print('loss:', loss)
        print('mse:', mse)
        print('acc:', acc)
def logistic_regression_temprature_prediction_epoch_lt12(data_version, penalty='l1'):
    """Track logistic-regression test accuracy as the iteration budget grows.

    For each i in [0, nb_epochs) a logistic regression is retrained from
    scratch with a max_iter budget of i + 1 on the padded lt12 temperature
    sequences, and its test accuracy is recorded and summarised.

    Parameters
    ----------
    data_version : int
        Cross-validation split index used to pick the case-number files.
    penalty : str
        Regularisation penalty for LogisticRegression (default 'l1').
    """
    X = diff_length_csv('temperature_lt12.csv')
    # Pad/truncate every temperature series to exactly 50 steps.
    X = pad_sequences(X, maxlen=50, padding='post', truncating='post',
                      value=0, dtype=float)
    X = np.array(X, dtype=float)
    nb_x_train = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % (data_version))
    nb_x_test = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % (data_version))
    y = same_length_csv('number_category_lt12.csv')
    X_train, X_test, y_train, y_test, = get_train_test_data(
        X, y, nb_x_train, nb_x_test, )
    acc_list = []
    for i in range(nb_epochs):
        # BUGFIX: the original passed max_iter=i, which is 0 (invalid) on the
        # first pass. A fresh estimator is built every iteration, so
        # warm_start never carried state over anyway; each score reflects a
        # full retrain with a budget of i + 1 iterations.
        # solver='liblinear' is required for the l1 penalty on
        # scikit-learn >= 0.22 (it was the historical default solver).
        lr = LogisticRegression(penalty=penalty, fit_intercept=True,
                                max_iter=i + 1, warm_start=True,
                                solver='liblinear')
        lr = lr.fit(X_train, y_train, )
        score = lr.score(X_test, y_test)
        acc_list.append(score)
    plot(range(0, nb_epochs), acc_list, label='temp_acc')
    # NOTE(review): these slices assume nb_epochs >= 210; with fewer epochs
    # they silently shorten or come back empty.
    acc_list_100_110 = acc_list[99:109]
    acc_list_200 = acc_list[0:200]
    print(len(acc_list_200))
    acc_list_210 = acc_list[200:]
    print(acc_list_210)
    print(len(acc_list_210))
    acc_list_sorted = sorted(acc_list_200, reverse=True)  # typo fixed: was acc_list_sored
    print(acc_list)
    title('temp_study_%d' % (data_version))
    print('temp_study_%d\n' % (data_version))
    print("top-10 mean: %.3f" % np.mean(np.array(acc_list_sorted[:10])))
    print("top-50 mean: %.3f" % np.mean(np.array(acc_list_sorted[:50])))
    # print("last-10 mean: %.3f" % np.mean(np.array(acc_list_210)))
    print("acc_100-110 mean: %.3f" % np.mean(np.array(acc_list_100_110)))
def temp_mutilstm_study(model):
    """Train `model` on fixed-length temperature sequences and plot learning curves.

    Pads the raw series to `time_steps`, reshapes with step_change_5, then
    trains one epoch at a time for nb_epochs, recording train loss, test
    loss and test accuracy per epoch before printing top-k summaries.

    Parameters
    ----------
    model : compiled Keras classifier accepting the step_change_5 layout.
    """
    X = diff_length_csv('temperature.csv')
    X = pad_sequences(X, maxlen=time_steps, padding='post', truncating='post',
                      value=padding_value, dtype=float)
    print(X.shape)
    y = same_length_csv('number_category.csv')
    nb_x_train = read_case_nb(f_in='nb_x_train_%d.dat' % (data_version))
    nb_x_test = read_case_nb(f_in='nb_x_test_%d.dat' % (data_version))
    x_train, x_test, y_train, y_test, = get_train_test_data(
        X, y, nb_x_train, nb_x_test, )
    y_train = category_to_target(y_train)
    y_test = category_to_target(y_test)
    x_train = step_change_5(x_train)
    x_test = step_change_5(x_test)
    # (removed the no-op `model = model` self-assignment)
    acc_list = []
    train_score_list = []
    test_score_list = []
    for epoch in range(nb_epochs):
        print('Train...')
        # One pass per outer epoch so we can evaluate between passes.
        model.fit(x_train, y_train, batch_size=batch_size, epochs=1,
                  shuffle=True, verbose=1)
        train_score, train_acc = model.evaluate(x_train, y_train,
                                                batch_size=batch_size)
        score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
        acc_list.append(acc)
        train_score_list.append(train_score)
        test_score_list.append(score)
        print('Test score:', score)
        print('Test accuracy:', acc)
    # subplot(2,3,data_version)
    plot(range(0, nb_epochs), acc_list, label='temp_acc')
    plot(range(0, nb_epochs), train_score_list, label='train_loss')
    plot(range(0, nb_epochs), test_score_list, label='test_loss')
    # NOTE(review): the [99:109] slice assumes nb_epochs >= 110; with fewer
    # epochs it silently shortens or is empty.
    acc_list_100_110 = acc_list[99:109]
    acc_list = sorted(acc_list, reverse=True)
    print(acc_list)
    title('temp_study_%d' % (data_version))
    print('temp_study_%d\n' % (data_version))
    print("top-10 mean: %.3f" % np.mean(np.array(acc_list[:10])))
    print("top-50 mean: %.3f" % np.mean(np.array(acc_list[:50])))
    print("acc_100-110 mean: %.3f" % np.mean(np.array(acc_list_100_110)))
def para_prediction(model, data_version):
    """Train `model` on the capability features and report per-epoch accuracy.

    Epochs with test accuracy > 0.72 are rendered blue and > 0.74 red via
    ANSI escapes in the summary string.

    NOTE(review): this file defines a second `para_prediction(f_x, ...)`;
    whichever definition comes later shadows the other -- consider renaming.

    Parameters
    ----------
    model : compiled Keras classifier.
    data_version : int
        Cross-validation split index used to pick the case-number files.
    """
    X = same_length_csv('cap_feature_2.csv')
    y = same_length_csv('number_category.csv')
    nb_x_train = read_case_nb(f_in='nb_x_train_%d.dat' % data_version)
    nb_x_test = read_case_nb(f_in='nb_x_test_%d.dat' % data_version)
    x_train, x_test, y_train, y_test = get_train_test_data(
        X, y, nb_x_train, nb_x_test, )
    # Positive-class rate of the raw labels (presumably coded 1/2 -- verify).
    probability_test = (sum(y_test) - len(y_test)) / len(y_test)
    print('probability_test:', probability_test)
    y_train = category_to_target(y_train)
    y_test = category_to_target(y_test)
    # (removed the no-op `model = model` self-assignment)
    print('Train...')
    acc_list = []
    train_loss_list = []
    test_loss_list = []
    for epoch in range(nb_epochs):
        # nb_epoch= is the Keras 1 keyword; kept to match the rest of the file.
        model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=1,
                  validation_split=0.05)
        train_score, train_acc = model.evaluate(x_train, y_train,
                                                batch_size=batch_size)
        score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
        acc_list.append(acc)
        train_loss_list.append(train_score)
        test_loss_list.append(score)
        print('Test score:', score)
        print('Test accuracy:', acc)
    results = ""
    for i, acc in enumerate(acc_list):
        if acc > 0.72:
            if acc > 0.74:
                results += '\033[1;31m' + str(i + 1) + ':' + str(
                    acc) + '\033[0m' + '; '
            else:
                results += '\033[1;34m' + str(i + 1) + ':' + str(
                    acc) + '\033[0m' + '; '
        else:
            results += str(i + 1) + ':' + str(acc) + '; '
    # BUGFIX: `results` was built but never shown (the sibling
    # para_prediction variant prints it); emit it here as well.
    print(results)
    plot(range(0, nb_epochs), acc_list, label='feature')
    plot(range(0, nb_epochs), train_loss_list, label='train_loss')
    plot(range(0, nb_epochs), test_loss_list, label='test_loss')
    acc_list = sorted(acc_list, reverse=True)
    print(acc_list)
    print("top-10 mean: %.3f" % np.mean(np.array(acc_list[:10])))
    print("top-50 mean: %.3f" % np.mean(np.array(acc_list[:50])))
def gbdt_feature_prediction_lt12(data_version):
    """Score a gradient-boosting classifier on the lt12 capability features.

    Loads the feature matrix and category labels, splits them with the saved
    lt12 cross-validation case numbers for `data_version`, fits a GBDT and
    returns (and prints) its accuracy on the held-out split.
    """
    features = same_length_csv('cap_feature_lt12.csv')
    labels = same_length_csv('number_category_lt12.csv')
    train_cases = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % (data_version))
    test_cases = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % (data_version))
    X_train, X_test, y_train, y_test = get_train_test_data(
        features, labels, train_cases, test_cases)
    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.01,
                                     max_depth=3)
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    print(accuracy)
    return accuracy
def logistic_regression_feature_prediction_lt12(data_version, penalty='l1'):
    """Score a logistic regression on the lt12 capability features.

    Parameters
    ----------
    data_version : int
        Cross-validation split index used to pick the case-number files.
    penalty : str
        Regularisation penalty (default 'l1').

    Returns
    -------
    float
        Test-set accuracy (also printed).
    """
    X2 = same_length_csv('cap_feature_lt12.csv')
    nb_x_train = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % (data_version))
    nb_x_test = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % (data_version))
    y = same_length_csv('number_category_lt12.csv')
    X_train, X_test, y_train, y_test, = get_train_test_data(
        X2, y, nb_x_train, nb_x_test, )
    # BUGFIX: scikit-learn >= 0.22 defaults to the lbfgs solver, which
    # rejects penalty='l1'; liblinear (the historical default) supports it.
    # warm_start has no effect on a single fit (and none with liblinear).
    lr = LogisticRegression(penalty=penalty, fit_intercept=True, max_iter=200,
                            warm_start=True, tol=0.0001, solver='liblinear')
    lr = lr.fit(X_train, y_train, )
    # print(lr.coef_)
    score = lr.score(X_test, y_test)
    print(score)
    return score
def temp_lstm(f_in='5_day_50_check.csv'):
    """Train conv_lstm_model on padded temperature sequences from `f_in`.

    Records test accuracy once before training and after each of nb_epochs
    single-epoch passes, plots the curve against the majority-class
    baseline, and prints the mean of the top-10 accuracies.
    """
    # A trailing 't' (i.e. a .dat file) selects the ragged-.dat reader;
    # anything else goes through the ragged-.csv reader.
    reader = diff_length_dat if f_in[-1] == 't' else diff_length_csv
    X = reader(f_in)
    X = pad_sequences(X, maxlen=maxlen, padding='post', truncating='post',
                      value=0, dtype=float)
    X = np.array(X, dtype=float)
    # print(X[0])
    y = fun2('number_category.csv')
    nb_x_train = fun3(f_in='nb_x_train_%d.dat' % (data_version))
    nb_x_test = fun3(f_in='nb_x_test_%d.dat' % (data_version))
    x_train, x_test, y_train, y_test = get_train_test_data(
        X, y, nb_x_train, nb_x_test)
    probability_test = (sum(y_test) - len(y_test)) / len(y_test)
    print('probability_test:', probability_test)
    y_train = category_to_target(y_train)
    y_test = category_to_target(y_test)
    model = conv_lstm_model()
    # Baseline evaluation of the untrained model, then one entry per epoch.
    acc_list = []
    score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
    acc_list.append(acc)
    print('Train...')
    for epoch in range(nb_epochs):
        model.fit(x_train, y_train, batch_size=batch_size, verbose=0,
                  nb_epoch=1, shuffle=True)
        score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
        acc_list.append(acc)
        print('Test score:', score)
        print('Test accuracy:', acc)
    subplot(2, 3, data_version)
    epoch_axis = range(0, nb_epochs + 1)
    plot(epoch_axis, [probability_test] * (nb_epochs + 1))
    plot(epoch_axis, acc_list)
    acc_list = sorted(acc_list, reverse=True)
    title('temp_pre_conv_dataset_%d' % (data_version))
    print("top-K mean: %.3f" % np.mean(np.array(acc_list[:10])))
def gbdt_temprature_prediction_lt12(data_version):
    """Score a gradient-boosting classifier on padded lt12 temperature series.

    Pads every series to 50 steps, splits with the saved lt12
    cross-validation case numbers for `data_version`, fits a GBDT and
    returns (and prints) its held-out accuracy.
    """
    sequences = diff_length_csv('temperature_lt12.csv')
    sequences = pad_sequences(sequences, maxlen=50, padding='post',
                              truncating='post', value=0, dtype=float)
    X = np.array(sequences, dtype=float)
    train_cases = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % (data_version))
    test_cases = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % (data_version))
    labels = same_length_csv('number_category_lt12.csv')
    X_train, X_test, y_train, y_test = get_train_test_data(
        X, labels, train_cases, test_cases)
    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.01,
                                     max_depth=3)
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    print(accuracy)
    return accuracy
def logistic_regression_temperature_prediction_lt12(data_version, penalty='l1'):
    """Score a logistic regression on padded lt12 temperature sequences.

    Parameters
    ----------
    data_version : int
        Cross-validation split index used to pick the case-number files.
    penalty : str
        Regularisation penalty (default 'l1').

    Returns
    -------
    float
        Test-set accuracy (also printed).
    """
    X = diff_length_csv('temperature_lt12.csv')
    # Pad/truncate every temperature series to exactly 50 steps.
    X = pad_sequences(X, maxlen=50, padding='post', truncating='post',
                      value=0, dtype=float)
    X = np.array(X, dtype=float)
    nb_x_train = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % (data_version))
    nb_x_test = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % (data_version))
    y = same_length_csv('number_category_lt12.csv')
    X_train, X_test, y_train, y_test, = get_train_test_data(
        X, y, nb_x_train, nb_x_test, )
    # BUGFIX: scikit-learn >= 0.22 defaults to the lbfgs solver, which
    # rejects penalty='l1'; liblinear (the historical default) supports it.
    # warm_start has no effect on a single fit (and none with liblinear).
    lr = LogisticRegression(penalty=penalty, fit_intercept=True, max_iter=200,
                            warm_start=True, solver='liblinear')
    lr = lr.fit(X_train, y_train, )
    # print(lr.coef_)
    score = lr.score(X_test, y_test)
    print(score)
    return score
def para_prediction(f_x='number_age_col85tran_v2.csv', in_file_length=87):
    """Train a fresh classifier on the parameter features in `f_x` and plot
    per-epoch test accuracy.

    NOTE(review): this file also defines `para_prediction(model,
    data_version)`; whichever definition comes later shadows the other --
    consider renaming one of them.

    Parameters
    ----------
    f_x : str
        CSV file of input features.
    in_file_length : int
        Feature-vector width passed to get_model (must match `f_x`'s columns
        -- TODO confirm).
    """
    X = fun2(f_x)
    y = fun2('number_category.csv')
    nb_x_train = fun3(f_in='nb_x_train_%d.dat' % data_version)
    nb_x_test = fun3(f_in='nb_x_test_%d.dat' % data_version)
    x_train, x_test, y_train, y_test = get_train_test_data(
        X, y, nb_x_train, nb_x_test, )
    # Positive-class rate of the raw labels (presumably coded 1/2 -- verify).
    probability_test = (sum(y_test) - len(y_test)) / len(y_test)
    print('probability_test:', probability_test)
    y_train = category_to_target(y_train)
    y_test = category_to_target(y_test)
    model = get_model(in_file_length)
    print('Train...')
    # Baseline evaluation of the untrained model, then one entry per epoch.
    acc_list = []
    score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
    acc_list.append(acc)
    for epoch in range(nb_epochs):
        # print('Train...')
        # nb_epoch= is the Keras 1 keyword used throughout this file.
        model.fit(x_train, y_train, batch_size=batch_size, nb_epoch=1,
                  validation_split=0.05)
        score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
        acc_list.append(acc)
        print('Test score:', score)
        print('Test accuracy:', acc)
    # Colour-code the epoch summary: red above 0.74, blue above 0.72.
    results = ""
    for i, acc in enumerate(acc_list):
        if acc > 0.72:
            if acc > 0.74:
                results += '\033[1;31m' + str(i + 1) + ':' + str(acc) + '\033[0m' + '; '
            else:
                results += '\033[1;34m' + str(i + 1) + ':' + str(acc) + '\033[0m' + '; '
        else:
            results += str(i + 1) + ':' + str(acc) + '; '
    print(results)
    subplot(2, 3, data_version)
    title('para_prediction_dataset_%d' % (data_version))
    plot(range(0, nb_epochs + 1), acc_list, label='length:%d' % (in_file_length))
    acc_list = sorted(acc_list, reverse=True)
    print(acc_list)
    print("top-K mean: %.3f" % np.mean(np.array(acc_list[:10])))
if __name__ == '__main__':
    # Script entry point: build the merged next-temperature model, load the
    # capability features and category labels, split them with the saved
    # case-number files for data_version, and prepare the paired temperature
    # windows (steps 1-10 as input, steps 2-11 as target).
    model = merge_predict_next_temp_model()
    temp_x, temp_y = get_train_fun()
    # X = fun2('number_age_col85tran_v2.csv')
    # X = fun2('number_age_col71tran_v2.csv')
    X = fun2('cap_feature_2.csv')
    print(X.shape)
    y = fun2('number_category.csv')
    nb_x_train = fun3(f_in='nb_x_train_%d.dat' % (data_version))
    nb_x_test = fun3(f_in='nb_x_test_%d.dat' % (data_version))
    X_train, X_test, y_train, y_test, = get_train_test_data(
        X, y, nb_x_train, nb_x_test, )
    print(X_train.shape)
    y_train = category_to_target(y_train)
    y_test = category_to_target(y_test)
    temp_1_10_train, temp_1_10_test, temp_2_11_train, temp_2_11_test = get_train_test_data(
        temp_x, temp_y, nb_x_train, nb_x_test)
    temp_1_10_train = reshape_dataset(np.array(temp_1_10_train))
    temp_2_11_train = reshape_dataset(temp_2_11_train)
    temp_1_10_test = reshape_dataset(temp_1_10_test)
    temp_2_11_test = reshape_dataset(temp_2_11_test)
    print(temp_1_10_train.shape)
    # NOTE(review): `model` is constructed but never fitted or evaluated in
    # this block -- the training call appears to be missing or lives in a
    # part of the file not shown here.
def gbdt_lstm():
    """Use an LSTM temperature forecaster as a feature extractor for a GBDT.

    Each epoch: train the LSTM one pass on the (temp_1_10 -> temp_2_11)
    forecasting task, take its per-step 'dense_temp' activations on train
    and test sets, flatten them, concatenate with the static capability
    features, fit a fresh GradientBoostingClassifier on the combined rows,
    and record its test accuracy. Summaries are printed at the end.

    Relies on module-level globals: data_version, nb_epochs, batch_size.
    """
    temp_x, temp_y = get_train_fun()
    X = same_length_csv('cap_feature.csv')
    print(X.shape)
    y = same_length_csv('number_category.csv')
    # nb_x_train = read_case_nb(f_in='nb_x_train_%d.dat'%(data_version))
    # nb_x_test = read_case_nb(f_in='nb_x_test_%d.dat'%(data_version))
    nb_x_train = read_case_nb(f_in='nb_train_cv%d.dat' % (data_version))
    nb_x_test = read_case_nb(f_in='nb_test_cv%d.dat' % (data_version))
    X_train, X_test, y_train, y_test, = get_train_test_data(
        X, y, nb_x_train, nb_x_test, )
    # GBDT consumes the raw integer categories, so no one-hot conversion here.
    temp_1_10_train, temp_1_10_test, temp_2_11_train, temp_2_11_test = get_train_test_data(
        temp_x, temp_y, nb_x_train, nb_x_test)
    temp_1_10_train = reshape_dataset(np.array(temp_1_10_train))
    temp_2_11_train = reshape_dataset(temp_2_11_train)
    temp_1_10_test = reshape_dataset(temp_1_10_test)
    temp_2_11_test = reshape_dataset(temp_2_11_test)
    # Sequence-to-sequence regressor: 10 time steps in, one value per step out.
    input_temp = Input(shape=(10, 1), name='input_temp')
    lstm_temp = LSTM(16, return_sequences=True, name='lstm_temp')(input_temp)
    lstm_temp = Dropout(0.25)(lstm_temp)
    dense_temp = Dense(1, name='dense_temp', activation='relu')(lstm_temp)
    model = Model(inputs=input_temp, outputs=dense_temp)
    model.compile(
        loss={'dense_temp': 'mse'},
        optimizer=Adam(lr=0.0003, clipnorm=1.),
        metrics={'dense_temp': 'mse'}, )
    model.summary()
    # BUGFIX: the original passed the Keras 1 keywords input=/output= here,
    # which Keras 2 rejects; use inputs=/outputs= as the Model above does.
    intermediate_layer_model = Model(
        inputs=model.input, outputs=model.get_layer('dense_temp').output)
    score_list = []
    for epoch in range(nb_epochs):
        print('Train...')
        model.fit([temp_1_10_train], [temp_2_11_train], batch_size=batch_size,
                  epochs=1, shuffle=True, verbose=True)
        # Flatten the (samples, steps, 1) activations into 2-D feature rows.
        train_temp = intermediate_layer_model.predict(temp_1_10_train)
        test_temp = intermediate_layer_model.predict(temp_1_10_test)
        train_temp = np.reshape(
            train_temp,
            newshape=(train_temp.shape[0],
                      train_temp.shape[1] * train_temp.shape[2]))
        test_temp = np.reshape(
            test_temp,
            newshape=(test_temp.shape[0],
                      test_temp.shape[1] * test_temp.shape[2]))
        x_epoch_train = np.concatenate([train_temp, X_train], axis=1)
        x_epoch_test = np.concatenate([test_temp, X_test], axis=1)
        print(x_epoch_train.shape)
        lr = GradientBoostingClassifier(n_estimators=100, learning_rate=0.01,
                                        max_depth=3)
        lr = lr.fit(x_epoch_train, y_train, )
        score = lr.score(x_epoch_test, y_test)
        print(score)
        score_list.append(score)
    plot(range(0, nb_epochs), score_list, label='temp_acc')
    # NOTE(review): the [99:109] slice assumes nb_epochs >= 110; with fewer
    # epochs it silently shortens or is empty.
    acc_list_100_110 = score_list[99:109]
    acc_list = sorted(score_list, reverse=True)
    print(acc_list)
    title('temp_study_%d' % (data_version))
    print('temp_study_%d\n' % (data_version))
    print("top-10 mean: %.3f" % np.mean(np.array(acc_list[:10])))
    print("top-50 mean: %.3f" % np.mean(np.array(acc_list[:50])))
    print("acc_100-110 mean: %.3f" % np.mean(np.array(acc_list_100_110)))