def logistic_regression_prediction_epoch_lt12(data_version, penalty='l1'):
    """Track logistic-regression test accuracy as optimization iterations grow.

    Trains on the concatenation of capacity features and padded temperature
    sequences for the lt12 cohort, recording held-out accuracy after each
    additional optimizer iteration ("epoch"), then prints summary statistics.

    Parameters
    ----------
    data_version : int
        Cross-validation split index used to select the train/test case files.
    penalty : str
        Regularization penalty forwarded to ``LogisticRegression`` (default 'l1').
    """
    # Variable-length temperature series, padded/truncated to 50 steps.
    X = diff_length_csv('temperature_lt12.csv')
    X = pad_sequences(X, maxlen=50, padding='post', truncating='post',
                      value=0, dtype=float)
    X = np.array(X, dtype=float)
    X2 = same_length_csv('cap_feature_lt12.csv')
    nb_x_train = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % data_version)
    nb_x_test = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % data_version)
    y = same_length_csv('number_category_lt12.csv')
    # Static capacity features first, then the temperature sequence.
    X_con = np.concatenate((X2, X), axis=1)
    X_train, X_test, y_train, y_test = get_train_test_data(
        X_con, y, nb_x_train, nb_x_test)

    # BUG FIX: the original constructed a brand-new LogisticRegression on
    # every loop pass, so warm_start=True never reused the previous
    # coefficients and each fit restarted from scratch.  Reuse a single
    # estimator and raise max_iter incrementally instead (max_iter >= 1,
    # since 0 is rejected by scikit-learn).
    lr = LogisticRegression(penalty=penalty, fit_intercept=True,
                            max_iter=1, warm_start=True)
    acc_list = []
    for i in range(nb_epochs):
        lr.set_params(max_iter=max(i, 1))
        lr = lr.fit(X_train, y_train)
        acc_list.append(lr.score(X_test, y_test))

    plot(range(0, nb_epochs), acc_list, label='temp_acc')
    # Slices feeding the summary statistics below.
    acc_list_100_110 = acc_list[99:109]   # accuracies of epochs 100-109
    acc_list_200 = acc_list[0:200]
    print(len(acc_list_200))
    acc_list_210 = acc_list[200:]
    print(acc_list_210)
    print(len(acc_list_210))
    acc_list_sorted = sorted(acc_list_200, reverse=True)  # fixed typo: was acc_list_sored
    print(acc_list)
    title('temp_study_%d' % data_version)
    print('temp_study_%d\n' % data_version)
    print("top-10 mean: %.3f" % np.mean(np.array(acc_list_sorted[:10])))
    print("top-50 mean: %.3f" % np.mean(np.array(acc_list_sorted[:50])))
    print("acc_100-110 mean: %.3f" % np.mean(np.array(acc_list_100_110)))
def para_prediction(model, data_version):
    """Train a Keras model on capacity features, logging per-epoch metrics.

    Evaluates train/test loss and test accuracy after every epoch, prints a
    colour-coded per-epoch accuracy summary, plots the curves, and reports
    top-10 / top-50 mean accuracy.

    Parameters
    ----------
    model : compiled Keras model
        Model to train; trained in place.
    data_version : int
        Cross-validation split index used to select the train/test case files.
    """
    X = same_length_csv('cap_feature_2.csv')
    y = same_length_csv('number_category.csv')
    nb_x_train = read_case_nb(f_in='nb_x_train_%d.dat' % data_version)
    nb_x_test = read_case_nb(f_in='nb_x_test_%d.dat' % data_version)
    x_train, x_test, y_train, y_test = get_train_test_data(
        X, y, nb_x_train, nb_x_test)
    # Base rate of the positive class in the test split; presumably the
    # categories are 1/2-coded, hence the len() subtraction — TODO confirm.
    probability_test = (sum(y_test) - len(y_test)) / len(y_test)
    print('probability_test:', probability_test)
    y_train = category_to_target(y_train)
    y_test = category_to_target(y_test)
    print('Train...')
    acc_list = []
    train_loss_list = []
    test_loss_list = []
    for epoch in range(nb_epochs):
        # CONSISTENCY FIX: use `epochs=` (as the rest of this file does)
        # instead of the removed Keras-1 keyword `nb_epoch=`.
        model.fit(x_train, y_train, batch_size=batch_size, epochs=1,
                  validation_split=0.05)
        train_score, train_acc = model.evaluate(x_train, y_train,
                                                batch_size=batch_size)
        score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
        acc_list.append(acc)
        train_loss_list.append(train_score)
        test_loss_list.append(score)
        print('Test score:', score)
        print('Test accuracy:', acc)
    # Colour-code each epoch's accuracy: red above 0.74, blue above 0.72.
    results = ""
    for i, acc in enumerate(acc_list):
        if acc > 0.74:
            results += '\033[1;31m' + str(i + 1) + ':' + str(acc) + '\033[0m' + '; '
        elif acc > 0.72:
            results += '\033[1;34m' + str(i + 1) + ':' + str(acc) + '\033[0m' + '; '
        else:
            results += str(i + 1) + ':' + str(acc) + '; '
    # BUG FIX: `results` was assembled but never displayed in the original.
    print(results)
    plot(range(0, nb_epochs), acc_list, label='feature')
    plot(range(0, nb_epochs), train_loss_list, label='train_loss')
    plot(range(0, nb_epochs), test_loss_list, label='test_loss')
    acc_list = sorted(acc_list, reverse=True)
    print(acc_list)
    print("top-10 mean: %.3f" % np.mean(np.array(acc_list[:10])))
    print("top-50 mean: %.3f" % np.mean(np.array(acc_list[:50])))
def gbdt_feature_prediction_lt12(data_version):
    """Score a gradient-boosting classifier on the lt12 capacity features.

    Loads the lt12 feature and label tables, splits them by the stored
    per-fold case numbers, fits a GBDT, prints and returns test accuracy.
    """
    features = same_length_csv('cap_feature_lt12.csv')
    labels = same_length_csv('number_category_lt12.csv')
    train_ids = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % data_version)
    test_ids = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % data_version)
    x_tr, x_te, y_tr, y_te = get_train_test_data(
        features, labels, train_ids, test_ids)
    booster = GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.01, max_depth=3)
    booster.fit(x_tr, y_tr)
    score = booster.score(x_te, y_te)
    print(score)
    return score
def logistic_regression_feature_prediction_lt12(data_version, penalty='l1'):
    """Score a logistic regression on the lt12 capacity features alone.

    Loads the lt12 feature and label tables, splits them by the stored
    per-fold case numbers, fits the model, prints and returns test accuracy.
    """
    features = same_length_csv('cap_feature_lt12.csv')
    train_ids = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % data_version)
    test_ids = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % data_version)
    labels = same_length_csv('number_category_lt12.csv')
    x_tr, x_te, y_tr, y_te = get_train_test_data(
        features, labels, train_ids, test_ids)
    clf = LogisticRegression(penalty=penalty, fit_intercept=True,
                             max_iter=200, warm_start=True, tol=0.0001)
    clf.fit(x_tr, y_tr)
    score = clf.score(x_te, y_te)
    print(score)
    return score
def temp_mutilstm_study(model):
    """Train an LSTM model on padded temperature sequences for `nb_epochs`.

    Logs train/test loss and test accuracy after every epoch, plots the
    three curves, then prints top-10 / top-50 mean accuracy and the mean
    over the epoch 100-109 window.  Uses the module-level globals
    `time_steps`, `padding_value`, `data_version`, `nb_epochs`, `batch_size`.
    """
    temps = diff_length_csv('temperature.csv')
    temps = pad_sequences(temps, maxlen=time_steps, padding='post',
                          truncating='post', value=padding_value, dtype=float)
    print(temps.shape)
    labels = same_length_csv('number_category.csv')
    train_ids = read_case_nb(f_in='nb_x_train_%d.dat' % data_version)
    test_ids = read_case_nb(f_in='nb_x_test_%d.dat' % data_version)
    x_train, x_test, y_train, y_test = get_train_test_data(
        temps, labels, train_ids, test_ids)
    y_train = category_to_target(y_train)
    y_test = category_to_target(y_test)
    x_train = step_change_5(x_train)
    x_test = step_change_5(x_test)

    acc_history = []
    train_loss_history = []
    test_loss_history = []
    for _ in range(nb_epochs):
        print('Train...')
        model.fit(x_train, y_train, batch_size=batch_size, epochs=1,
                  shuffle=True, verbose=1)
        train_loss, _train_acc = model.evaluate(x_train, y_train,
                                                batch_size=batch_size)
        test_loss, test_acc = model.evaluate(x_test, y_test,
                                             batch_size=batch_size)
        acc_history.append(test_acc)
        train_loss_history.append(train_loss)
        test_loss_history.append(test_loss)
        print('Test score:', test_loss)
        print('Test accuracy:', test_acc)

    plot(range(0, nb_epochs), acc_history, label='temp_acc')
    plot(range(0, nb_epochs), train_loss_history, label='train_loss')
    plot(range(0, nb_epochs), test_loss_history, label='test_loss')
    window_100_110 = acc_history[99:109]   # accuracies of epochs 100-109
    ranked = sorted(acc_history, reverse=True)
    print(ranked)
    title('temp_study_%d' % data_version)
    print('temp_study_%d\n' % data_version)
    print("top-10 mean: %.3f" % np.mean(np.array(ranked[:10])))
    print("top-50 mean: %.3f" % np.mean(np.array(ranked[:50])))
    print("acc_100-110 mean: %.3f" % np.mean(np.array(window_100_110)))
def gbdt_merge_prediction_lt12(data_version):
    """Score a GBDT on capacity features merged with padded temperatures.

    Temperature sequences are padded/truncated to 50 steps and concatenated
    after the static capacity features; prints and returns test accuracy.
    """
    temps = diff_length_csv('temperature_lt12.csv')
    temps = pad_sequences(temps, maxlen=50, padding='post',
                          truncating='post', value=0, dtype=float)
    temps = np.array(temps, dtype=float)
    features = same_length_csv('cap_feature_lt12.csv')
    train_ids = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % data_version)
    test_ids = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % data_version)
    labels = same_length_csv('number_category_lt12.csv')
    # Static capacity features first, then the temperature sequence.
    merged = np.concatenate((features, temps), axis=1)
    x_tr, x_te, y_tr, y_te = get_train_test_data(
        merged, labels, train_ids, test_ids)
    booster = GradientBoostingClassifier(
        n_estimators=100, learning_rate=0.01, max_depth=3)
    booster.fit(x_tr, y_tr)
    score = booster.score(x_te, y_te)
    print(score)
    return score
def logistic_regression_prediction_lt12(data_version, penalty='l1'):
    """Score a logistic regression on capacity + padded-temperature features.

    Temperature sequences are padded/truncated to 50 steps and concatenated
    after the static capacity features; prints and returns test accuracy.
    """
    temps = diff_length_csv('temperature_lt12.csv')
    temps = pad_sequences(temps, maxlen=50, padding='post',
                          truncating='post', value=0, dtype=float)
    temps = np.array(temps, dtype=float)
    features = same_length_csv('cap_feature_lt12.csv')
    train_ids = read_case_nb(f_in='nb_train_lt12_cv%d.dat' % data_version)
    test_ids = read_case_nb(f_in='nb_test_lt12_cv%d.dat' % data_version)
    labels = same_length_csv('number_category_lt12.csv')
    # Static capacity features first, then the temperature sequence.
    merged = np.concatenate((features, temps), axis=1)
    x_tr, x_te, y_tr, y_te = get_train_test_data(
        merged, labels, train_ids, test_ids)
    clf = LogisticRegression(penalty=penalty, fit_intercept=True,
                             max_iter=200, tol=0.00005)
    clf.fit(x_tr, y_tr)
    score = clf.score(x_te, y_te)
    print(score)
    return score
def gbdt_lstm():
    """Hybrid LSTM + GBDT pipeline over temperature sequences and features.

    Each epoch: train the LSTM one epoch on the next-step temperature task
    (steps 1-10 predicting steps 2-11), extract its 'dense_temp' activations
    for the train/test cases, concatenate them with the static capacity
    features, then fit and score a fresh GradientBoostingClassifier.  Plots
    the per-epoch accuracy curve and prints summary statistics.  Uses the
    module-level globals `data_version`, `nb_epochs`, `batch_size`.
    """
    temp_x, temp_y = get_train_fun()
    X = same_length_csv('cap_feature.csv')
    print(X.shape)
    y = same_length_csv('number_category.csv')
    nb_x_train = read_case_nb(f_in='nb_train_cv%d.dat' % data_version)
    nb_x_test = read_case_nb(f_in='nb_test_cv%d.dat' % data_version)
    X_train, X_test, y_train, y_test = get_train_test_data(
        X, y, nb_x_train, nb_x_test)
    temp_1_10_train, temp_1_10_test, temp_2_11_train, temp_2_11_test = \
        get_train_test_data(temp_x, temp_y, nb_x_train, nb_x_test)
    temp_1_10_train = reshape_dataset(np.array(temp_1_10_train))
    temp_2_11_train = reshape_dataset(temp_2_11_train)
    temp_1_10_test = reshape_dataset(temp_1_10_test)
    temp_2_11_test = reshape_dataset(temp_2_11_test)

    # One-step-ahead temperature regressor: 10 steps in, 10 values out.
    input_temp = Input(shape=(10, 1), name='input_temp')
    lstm_temp = LSTM(16, return_sequences=True, name='lstm_temp')(input_temp)
    lstm_temp = Dropout(0.25)(lstm_temp)
    dense_temp = Dense(1, name='dense_temp', activation='relu')(lstm_temp)
    model = Model(inputs=input_temp, outputs=dense_temp)
    model.compile(
        loss={'dense_temp': 'mse'},
        optimizer=Adam(lr=0.0003, clipnorm=1.),
        metrics={'dense_temp': 'mse'},
    )
    model.summary()
    # CONSISTENCY FIX: the original used the removed Keras-1 keywords
    # `input=`/`output=` here, while the Model(...) call above already uses
    # the current `inputs=`/`outputs=` spelling.
    intermediate_layer_model = Model(
        inputs=model.input, outputs=model.get_layer('dense_temp').output)

    score_list = []
    for epoch in range(nb_epochs):
        print('Train...')
        model.fit([temp_1_10_train], [temp_2_11_train],
                  batch_size=batch_size, epochs=1, shuffle=True, verbose=1)
        train_temp = intermediate_layer_model.predict(temp_1_10_train)
        test_temp = intermediate_layer_model.predict(temp_1_10_test)
        # Flatten the (cases, steps, 1) activations to (cases, steps).
        train_temp = np.reshape(
            train_temp,
            newshape=(train_temp.shape[0],
                      train_temp.shape[1] * train_temp.shape[2]))
        test_temp = np.reshape(
            test_temp,
            newshape=(test_temp.shape[0],
                      test_temp.shape[1] * test_temp.shape[2]))
        # LSTM-derived features first, then the static capacity features.
        x_epoch_train = np.concatenate([train_temp, X_train], axis=1)
        x_epoch_test = np.concatenate([test_temp, X_test], axis=1)
        print(x_epoch_train.shape)
        lr = GradientBoostingClassifier(n_estimators=100,
                                        learning_rate=0.01, max_depth=3)
        lr = lr.fit(x_epoch_train, y_train)
        score = lr.score(x_epoch_test, y_test)
        print(score)
        score_list.append(score)

    plot(range(0, nb_epochs), score_list, label='temp_acc')
    acc_list_100_110 = score_list[99:109]   # accuracies of epochs 100-109
    acc_list = sorted(score_list, reverse=True)
    print(acc_list)
    title('temp_study_%d' % data_version)
    print('temp_study_%d\n' % data_version)
    print("top-10 mean: %.3f" % np.mean(np.array(acc_list[:10])))
    print("top-50 mean: %.3f" % np.mean(np.array(acc_list[:50])))
    print("acc_100-110 mean: %.3f" % np.mean(np.array(acc_list_100_110)))
loss_weights={ 'dense_softmax': 0.1, 'dense_temp': 1 }) model.summary() return model if __name__ == '__main__': # model =merge_predict_next_temp_model() model = new_merge_predict_next_temp_model() temp_x, temp_y = get_train_fun() X = same_length_csv('cap_feature.csv') print(X.shape) y = same_length_csv('number_category.csv') # nb_x_train = read_case_nb(f_in='nb_x_train_%d.dat'%(data_version)) # nb_x_test = read_case_nb(f_in='nb_x_test_%d.dat'%(data_version)) nb_x_train = read_case_nb(f_in='nb_train_cv%d.dat' % (data_version)) nb_x_test = read_case_nb(f_in='nb_test_cv%d.dat' % (data_version)) X_train, X_test, y_train, y_test, = get_train_test_data( X, y, nb_x_train, nb_x_test, ) print(X_train.shape)