# Assumes the module-level imports used below: os, pathlib.Path, pandas as pd,
# sklearn.metrics.mean_absolute_error, and the project-local Data, Model and
# line_plot helpers.
def main():
    # Locate the ETH/USD daily candle CSV next to this script.
    p = Path(__file__).parents[0]
    directory = os.path.abspath(os.path.join(p, "gemini_ETHUSD_d.csv"))

    # Load the data and split it chronologically into train/test sets.
    data = Data(directory)
    train, test = data.split_data(test_size=0.2)

    # Window the series into sequences of numOfDays past closes per sample.
    numOfDays = 20
    x_train, x_test, y_train, y_test = data.prepare_data(train, test, numOfDays)

    model = Model()

    # Hyperparameter tuning
    epochs = 50
    optimizer = 'adam'
    loss = 'mean_squared_error'
    activation = 'tanh'
    batch_size = 1
    neurons = 30

    # Build and train the LSTM.
    model.LSTM_model(x_train, activation=activation, optimizer=optimizer,
                     loss=loss, neurons=neurons)
    history = model.train(x_train, y_train, x_test, y_test,
                          epochs=epochs, batch_size=batch_size)

    # Evaluate on the held-out set.
    targets = test['Close'][numOfDays:]
    preds = model.predict(x_test).squeeze()
    print('MAE: ', mean_absolute_error(y_test, preds))

    # The model predicts relative returns; convert them back to prices.
    preds = test['Close'].values[:-numOfDays] * (preds + 1)
    preds = pd.Series(index=targets.index, data=preds)

    line_plot(targets, preds, 'actual', 'prediction', lw=3)
    line_plot(history.history['loss'], history.history['val_loss'],
              'train loss', 'test loss', lw=3)
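# The windowing inside Data.prepare_data() is not shown in this section. The
# back-conversion test['Close'].values[:-numOfDays] * (preds + 1) above implies
# that the labels are relative returns over a numOfDays window. Below is a
# minimal sketch of that assumed preparation; the helper name make_windows and
# the first-close normalisation are illustrative assumptions, not the repo's code.
import numpy as np  # used only by this sketch


def make_windows(df, num_of_days=20):
    """Window a price frame into LSTM inputs and return-style labels (sketch)."""
    closes = df['Close'].values
    x, y = [], []
    for i in range(len(closes) - num_of_days):
        window = closes[i:i + num_of_days]
        x.append(window / closes[i] - 1.0)                    # inputs scaled by window start
        y.append(closes[i + num_of_days] / closes[i] - 1.0)   # relative-return label
    # Keras LSTM layers expect (samples, timesteps, features).
    return np.array(x)[..., np.newaxis], np.array(y)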
# Assumes the module-level imports used below: time, numpy as np, sklearn,
# sklearn.utils.shuffle, tensorflow as tf (1.x graph API), matplotlib.pyplot
# as plt, and the project-local load_arg, DataLoad, Model and plot_traj_MDN_mult.
def main(params):
    # =======step 1: get args for model=======
    args = load_arg()
    args.learning_rate = params["lr_rate"]
    args.drop_out = params["dp_out"]
    args.batch_size = params["bt_size"]
    args.dist = params["distance"]
    print("At distance {}, learning_rate is {}, drop_out is {}, batch_size is {}".format(
        params["distance"], params["lr_rate"], params["dp_out"], params["bt_size"]))

    # =======step 2: preprocess data==========
    direc = './data/'  # directory of data file
    csv_file = 'seq_all.csv'
    dl = DataLoad(direc, csv_file)
    dl.munge_data(height=11.0, seq_len=args.seq_len, dist=args.dist)
    basket_center = np.array([5.25, 25.0, 10.0])
    dl.center_data(center_cent=basket_center)
    sum_samples, num_train, num_test = dl.test_valid_data_split(ratio=0.8)
    print("--------------------------------------------------------------------")

    X_train = dl.data['X_train']
    y_train = dl.data['y_train']
    X_test = dl.data['X_test']
    y_test = dl.data['y_test']

    # =======step 3: construct model==========
    tf.reset_default_graph()
    model = Model(args)
    if args.model_type == 'LSTM_model':
        model.LSTM_model()
    elif args.model_type == 'bidir_LSTM_model':
        model.bidir_LSTM_model()
    elif args.model_type == 'CNN_model':
        model.CNN_model()
    elif args.model_type == 'Conv_LSTM_model':
        model.Conv_LSTM_model()
    elif args.model_type == 'LSTM_MDN_model':
        model.MDN_model('LSTM')
    elif args.model_type == 'BLSTM_MDN_model':
        model.MDN_model('BLSTM')
    else:
        print("please choose a valid model type")
        return
    model.Evaluating()

    # =======step 4: start training===========
    start_time = time.time()
    train_cost_list = []
    test_cost_list = []
    test_AUC_list = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(args.epoch):
            # Integer division: one pass over the training set in random mini-batches.
            for batch_num in range(num_train // args.batch_size):
                perm_ind = np.random.choice(num_train, args.batch_size, replace=False)
                feed_dict = {
                    model.X: X_train[perm_ind],
                    model.y: y_train[perm_ind],
                    model.drop_out: args.drop_out
                }
                fetch = [model.train_op, model.accuracy, model.cost]
                _, train_acc, train_cost = sess.run(fetch, feed_dict=feed_dict)
                train_cost_list.append(train_cost)

            # =======step 5: start testing============
            test_AUC_batch_list = []
            test_cost_batch_list = []
            # shuffle test data
            X_test, y_test = shuffle(X_test, y_test, random_state=i * 42)
            for start, end in zip(
                    range(0, num_test, args.batch_size),
                    range(args.batch_size, num_test + 1, args.batch_size)):
                feed_dict = {
                    model.X: X_test[start:end],
                    model.y: y_test[start:end],
                    model.drop_out: 1.0  # no dropout at test time
                }
                fetch = [model.accuracy, model.cost, model.y_pred, model.numel]
                test_acc, test_cost_batch, y_pred, numel = sess.run(
                    fetch, feed_dict=feed_dict)
                test_AUC_batch = sklearn.metrics.roc_auc_score(
                    y_test[start:end], y_pred[:, 1])
                test_AUC_batch_list.append(test_AUC_batch)
                test_cost_batch_list.append(test_cost_batch)
            test_AUC = np.mean(test_AUC_batch_list)
            test_cost = np.mean(test_cost_batch_list)
            test_AUC_list.append(test_AUC)
            test_cost_list.append(test_cost)

            print("at {} epoch, the training cost is {}, the training accuracy is {}"
                  .format(i, train_cost, train_acc))
            print("at {} epoch, the test cost is {}, the test accuracy is {}"
                  .format(i, test_cost, test_acc))
            print("at {} epoch, the test_AUC is {}".format(i, test_AUC))
            print("------------------------------------------------------")

            # ----early stop---------
            # if test_AUC starts to decrease, then stop training
            if i > 10:
                mean_test_AUC = np.mean(test_AUC_list[-10:])
                if test_AUC < mean_test_AUC * 0.8:
                    break

        best_AUC = max(test_AUC_list)
        best_AUC_ind = test_AUC_list.index(best_AUC)
        end_time = time.time()
        spend_time = end_time - start_time
        print("========================================================")
        print("Finally, at distance {}, the best test AUC is {} at {} epoch".format(
            args.dist, best_AUC, best_AUC_ind))
        print("Finally, the model has {} parameters\n\n".format(numel))

        # write results to a local file
        with open(args.model_type + '.txt', 'a') as f:
            f.write(
                "At distance {}, the best test AUC is {} at {} epoch, the model has {} "
                "parameters, lr_rate is {}, dropout is {}, batchsize is {}, "
                "spend time is {}\n\n".format(
                    args.dist, best_AUC, best_AUC_ind, numel, args.learning_rate,
                    args.drop_out, args.batch_size, spend_time))

        # ========step 6: draw results===============
        generate_trajectory = True
        if generate_trajectory:
            if args.model_type in ('LSTM_MDN_model', 'BLSTM_MDN_model'):
                perm_ind = np.random.choice(num_test, args.batch_size, replace=False)
                val_dict = {
                    model.X: X_test[perm_ind],
                    model.y: y_test[perm_ind],
                    model.drop_out: 1.0
                }
                batch = X_test[perm_ind]
                plot_traj_MDN_mult(model, sess, val_dict, batch)

            plt.figure()
            plt.plot(train_cost_list, 'r', label='train_cost')
            plt.plot(test_cost_list, '--r', label='test_cost')
            plt.legend()
            plt.figure()
            plt.plot(test_AUC_list, label='test_AUC')
            plt.show()

    return -best_AUC
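

# Nothing in this section shows how main(params) is invoked, but the fact that
# it returns -best_AUC and reads "lr_rate", "dp_out", "bt_size" and "distance"
# from a params dict suggests it serves as the objective of a minimising
# hyperparameter search. Below is a minimal driver sketch assuming hyperopt as
# the search tool; the import, the search ranges and max_evals are illustrative
# assumptions, not the repo's actual configuration.
from hyperopt import fmin, tpe, hp

# Illustrative search space over the keys main(params) actually reads.
space = {
    "lr_rate": hp.loguniform("lr_rate", np.log(1e-4), np.log(1e-2)),
    "dp_out": hp.uniform("dp_out", 0.5, 1.0),   # keep-probability (test time feeds 1.0)
    "bt_size": hp.choice("bt_size", [32, 64, 128]),
    "distance": hp.choice("distance", [10, 20, 30]),
}

# main() returns -best_AUC, so minimising the objective maximises test AUC.
best = fmin(fn=main, space=space, algo=tpe.suggest, max_evals=50)
print(best)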