Exemplo n.º 1
0
def main():
    """Train an LSTM on the Gemini ETH/USD daily CSV and plot the outcome.

    Reads ``gemini_ETHUSD_d.csv`` from the directory that contains this
    file, splits it with ``test_size=0.2``, builds and trains the model,
    prints the MAE between the raw predictions and ``y_test``, and finally
    draws two line plots: actual vs. predicted close prices, and training
    vs. validation loss per epoch.
    """
    # Window length handed to prepare_data; also the offset used below to
    # line the predictions up with the test-set close prices.
    window = 20

    # Hyperparameters, grouped by the call that consumes them.
    build_kwargs = {
        'activation': 'tanh',
        'optimizer': 'adam',
        'loss': 'mean_squared_error',
        'neurons': 30,
    }
    fit_kwargs = {'epochs': 50, 'batch_size': 1}

    here = Path(__file__).parents[0]
    csv_path = os.path.abspath(os.path.join(here, "gemini_ETHUSD_d.csv"))

    dataset = Data(csv_path)
    train, test = dataset.split_data(test_size=0.2)
    x_train, x_test, y_train, y_test = dataset.prepare_data(
        train, test, window)

    net = Model()
    net.LSTM_model(x_train, **build_kwargs)
    history = net.train(x_train, y_train, x_test, y_test, **fit_kwargs)

    closes = test['Close']
    actual = closes[window:]
    relative = net.predict(x_test).squeeze()

    print('MAE: ', mean_absolute_error(relative, y_test))

    # Scale each prediction by the close `window` rows earlier.
    # NOTE(review): presumably this undoes a relative-change normalisation
    # applied inside prepare_data -- confirm against that method.
    rescaled = closes.values[:-window] * (relative + 1)
    predicted = pd.Series(index=actual.index, data=rescaled)

    line_plot(actual, predicted, 'actual', 'prediction', lw=3)
    losses = history.history
    line_plot(losses['loss'], losses['val_loss'],
              'train loss', 'test loss', lw=3)
Exemplo n.º 2
0
def main(params):
    """Train and evaluate one model configuration and report its best AUC.

    Args:
        params: mapping with the keys ``"lr_rate"``, ``"dp_out"``,
            ``"bt_size"`` and ``"distance"``. Their values override
            ``learning_rate``, ``drop_out``, ``batch_size`` and ``dist`` on
            the arguments returned by ``load_arg()``.

    Returns:
        The negated best per-epoch test AUC, or ``None`` when
        ``args.model_type`` is not one of the recognised model names.
        NOTE(review): the negation suggests the caller is a minimiser
        (e.g. a hyperparameter search) -- confirm against the caller.

    Side effects:
        Prints progress to stdout, appends a summary line to
        ``<model_type>.txt`` and opens matplotlib figures.
    """
    #=======step 1: get args for model=======
    args = load_arg()
    args.learning_rate = params["lr_rate"]
    args.drop_out = params["dp_out"]
    args.batch_size = params["bt_size"]
    args.dist = params["distance"]
    print("At distance {}, learning_rate is {}, drop_out is {}, batch_size is {}"
          .format(params["distance"], params["lr_rate"],
                  params["dp_out"], params["bt_size"]))
    #=======step 2: preprocess data==========
    direc = './data/'  # directory of data file
    csv_file = 'seq_all.csv'
    dl = DataLoad(direc, csv_file)
    dl.munge_data(height=11.0, seq_len=args.seq_len, dist=args.dist)
    basket_center = np.array([5.25, 25.0, 10.0])
    dl.center_data(center_cent=basket_center)
    sum_samples, num_train, num_test = dl.test_valid_data_split(ratio=0.8)
    print(
        "--------------------------------------------------------------------")
    X_train = dl.data['X_train']
    y_train = dl.data['y_train']
    X_test = dl.data['X_test']
    y_test = dl.data['y_test']
    #=======step 3: construct model==========
    tf.reset_default_graph()
    model = Model(args)
    if args.model_type == 'LSTM_model':
        model.LSTM_model()
    elif args.model_type == 'bidir_LSTM_model':
        model.bidir_LSTM_model()
    elif args.model_type == 'CNN_model':
        model.CNN_model()
    elif args.model_type == 'Conv_LSTM_model':
        model.Conv_LSTM_model()
    elif args.model_type == 'LSTM_MDN_model':
        model.MDN_model('LSTM')
    elif args.model_type == 'BLSTM_MDN_model':
        model.MDN_model('BLSTM')
    else:
        print("please choose correct model type")
        return
    model.Evaluating()
    #=======step 4: start training===========
    start_time = time.time()
    train_cost_list = []
    test_cost_list = []
    test_AUC_list = []
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(args.epoch):
            # Floor division: range() needs an int, and `/` yields a float
            # on Python 3 (TypeError). `//` is identical for ints on
            # Python 2.
            for batch_num in range(num_train // args.batch_size):
                # Each step draws a fresh random batch (without replacement
                # inside the batch) rather than walking a fixed partition.
                perm_ind = np.random.choice(num_train,
                                            args.batch_size,
                                            replace=False)
                feed_dict = {
                    model.X: X_train[perm_ind],
                    model.y: y_train[perm_ind],
                    model.drop_out: args.drop_out
                }
                fetch = [model.train_op, model.accuracy, model.cost]
                _, train_acc, train_cost = sess.run(fetch, feed_dict=feed_dict)
            # Only the last batch's cost of the epoch is recorded.
            train_cost_list.append(train_cost)

            #=======step 5: start testing============
            test_AUC_batch_list = []
            test_cost_batch_list = []
            # shuffle test data
            X_test, y_test = shuffle(X_test, y_test, random_state=i * 42)
            # Walk full-size batches only; a trailing partial batch is
            # skipped because the second range stops at num_test.
            for start, end in zip(
                    range(0, num_test, args.batch_size),
                    range(args.batch_size, num_test + 1, args.batch_size)):

                feed_dict = {
                    model.X: X_test[start:end],
                    model.y: y_test[start:end],
                    # NOTE(review): 1.0 presumably means "keep everything"
                    # (dropout disabled) -- confirm in Model.
                    model.drop_out: 1.0
                }
                fetch = [model.accuracy, model.cost, model.y_pred, model.numel]
                test_acc, test_cost_batch, y_pred, numel = sess.run(
                    fetch, feed_dict=feed_dict)
                test_AUC_batch = sklearn.metrics.roc_auc_score(
                    y_test[start:end], y_pred[:, 1])
                test_AUC_batch_list.append(test_AUC_batch)
                test_cost_batch_list.append(test_cost_batch)
            test_AUC = np.mean(test_AUC_batch_list)
            test_cost = np.mean(test_cost_batch_list)

            test_AUC_list.append(test_AUC)
            test_cost_list.append(test_cost)

            # train_acc / test_acc below are the values from the last batch
            # of the epoch, not epoch averages.
            print(
                "at {} epoch, the training cost is {}, the training accuracy is {}"
                .format(i, train_cost, train_acc))
            print("at {} epoch, the test cost is {}, the test accuracy is {}".
                  format(i, test_cost, test_acc))
            print("at {} epoch, the test_AUC is {}".format(i, test_AUC))
            print("------------------------------------------------------")

            #----early stop---------
            # If the test AUC drops below 80% of the mean of the last ten
            # epochs (current epoch included), stop training.
            if i > 10:
                mean_test_AUC = np.mean(test_AUC_list[-10:])

                if test_AUC < mean_test_AUC * 0.8:
                    break

        best_AUC = max(test_AUC_list)
        best_AUC_ind = test_AUC_list.index(best_AUC)
        end_time = time.time()
        spend_time = end_time - start_time

        print("========================================================")
        print("Finally, at distance {}, the best test AUC is {} at {} epoch,".
              format(args.dist, best_AUC, best_AUC_ind))
        print("Finally, the model has {} parameters\n\n".format(numel))
        # Append the result summary to a local file named after the model.
        with open(args.model_type + '.txt', 'a') as f:
            f.write(
                "At distance {}, the best test AUC is {} at {} epoch, the model has {} parameters, lr_rate is {}, dropout is {}, batchsize is {}, spend time is {}, \n\n"
                .format(args.dist, best_AUC, best_AUC_ind, numel,
                        args.learning_rate, args.drop_out, args.batch_size,
                        spend_time))

        #========step 6: draw results===============
        generate_trajectory = True
        if generate_trajectory:
            if args.model_type == 'LSTM_MDN_model' or args.model_type == 'BLSTM_MDN_model':
                # The MDN variants additionally plot trajectories for one
                # random test batch; this needs the still-open session.
                perm_ind = np.random.choice(num_test,
                                            args.batch_size,
                                            replace=False)

                val_dict = {
                    model.X: X_test[perm_ind],
                    model.y: y_test[perm_ind],
                    model.drop_out: 1.0
                }
                batch = X_test[perm_ind]

                plot_traj_MDN_mult(model, sess, val_dict, batch)

            plt.figure()
            plt.plot(train_cost_list, 'r', label='train_cost')
            plt.plot(test_cost_list, '--r', label='test_cost')
            plt.legend()
            plt.figure()
            plt.plot(test_AUC_list, label='test_AUC')
            plt.show()

    return -best_AUC