Пример #1
0
def main():
    """Train the model from ``config.json``, then plot and score test predictions.

    The model is trained from a batch generator, point-by-point predictions
    are made on the test data, passed through ``denorm_transform`` together
    with the test targets, plotted, and scored with MAE and RMSE.
    """
    # A context manager closes the config file instead of leaking the handle
    # that a bare open() inside json.load() would leave behind.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(os.path.join('../data', data_cfg['filename']),
                      os.path.join('../data', data_cfg['VIMfile']),
                      data_cfg['train_test_split'],
                      data_cfg['columns'])

    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call is
    # kept in case get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(seq_len=seq_len,
                                           batch_size=batch_size,
                                           normalise=normalise),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir)

    x_test, y_test, p0_vec = data.get_test_data(seq_len=seq_len,
                                                normalise=normalise)

    predictions = model.predict_point_by_point(x_test)
    # Reshape into a single column before de-normalising.
    pred = predictions.reshape((predictions.size, 1))

    # De-normalise and plot in the original units.
    p_pred, p_true = denorm_transform(p0_vec, pred, y_test)
    plot_results(p_pred, p_true)

    # Evaluation metrics on the de-normalised series.
    assess = EvalMetrics(p_true, p_pred)
    mae = assess.get_MAE()
    rmse = assess.get_RMSE()
    print("MAE on validation set is: %f" % mae)
    print("RMSE on validation set is: %f" % rmse)
Пример #2
0
def main():
    """Train the model from ``config.json`` and plot point-by-point test predictions."""
    # A context manager closes the config file instead of leaking the handle.
    with open("config.json", "r") as config_file:
        configs = json.load(config_file)

    data_cfg = configs["data"]
    train_cfg = configs["training"]
    save_dir = configs["model"]["save_dir"]
    seq_len = data_cfg["sequence_length"]
    batch_size = train_cfg["batch_size"]
    normalise = data_cfg["normalise"]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(
        os.path.join("data", data_cfg["filename"]),
        data_cfg["train_test_split"],
        data_cfg["columns"],
    )

    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call is
    # kept in case get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=seq_len,
            batch_size=batch_size,
            normalise=normalise,
        ),
        epochs=train_cfg["epochs"],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir,
    )

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    predictions = model.predict_point_by_point(x_test)
    plot_results(predictions, y_test)
def main():
    """Train with train/validation generators, then plot multi-sequence test predictions.

    Training data comes from ``filename_train``; the test windows come from a
    second ``DataLoader`` built on ``filename_test``.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    model_cfg = configs['model']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(model_cfg['save_dir'], exist_ok=True)
    os.makedirs(model_cfg['log_dir'], exist_ok=True)

    data_loader = DataLoader(os.path.join('data', data_cfg['filename_train']),
                             data_cfg['train_test_split'],
                             data_cfg['columns'],
                             is_training=True)

    model = Model()
    model.build_model(configs)

    steps_per_epoch = math.ceil((data_loader.len_train - seq_len) / batch_size)
    validation_steps = math.ceil((data_loader.len_val - seq_len) / batch_size)

    train_batches = data_loader.batch_generator(seq_len=seq_len,
                                                batch_size=batch_size,
                                                normalise=normalise,
                                                generator_type='train')
    val_batches = data_loader.batch_generator(seq_len=seq_len,
                                              batch_size=batch_size,
                                              normalise=normalise,
                                              generator_type='val')
    model.train_generator(train_loader=train_batches,
                          val_loader=val_batches,
                          epochs=train_cfg['epochs'],
                          batch_size=batch_size,
                          steps_per_epoch=steps_per_epoch,
                          validation_steps=validation_steps,
                          save_dir=model_cfg['save_dir'],
                          log_dir=model_cfg['log_dir'])

    # The test file is loaded with a split argument of 0 and is_training=False.
    test_data_loader = DataLoader(os.path.join('data',
                                               data_cfg['filename_test']),
                                  0,
                                  data_cfg['columns'],
                                  is_training=False)
    x_test, y_test = test_data_loader.get_test_data(seq_len=seq_len,
                                                    normalise=normalise)

    # Both the window size and the prediction length are the sequence length.
    predictions = model.predict_sequences_multiple(x_test, seq_len, seq_len)

    plot = Plot()
    plot.plot_results_multiple(predictions, y_test, seq_len)
Пример #4
0
def main():
    """Train the model from ``config.json`` and plot multi-sequence test predictions."""
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    # Load the data.
    data = DataLoader(os.path.join('data', data_cfg['filename']),
                      data_cfg['train_test_split'],
                      data_cfg['columns'])

    # Build the model.
    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call is
    # kept in case get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    # steps_per_epoch is the number of forward/backward passes in each epoch.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(seq_len=seq_len,
                                           batch_size=batch_size,
                                           normalise=normalise),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir)

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    # Both the window size and the prediction length are the sequence length.
    predictions = model.predict_sequences_multiple(x_test, seq_len, seq_len)

    plot_results_multiple(predictions, y_test, seq_len)


#if __name__=='__main__':
#	main()
Пример #5
0
def main():
    """Train on data loaded by symbol, plot multi-sequence predictions, then wait for Enter."""
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    # The loader is given sequence_length * number_sequences as its last argument.
    rates_count = seq_len * data_cfg['number_sequences']
    data = DataLoader(data_cfg['symbol'],
                      data_cfg['train_test_split'],
                      data_cfg['columns'],
                      rates_count)

    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call is
    # kept in case get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(seq_len=seq_len,
                                           batch_size=batch_size,
                                           normalise=normalise),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir)

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    # Both the window size and the prediction length are the sequence length.
    predictions = model.predict_sequences_multiple(x_test, seq_len, seq_len)

    plot_results_multiple(predictions, y_test, seq_len)

    # Block until the user confirms.
    input("Press Enter to continue...")
Пример #6
0
def main():
    """Train the model and dump point-by-point predictions and targets to CSV.

    Writes ``predict.csv`` (predictions) and ``true.csv`` (test targets) to the
    current working directory.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # Create the folder for saved model parameters; exist_ok avoids the
    # check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(
        os.path.join('data', data_cfg['filename']),
        data_cfg['train_test_split'],
        data_cfg['columns']
    )

    # Train the model.
    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are never used below. The call is kept in case
    # get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=seq_len,
            batch_size=batch_size,
            normalise=normalise
        ),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir
    )

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    predictions = model.predict_point_by_point(x_test)

    # Persist both series for later comparison.
    predicted_frame = pd.DataFrame(predictions)
    predicted_frame.to_csv('predict.csv')
    true_frame = pd.DataFrame(y_test)
    true_frame.to_csv('true.csv')
def main(model_name=None):
    """Train a new model, or load a saved one, then plot multi-sequence predictions.

    Args:
        model_name: File name of a saved model inside the configured
            ``save_dir``. When truthy the model is loaded and training is
            skipped; otherwise a model is built from the config and trained.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(os.path.join('data', data_cfg['filename']),
                      data_cfg['train_test_split'],
                      data_cfg['columns'])

    model = Model()
    if model_name:
        model.load_model(os.path.join(save_dir, model_name))
    else:
        model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call is
    # kept in case get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    if not model_name:
        # Out-of-memory training: batches are pulled from a generator.
        # steps_per_epoch is only needed on this path, so it is computed here.
        steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
        model.train_generator(
            data_gen=data.generate_train_batch(seq_len=seq_len,
                                               batch_size=batch_size,
                                               normalise=normalise),
            epochs=train_cfg['epochs'],
            batch_size=batch_size,
            steps_per_epoch=steps_per_epoch,
            save_dir=save_dir)

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    # Both the window size and the prediction length are the sequence length.
    predictions = model.predict_sequences_multiple(x_test, seq_len, seq_len)
    plot_results_multiple(predictions, y_test, seq_len)
def main():
    """Train the model from ``config.json`` and plot multi-sequence test predictions."""
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(
        os.path.join('data', data_cfg['filename']),
        data_cfg['train_test_split'],
        data_cfg['columns']
    )

    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call is
    # kept in case get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=seq_len,
            batch_size=batch_size,
            normalise=normalise
        ),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir
    )

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    # Both the window size and the prediction length are the sequence length.
    predictions = model.predict_sequences_multiple(x_test, seq_len, seq_len)

    plot_results_multiple(predictions, y_test, seq_len)
Пример #9
0
def main():
    """Train the model from ``config.json`` and plot a full-sequence test prediction.

    This variant passes no save directory to ``train_generator``.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    data = DataLoader(
        os.path.join('data', data_cfg['filename']),
        data_cfg['train_test_split'],
        data_cfg['columns']
    )

    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=...). The call is kept in case
    # get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    # steps_per_epoch = ceil(total number of windows / batch_size)
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=seq_len,
            batch_size=batch_size,
            normalise=normalise
        ),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch
    )

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    predictions = model.predict_sequence_full(x_test, seq_len)

    plot_results(predictions, y_test)
def main():
    """Train the model, plot the last 200 rescaled predictions and measure performance.

    Predictions and targets are rescaled as ``p0 * (value + 1)`` for plotting;
    ``measure_performance`` receives the un-rescaled predictions and the
    flattened targets.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(os.path.join('data', data_cfg['filename']),
                      data_cfg['train_test_split'],
                      data_cfg['columns'])

    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call is
    # kept in case get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(seq_len=seq_len,
                                           batch_size=batch_size,
                                           normalise=normalise),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir,
        configs=configs)

    x_test, y_test, p0 = data.get_test_data(seq_len=seq_len,
                                            normalise=normalise)

    predictions = model.predict_point_by_point(x_test)
    # Flatten a copy of the targets to 1-D; the loader's array is left untouched.
    y_test = np.reshape(np.copy(y_test), -1)

    # Plot only the final 200 points of each rescaled series.
    rescaled_pred = (p0 * (predictions + 1))[-200:]
    rescaled_true = (p0 * (y_test + 1))[-200:]
    plot_results(rescaled_pred, rescaled_true)
    measure_performance(predictions, y_test)
Пример #11
0
def main():
    """Train the model and write a forecast based on the last prediction to ``output.txt``.

    The written value is ``int((predictions[-1] + 1) * data.last_raw_data(...))``,
    prefixed by a fixed Chinese-language message.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(
        os.path.join('data', data_cfg['filename']),
        data_cfg['train_test_split'],
        data_cfg['columns']
    )
    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call is
    # kept in case get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=seq_len,
            batch_size=batch_size,
            normalise=normalise
        ),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir
    )

    # Only the third return value is used for the prediction below.
    x_test, y_test, onedot = data.get_test_data(seq_len=seq_len,
                                                normalise=normalise)
    predictions = model.predict_point_by_point(onedot)

    # Rescale the last prediction by the value returned from last_raw_data.
    forecast = int((predictions[-1] + 1) * data.last_raw_data(seq_len=seq_len))
    # NOTE(review): no encoding is given, so the non-ASCII message is written
    # with the platform default encoding -- confirm what consumers of
    # output.txt expect before pinning one.
    with open('output.txt', 'w') as f:
        # Message text: "The predicted bolt and nut consumption for the next period is:"
        f.write('预测下一时间的螺栓螺母消耗量为:' + str(forecast))
Пример #12
0
def main():
    """Train using module-level CONFIG/DATA/MODEL paths and plot multi-sequence predictions.

    Writes ``--END--`` to stdout once plotting has finished.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open(CONFIG, 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    data = DataLoader(DATA, data_cfg['train_test_split'], data_cfg['columns'])

    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative,
    # model.train(x, y, epochs=..., batch_size=...). The call is kept in case
    # get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(seq_len=seq_len,
                                           batch_size=batch_size,
                                           normalise=normalise),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        model_path=MODEL)

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    # Both the window size and the prediction length are the sequence length.
    predictions = model.predict_sequences_multiple(x_test, seq_len, seq_len)

    plot_results_multiple(predictions, y_test, seq_len)
    sys.stdout.write("--END--")
Пример #13
0
def main():
    """Train the model from ``config.json`` and plot point-by-point test predictions."""
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(os.path.join('data', data_cfg['filename']),
                      data_cfg['train_test_split'],
                      data_cfg['columns'])

    model = Model()
    model.build_model(configs)

    # NOTE(review): x and y are only needed by the in-memory alternative
    # (heavier computation), model.train(x, y, epochs=..., batch_size=...,
    # save_dir=...). The call is kept in case get_train_data has side
    # effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(seq_len=seq_len,
                                           batch_size=batch_size,
                                           normalise=normalise),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir)

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    # Predictions are made one point at a time.
    predictions = model.predict_point_by_point(x_test)
    plot_results(predictions, y_test)
Пример #14
0
def main():
    """Build and train one model per selected stock ID.

    Stock IDs are read one integer per line from a text file; only the slice
    ``[377:378]`` of that list is processed.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    dataframe = pd.read_csv(data_cfg['filename'])

    # NOTE(review): hard-coded, machine-specific path -- consider moving it
    # into config.json.
    # The handle is now closed once the IDs are read; int() ignores the
    # trailing newline on each line.
    with open('/Users/yucheng/Downloads/project2/stockIDs.txt', 'r') as ids_file:
        stock_ids = [int(line) for line in ids_file]

    # `stock_id` replaces the original loop variable `id`, which shadowed the
    # builtin of the same name.
    for stock_id in stock_ids[377:378]:
        print("index: ", stock_ids.index(stock_id))
        data = DataLoader(dataframe, stock_id, data_cfg['train_test_split'],
                          data_cfg['columns'])

        model = Model()
        model.build_model(configs)

        # NOTE(review): x and y are only needed by the in-memory alternative,
        # model.train(x, y, epochs=..., batch_size=..., save_dir=...). The call
        # is kept in case get_train_data has side effects -- confirm before
        # removing.
        x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

        # Out-of-memory training: batches are pulled from a generator.
        steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
        model.train_generator(
            id=stock_id,
            data_gen=data.generate_train_batch(seq_len=seq_len,
                                               batch_size=batch_size,
                                               normalise=normalise),
            epochs=train_cfg['epochs'],
            batch_size=batch_size,
            steps_per_epoch=steps_per_epoch,
            save_dir=save_dir)
Пример #15
0
    plt.show()


# %% Build/Train the model
model = Model()
model.build_model(configs)

# Out-of-memory training: batches are pulled from a generator, so the number
# of steps per epoch has to be supplied explicitly.
steps_per_epoch = math.ceil(
    (data.len_train - configs['data']['sequence_length'])
    / configs['training']['batch_size']
)

model.train_generator(
    data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise'],
    ),
    epochs=configs['training']['epochs'],
    batch_size=configs['training']['batch_size'],
    steps_per_epoch=steps_per_epoch,
    save_dir=configs['model']['save_dir'],
)

# Fetch the test windows and their targets.
x_test, y_test = data.get_test_data(
    seq_len=configs['data']['sequence_length'],
    normalise=configs['data']['normalise'],
)

# Both the window size and the prediction length are the sequence length.
predictions = model.predict_sequences_multiple(
    x_test,
    configs['data']['sequence_length'],
    configs['data']['sequence_length'],
)

plot_results_multiple(
    predictions,
    y_test,
    configs['data']['sequence_length'],
)
Пример #16
0
def main():
    """Continue training a saved model, export predictions and report direction accuracy.

    Loads ``saved_models/dow_30_50%.h5``, trains it further from the batch
    generator, writes ``predictions.csv`` and ``y_test.csv``, prints the share
    of steps where the predicted move has the same sign as the true move, and
    plots predictions against the targets.
    """
    # A context manager closes the config file instead of leaking the handle.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data_cfg = configs['data']
    train_cfg = configs['training']
    save_dir = configs['model']['save_dir']
    seq_len = data_cfg['sequence_length']
    batch_size = train_cfg['batch_size']
    normalise = data_cfg['normalise']

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    data = DataLoader(os.path.join('data', data_cfg['filename']),
                      data_cfg['train_test_split'],
                      data_cfg['columns'])

    model = Model()
    # A previously saved model is loaded instead of calling build_model(configs).
    model.load_model("saved_models/dow_30_50%.h5")

    # NOTE(review): x and y are never used below. The call is kept in case
    # get_train_data has side effects -- confirm before removing.
    x, y = data.get_train_data(seq_len=seq_len, normalise=normalise)

    # Out-of-memory training: batches are pulled from a generator.
    steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)
    model.train_generator(
        data_gen=data.generate_train_batch(seq_len=seq_len,
                                           batch_size=batch_size,
                                           normalise=normalise),
        epochs=train_cfg['epochs'],
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        save_dir=save_dir)

    x_test, y_test = data.get_test_data(seq_len=seq_len, normalise=normalise)

    print("x_test.shape")
    print(x_test.shape)

    predictions = model.predict_point_by_point(x_test)

    print(predictions.shape)
    print(y_test.shape)

    pd.DataFrame(predictions).to_csv("predictions.csv")
    pd.DataFrame(y_test).to_csv("y_test.csv")

    # Count the steps where the predicted move and the true move, both
    # measured from the previous true value, have the same sign.
    hits = 0
    for i in range(1, len(predictions)):
        prev_true = y_test[i - 1]
        true_move = y_test[i] - prev_true
        predicted_move = predictions[i] - prev_true
        if true_move * predicted_move > 0:
            hits += 1

    # NOTE(review): only len(predictions) - 1 steps are scored, yet the rate is
    # divided by len(predictions) -- confirm whether that is intended.
    print("Good prediction rate = " + str(hits / len(predictions)))

    plot_results(predictions, y_test)
Пример #17
0
def main():
    """Train the model described by ``config.json``, evaluate it and log the run.

    The function loads the configuration, makes sure the model and picture
    output directories exist, trains through the batch generator, predicts
    point-by-point on the test data, prints the values labelled ``mse`` and
    ``rmse``, plots the predictions against ``y_test`` and appends a summary
    of the run to ``note.txt``.
    """
    # Read the configuration inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    # exist_ok=True creates the directories only when missing, without the
    # check-then-create race of os.path.exists() followed by os.makedirs().
    os.makedirs(configs['model']['save_dir'], exist_ok=True)
    os.makedirs(configs['data']['data picture save dir'], exist_ok=True)

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'], configs['data']['id'])

    model = Model()
    model.build_model(configs)

    # Out-of-memory generative training: the number of batches needed to
    # cover (len_train - sequence_length) samples, rounded up.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_point_by_point(x_test)

    sess = backend.get_session()

    # Build the averaged metric expressions first; they are only turned into
    # concrete values by .eval() inside the session below.
    rmsee = backend.mean(rmse(y_test, predictions), axis=0)
    msee = backend.mean(mse(y_test, predictions), axis=0)

    with sess.as_default():
        mse_val = msee.eval()
        rmse_val = rmsee.eval()
        print("mse:", mse_val)
        print("rmse:", rmse_val)

    plot_results(predictions, y_test, configs['data']['data picture save dir'],
                 configs['data']['id'])

    # Append (never overwrite) so note.txt accumulates one entry per run.
    with open("note.txt", 'a+') as f:
        f.write(
            '\n%s-e%s.h5:\n' %
            (dt.datetime.now().strftime('%m%d-%H%M%S'), configs['data']['id']))
        f.write("data split:%f\n" % configs["data"]["train_test_split"])
        f.write("epochs:%d\n" % configs["training"]["epochs"])
        f.write("batch size:%d\n" % configs["training"]["batch_size"])
        f.write("mse:%f\n" % mse_val)
        f.write("rmse:%f\n" % rmse_val)
        f.write("notes:%s\n" % configs['data']['note'])
def main():
    """Train with the batch generator, plot multi-sequence forecasts and score them.

    ``configs`` is not defined inside this function; it is read from an
    enclosing scope. After training, the function predicts multiple sequences
    on the test data, plots them, un-normalises the predictions and passes
    them with the time-indexed true test values to ``calc_RMSE``.
    """
    data_cfg = configs['data']

    # instantiation
    loader = DataLoader(os.path.join('data', data_cfg['filename']),
                        data_cfg['train_test_split'],
                        data_cfg['columns'])

    net = Model()
    net.build_model(configs)

    seq_len = data_cfg['sequence_length']
    normalise = data_cfg['normalise']

    # The in-memory arrays are fetched but not used below; the disabled
    # in-memory alternative would have been:
    #     model.train(x, y,
    #                 epochs=configs['training']['epochs'],
    #                 batch_size=configs['training']['batch_size'])
    train_x, train_y = loader.get_train_data(seq_len=seq_len,
                                             normalise=normalise)

    # out-of memory generative training
    # Original author's note: unclear why this has to be computed by hand --
    # doesn't the LSTM take a batch parameter itself? Why work out the total
    # number of batches to feed?
    #
    # This can occur:
    # in data_generator_task, generator_output = next(self._generator), StopIteration
    # in fit_generator, str(generator_output))
    # output of generator should be a tuple `(x, y, sample_weight)` or `(x, y)`. Found: None
    # So when that error shows up, reduce steps_per_epoch by hand.
    train_cfg = configs['training']
    batch_size = train_cfg['batch_size']
    full_steps = math.ceil((loader.len_train - seq_len) / batch_size)
    steps_per_epoch = full_steps - 7

    batch_stream = loader.generate_train_batch(seq_len=seq_len,
                                               batch_size=batch_size,
                                               normalise=normalise)
    net.train_generator(data_gen=batch_stream,
                        epochs=train_cfg['epochs'],
                        batch_size=batch_size,
                        steps_per_epoch=steps_per_epoch)

    x_test, y_test = loader.get_test_data(seq_len=seq_len,
                                          normalise=normalise)

    predictions = net.predict_sequences_multiple(x_test, seq_len, seq_len)
    plot_results_multiple(predictions, y_test, seq_len)

    # Alternatives left disabled by the original author:
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)

    y_true_train, y_true_test = loader.get_split_data()
    unnorm_data = predict_unnorm_data(y_true_test,
                                      prediction_len=seq_len,
                                      predictions=predictions)

    # Compute the RMSE and output a dataframe
    begin_date = datetime(year=2018, month=9, day=18)
    end_date = begin_date + timedelta(days=(seq_len - 1))

    # y_true_test:(301,2)
    true_frame = pd.DataFrame(y_true_test)
    raw_table = pd.read_csv(os.path.join('data', data_cfg['filename']))
    # Keep only the timestamps that follow the training rows.
    time_column = pd.Series(raw_table['time'][len(y_true_train):])

    # Assigning the Series directly failed to create the new column:
    # DataFrame and Series each carry their own index, so .values is used to
    # hand over the raw values.
    true_frame['time'] = time_column.values
    true_frame = true_frame.set_index('time')
    true_frame.index = pd.to_datetime(true_frame.index)

    calc_RMSE(predicted_data=unnorm_data,
              y_test_true=true_frame,
              begin_date=begin_date,
              end_date=end_date)
def main():
    """Load or train a model from ``config/config.json`` and plot multi-sequence predictions.

    When the ``model.load_model`` config entry is set, the model is loaded
    from that path and plots go to a ``plots`` directory next to it.
    Otherwise a new model is built and trained with the batch generator, and
    plots go to ``<save_dir>/plots``. In both cases the test data is then
    predicted in multiple sequences and the plot is written to
    ``multiple.png`` in the plot directory.
    """
    config_path = 'config/config.json'
    with open(config_path, 'r') as handle:
        configs = json.load(handle)
        logging.info("Loaded {}".format(config_path))

    logging.info("\n{}\n".format(configs))

    data_cfg = configs['data']
    data_path = data_cfg['filename']
    data_dir = os.path.dirname(data_path)
    dtypes = data_cfg.get('dtypes', None)
    windowed_normalization = data_cfg['normalise']

    data = DataLoader(data_path,
                      data_cfg['train_test_split'],
                      data_cfg['columns'],
                      scaler_path=os.path.join(data_dir, "scaler"),
                      windowed_normalization=windowed_normalization,
                      dtypes=dtypes)

    model = Model()
    model_cfg = configs['model']
    pretrained = model_cfg.get('load_model')

    if not pretrained:
        # No saved model configured: build one and train it from scratch.
        # The plot directory is created first, which also creates save_dir.
        plot_dir = os.path.join(model_cfg['save_dir'], "plots")
        os.makedirs(plot_dir, exist_ok=True)
        model.build_model(configs)

        window = data_cfg['sequence_length']
        # The in-memory arrays are fetched but not used below; the disabled
        # in-memory alternative would have been:
        #     model.train(x, y,
        #                 epochs=configs['training']['epochs'],
        #                 batch_size=configs['training']['batch_size'],
        #                 save_dir=configs['model']['save_dir'])
        train_x, train_y = data.get_train_data(
            seq_len=window,
            windowed_normalization=windowed_normalization)

        # out-of-memory generative training
        train_cfg = configs['training']
        batch_size = train_cfg['batch_size']
        steps_per_epoch = math.ceil((data.len_train - window) / batch_size)
        batch_stream = data.generate_train_batch(
            seq_len=window,
            batch_size=batch_size,
            windowed_normalization=windowed_normalization)
        model.train_generator(data_gen=batch_stream,
                              epochs=train_cfg['epochs'],
                              batch_size=batch_size,
                              steps_per_epoch=steps_per_epoch,
                              save_dir=model_cfg['save_dir'])
    else:
        model_path = os.path.join(pretrained)
        logging.info("Loading {}".format(model_path))
        model.load_model(model_path, configs)
        plot_dir = os.path.join(os.path.dirname(model_path), "plots")
        os.makedirs(plot_dir, exist_ok=True)

    horizon = data_cfg['sequence_length']
    x_test, y_test = data.get_test_data(
        seq_len=horizon,
        windowed_normalization=windowed_normalization)

    predictions_multiple = model.predict_sequences_multiple(
        x_test, horizon, horizon)
    multiple_png = os.path.join(plot_dir, "multiple.png")
    plot_results_multiple(predictions_multiple,
                          y_test,
                          horizon,
                          out_path=multiple_png)
    def lstm(sTicker, postn, sFile, file_to_save):
        """Train a model on one ticker's data and record its forecast statistics.

        The function trains with the batch generator, predicts multiple
        sequences on the test data, saves and plots the rescaled last
        sequence, and writes summary figures into row ``postn`` of ``sFile``.
        If any step fails, the failure is logged and the row is filled with
        ``"n/a"`` instead. The table is always written to
        ``stock_performance.csv``.

        Args:
            sTicker: ticker identifier; forwarded as ``file_name`` to
                ``train_generator`` and to ``save_future`` /
                ``plot_results_triple``.
            postn: row key used as ``sFile[column][postn]``.
            sFile: column-indexed table holding ``stock_ticker`` and the six
                result columns written here (presumably a pandas DataFrame --
                TODO confirm against the caller).
            file_to_save: data file path, resolved against the current
                working directory and handed to ``DataLoader``.
        """
        import logging  # local import: only needed on the failure path

        result_columns = ('price_latest', 'percent_change_avg',
                          'percent_change_latest', 'prediction_by_avg',
                          'prediction_by_latest', 'standard_deviation')

        # Close the config file deterministically instead of leaking the handle.
        with open('config.json', 'r') as config_file:
            configs = json.load(config_file)

        # exist_ok=True avoids the race between os.path.exists() and makedirs().
        os.makedirs(configs['model']['save_dir'], exist_ok=True)

        data = DataLoader(os.path.join(os.getcwd(), file_to_save),
                          configs['data']['train_test_split'],
                          configs['data']['columns'])

        try:
            model = Model()
            model.build_model(configs)
            # Return value unused; the call is kept exactly as in the original.
            data.get_train_data(
                seq_len=configs['data']['sequence_length'],
                normalise=configs['data']['normalise'])

            # out-of memory generative training
            steps_per_epoch = math.ceil(
                (data.len_train - configs['data']['sequence_length']) /
                configs['training']['batch_size'])
            model.train_generator(data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']),
                                  epochs=configs['training']['epochs'],
                                  batch_size=configs['training']['batch_size'],
                                  steps_per_epoch=steps_per_epoch,
                                  save_dir=configs['model']['save_dir'],
                                  file_name=sTicker)

            x_test, y_test = data.get_test_data(
                seq_len=configs['data']['sequence_length'],
                normalise=configs['data']['normalise'])

            predictions = model.predict_sequences_multiple(
                x_test, configs['data']['sequence_length'],
                configs['data']['sequence_length'])

            # Rescale the last predicted sequence with (value + 1) * base.
            # NOTE(review): the -50 offset and the 49 below look tied to a
            # sequence_length of 50 -- confirm before changing the config.
            last_sequence = predictions[-1]
            normalizer = data.data_test[-50, [0]]
            finaloutput = [(value + 1) * normalizer[0]
                           for value in last_sequence]

            numpyFinal = np.array(finaloutput)
            save_future(sTicker, numpyFinal)

            plot_results_triple(sFile, finaloutput, file_to_save, sTicker,
                                predictions, y_test,
                                configs['data']['sequence_length'])

            # Absolute error of the last sequence against the final 49 targets.
            # Entries past index 48 stay 0 and still enter the deviation, as
            # in the original code.
            totalDiff = [0] * len(predictions[0])
            for idx in range(0, 49):
                totalDiff[idx] = abs(y_test[(len(y_test) - 49) + idx] -
                                     last_sequence[idx])
            stdDev = float(np.std(totalDiff, 0))

            # NOTE(review): the latest close is read from a fixed file name
            # rather than from file_to_save -- confirm this is intended.
            close_prices = pd.read_csv('stock_market_data.csv')['Close']
            latest_close = close_prices[len(close_prices) - 1]

            # Mean predicted change over the sequence, and the step between
            # its last two points.
            mean_change = (sum(float(value) for value in last_sequence) /
                           len(last_sequence))
            latPerc = (last_sequence[len(last_sequence) - 1] -
                       last_sequence[len(last_sequence) - 2])
            pred_by_avg = latest_close * (mean_change + 1)
            pred_by_latest = latest_close * (latPerc + 1)

            sFile['price_latest'][postn] = "{0:.2f}".format(latest_close)
            sFile['percent_change_avg'][postn] = "{0:.6f}".format(mean_change)
            sFile['percent_change_latest'][postn] = "{0:.6f}".format(latPerc)
            sFile['prediction_by_avg'][postn] = "{0:.2f}".format(pred_by_avg)
            sFile['prediction_by_latest'][postn] = "{0:.2f}".format(
                pred_by_latest)
            sFile['standard_deviation'][postn] = "{0:.6f}".format(stdDev)

        except Exception:
            # Best effort is kept: the run continues with "n/a" for this
            # ticker. The failure is now logged rather than hidden, and
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            logging.exception("lstm failed for %s; recording n/a", sTicker)
            for column in result_columns:
                sFile[column][postn] = "n/a"

        updatedFile = pd.DataFrame({
            'stock_ticker':
            sFile['stock_ticker'],
            'percent_change_avg':
            sFile['percent_change_avg'],
            'percent_change_latest':
            sFile['percent_change_latest'],
            'prediction_by_avg':
            sFile['prediction_by_avg'],
            'prediction_by_latest':
            sFile['prediction_by_latest'],
            'price_latest':
            sFile['price_latest'],
            'standard_deviation':
            sFile['standard_deviation']
        })
        updatedFile.to_csv("stock_performance.csv", index=False, header=True)