Пример #1
0
def main():
    """Train an LSTM via the batch generator and plot point-by-point predictions."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open("config.json", "r") as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race of exists()+makedirs().
    os.makedirs(configs["model"]["save_dir"], exist_ok=True)

    data = DataLoader(
        os.path.join("data", configs["data"]["filename"]),
        configs["data"]["train_test_split"],
        configs["data"]["columns"],
    )

    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused below (generator training is used); kept
    # in case get_train_data has side effects — confirm.
    x, y = data.get_train_data(
        seq_len=configs["data"]["sequence_length"],
        normalise=configs["data"]["normalise"],
    )

    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training: batches per epoch = window count
    # divided by batch size, rounded up.
    steps_per_epoch = math.ceil(
        (data.len_train - configs["data"]["sequence_length"])
        / configs["training"]["batch_size"]
    )
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs["data"]["sequence_length"],
            batch_size=configs["training"]["batch_size"],
            normalise=configs["data"]["normalise"],
        ),
        epochs=configs["training"]["epochs"],
        batch_size=configs["training"]["batch_size"],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs["model"]["save_dir"],
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs["data"]["sequence_length"],
        normalise=configs["data"]["normalise"],
    )

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)

    # plot_results_multiple(predictions, y_test, configs["data"]["sequence_length"])
    plot_results(predictions, y_test)
Пример #2
0
def main():
    """Train via the batch generator and plot multi-sequence predictions."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    # Load the data
    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])
    # Build the model
    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training: number of forward/backward pass
    # combinations per epoch.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    #predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    #predictions = model.predict_point_by_point(x_test)

    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
    #plot_results(predictions, y_test)


#if __name__=='__main__':
#	main()
Пример #3
0
def main():
    """Train on symbol data (rates_count window), plot multi-sequence predictions."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    # Total number of rates to fetch: one window per sequence.
    rates_count = configs['data']['sequence_length'] * configs['data'][
        'number_sequences']
    #os.path.join('data', configs['data']['filename']),
    data = DataLoader(configs['data']['symbol'],
                      configs['data']['train_test_split'],
                      configs['data']['columns'], rates_count)

    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    #predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    #predictions = model.predict_point_by_point(x_test)

    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
    #plot_results(predictions, y_test)

    # Keep the plot window open until the user dismisses it.
    input("Press Enter to continue...")
Пример #4
0
def main():
    """Train via the batch generator, then dump point-by-point predictions
    and the true series to predict.csv / true.csv."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    # Create folder for saved model params; exist_ok avoids the
    # check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )

    #plot true data
    #plot_results(data.data_train,True)

    # Train the model.
    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    # Batches per epoch = window count / batch size, rounded up.
    steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    predictions = model.predict_point_by_point(x_test)
#    plot_results(predictions, y_test)
#    print (predictions)
#    plot_results(predictions, y_test)

    # Persist predicted and true series for offline comparison.
    data1 = pd.DataFrame(predictions)
    data1.to_csv('predict.csv')
    data2 = pd.DataFrame(y_test)
    data2.to_csv('true.csv')
def main(model_name=None):
    """Train a new model, or load `model_name` from save_dir and only predict.

    model_name: optional saved-model filename; when given, training is skipped.
    """
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()

    if model_name:
        saved_model = os.path.join(configs['model']['save_dir'], model_name)
        model.load_model(saved_model)
    else:
        model.build_model(configs)

    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training (skipped when a saved model was loaded).
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    if not model_name:
        model.train_generator(data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']),
                              epochs=configs['training']['epochs'],
                              batch_size=configs['training']['batch_size'],
                              steps_per_epoch=steps_per_epoch,
                              save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
def main():
    """Train via the batch generator and plot multi-sequence predictions."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )

    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)

    plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
Пример #7
0
def main():
    """Train via the batch generator and plot full-sequence predictions."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )

    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=...)

    # Out-of-memory generative training:
    # math.ceil(number of windows / batch_size)
    steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)

    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(predictions, y_test)
def main():
    """Train via the batch generator, de-normalise with p0, plot and score."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'],
                          configs=configs)

    # p0: base prices used to invert the (p/p0 - 1) normalisation below.
    x_test, y_test, p0 = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)
    y_test = np.reshape(np.copy(y_test), -1)

    # Plot the last 200 de-normalised points, then report metrics.
    plot_results((p0 * (predictions + 1))[-200:], (p0 * (y_test + 1))[-200:])
    measure_performance(predictions, y_test)
Пример #9
0
def main():
    """Train via the batch generator and write the next-step forecast to output.txt."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )
    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )

    # onedot: presumably the most recent single window — TODO confirm.
    x_test, y_test, onedot = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    #predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    #predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(onedot)
    # De-normalise the last prediction against the last raw value and persist it.
    with open('output.txt', 'w') as f:
        f.write('预测下一时间的螺栓螺母消耗量为:' + str(int((predictions[-1] + 1) * data.last_raw_data(seq_len=configs['data']['sequence_length']))))
Пример #10
0
def main():
    """Train via the batch generator (paths from CONFIG/DATA/MODEL globals)
    and plot multi-sequence predictions."""
    # Context manager closes the config file (json.load(open(...)) leaked the handle).
    with open(CONFIG, 'r') as config_file:
        configs = json.load(config_file)

    data = DataLoader(DATA, configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=...)

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          model_path=MODEL)

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    #predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    #predictions = model.predict_point_by_point(x_test)

    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
    #plot_results(predictions, y_test)
    sys.stdout.write("--END--")
Пример #11
0
def main():
    """Train via the batch generator and plot point-by-point predictions."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # In-memory alternative (heavier computation):
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # Predictions are made point by point with model.predict_point_by_point.
    predictions = model.predict_point_by_point(x_test)
    plot_results(predictions, y_test)
Пример #12
0
def main():
    """Train one model per selected stock ID using generator-based training."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    dataframe = pd.read_csv(configs['data']['filename'])
    # The original never closed this handle; `with` guarantees it.
    with open('/Users/yucheng/Downloads/project2/stockIDs.txt', 'r') as f:
        stockIDs = [int(line.split('\n')[0]) for line in f.readlines()]
    # `stock_id` avoids shadowing the builtin `id`.
    for stock_id in stockIDs[377:378]:
        # for stock_id in stockIDs[444:500]:
        print("index: ", stockIDs.index(stock_id))
        data = DataLoader(dataframe, stock_id, configs['data']['train_test_split'],
                          configs['data']['columns'])

        model = Model()
        model.build_model(configs)
        # NOTE(review): x, y are unused (generator training below); kept in
        # case get_train_data has side effects — confirm.
        x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                                   normalise=configs['data']['normalise'])
        # In-memory alternative:
        # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

        # Out-of-memory generative training.
        steps_per_epoch = math.ceil(
            (data.len_train - configs['data']['sequence_length']) /
            configs['training']['batch_size'])
        model.train_generator(id=stock_id,
                              data_gen=data.generate_train_batch(
                                  seq_len=configs['data']['sequence_length'],
                                  batch_size=configs['training']['batch_size'],
                                  normalise=configs['data']['normalise']),
                              epochs=configs['training']['epochs'],
                              batch_size=configs['training']['batch_size'],
                              steps_per_epoch=steps_per_epoch,
                              save_dir=configs['model']['save_dir'])
Пример #13
0
def main():
    """Fine-tune a saved model, dump predictions, and report the rate of
    directionally-correct predictions."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create race.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()

    # Continue training from a saved checkpoint instead of building fresh.
    # model.build_model(configs)
    model.load_model("saved_models/dow_30_50%.h5")

    # NOTE(review): x, y are unused (generator training below); kept in case
    # get_train_data has side effects — confirm.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])

    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    print("x_test.shape")
    print(x_test.shape)

    predictions = model.predict_point_by_point(x_test)

    ########################################################################
    # from sklearn.metrics import mean_squared_error
    # loss_final = mean_squared_error(predictions, y_test)
    # print("Testing Loss = " + str(loss_final))
    ########################################################################

    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])

    print(predictions.shape)
    print(y_test.shape)

    # Persist predicted and true series for offline comparison.
    m = pd.DataFrame(predictions)
    n = pd.DataFrame(y_test)

    m.to_csv("predictions.csv")
    n.to_csv("y_test.csv")

    # Count predictions that moved in the same direction as the true value,
    # both measured relative to the previous true value.
    prev_true = 0
    count = 0
    for a in range(len(predictions)):
        if a == 0:
            # No previous value to compare against; seed it and skip.
            prev_true = y_test[a]
            continue
        if (y_test[a] - prev_true) * (predictions[a] - prev_true) > 0:
            count += 1
        prev_true = y_test[a]

    # NOTE: denominator includes the skipped first sample (as originally written).
    print("Good prediction rate = " + str(count / len(predictions)))

    plot_results(predictions, y_test)
Пример #14
0
def main():
    """Train via the batch generator, report mse/rmse, plot, and append a
    run summary to note.txt."""
    # Context manager closes config.json (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # exist_ok=True avoids the check-then-create races.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)
    os.makedirs(configs['data']['data picture save dir'], exist_ok=True)

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'], configs['data']['id'])

    model = Model()
    model.build_model(configs)

    # x, y = data.get_train_data(
    #     seq_len=configs['data']['sequence_length'],
    #     normalise=configs['data']['normalise']
    # )
    # In-memory alternative:
    # model.train(x, y, epochs=..., batch_size=..., save_dir=...)

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    #predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)

    # Evaluate the Keras-backend metric tensors inside a session
    # (TF1-style graph execution).
    sess = backend.get_session()

    rmsee = backend.mean(rmse(y_test, predictions), axis=0)
    msee = backend.mean(mse(y_test, predictions), axis=0)

    with sess.as_default():
        mse_val = msee.eval()
        rmse_val = rmsee.eval()
        print("mse:", mse_val)
        print("rmse:", rmse_val)

    #plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(predictions, y_test, configs['data']['data picture save dir'],
                 configs['data']['id'])

    # Append a human-readable summary of this run.
    with open("note.txt", 'a+') as f:
        f.write(
            '\n%s-e%s.h5:\n' %
            (dt.datetime.now().strftime('%m%d-%H%M%S'), configs['data']['id']))
        f.write("data split:%f\n" % configs["data"]["train_test_split"])
        f.write("epochs:%d\n" % configs["training"]["epochs"])
        f.write("batch size:%d\n" % configs["training"]["batch_size"])
        f.write("mse:%f\n" % mse_val)
        f.write("rmse:%f\n" % rmse_val)
        f.write("notes:%s\n" % configs['data']['note'])
def main():
    """Train via the batch generator, plot multi-sequence predictions, then
    un-normalise them and compute RMSE against dated true test values.

    NOTE(review): relies on a module-level `configs` (no json.load here) and
    on helpers predict_unnorm_data / calc_RMSE defined elsewhere — confirm.
    """
    # instantiation
    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    model.build_model(configs)
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    '''
	# in-memory training
	model.train(
		x,
		y,
		epochs = configs['training']['epochs'],
		batch_size = configs['training']['batch_size']
	)
	'''
    # out-of memory generative training
    # Unclear why this must be computed manually — doesn't the LSTM take a
    # batch parameter itself? Why count the total number of batches to feed?

    # Otherwise this error appears:
    # in data_generator_task, generator_output = next(self._generator), StopIteration
    # in fit_generator, str(generator_output))
    # output of generator should be a tuple `(x, y, sample_weight)` or `(x, y)`. Found: None
    # So when it errors, manually reduce steps_per_epoch.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size']) - 7
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch)

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])

    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)

    y_true_train, y_true_test = data.get_split_data()
    unnorm_data = predict_unnorm_data(
        y_true_test,
        prediction_len=configs['data']['sequence_length'],
        predictions=predictions)
    # Compute RMSE and output a dataframe.
    begin_date = datetime(year=2018, month=9, day=18)
    end_date = begin_date + timedelta(
        days=(configs['data']['sequence_length'] - 1))
    # y_true_test:(301,2)
    y_true_test = pd.DataFrame(y_true_test)
    file = pd.read_csv(os.path.join('data', configs['data']['filename']))
    file = file['time'][len(y_true_train):]
    file = pd.Series(file)
    # Assigning a new column raised an error because a DataFrame and a Series
    # each carry their own index; use .values to get the raw values before
    # assigning them to the new column.
    y_true_test['time'] = file.values
    y_true_test = y_true_test.set_index('time')
    y_true_test.index = pd.to_datetime(y_true_test.index)
    calc_RMSE(predicted_data=unnorm_data,
              y_test_true=y_true_test,
              begin_date=begin_date,
              end_date=end_date)
Пример #16
0
    save_filename = os.path.join(
        saved_dir, '%s-F%s.weights.{epoch:02d}-{val_loss:.6f}.hdf5' %
        (dt.datetime.now().strftime('%Y%m%d-%H%M%S'), str(j)))
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=2),
        ModelCheckpoint(filepath=save_filename,
                        monitor='val_loss',
                        save_best_only=False)
    ]

    steps_per_epoch = (data.len_train - sequence_length) // batch_size
    steps_per_epoch_val = (data.len_val - sequence_length) // batch_size
    model.fit_generator(
        generator=data.generate_train_batch(seq_len=sequence_length,
                                            batch_size=batch_size,
                                            normalise=True),
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        callbacks=callbacks,
        workers=0,
        verbose=2,
        validation_data=data.generate_val_batch(seq_len=sequence_length,
                                                batch_size=batch_size,
                                                normalise=True),
        validation_steps=steps_per_epoch_val)


def invert_prediction(df_, y_pred_, index_at_p0, spredicted_col_):
    """Invert a window-normalised prediction back to the original scale.

    Predictions are assumed to be normalised relative to the window's base
    value p0 (n_i = p_i / p0 - 1), so the inverse transform is
    p_i = p0 * (n_i + 1).

    Args:
        df_: DataFrame holding the un-normalised series.
        y_pred_: normalised prediction(s) — scalar or array-like.
        index_at_p0: row label in ``df_`` of the window's base value p0.
        spredicted_col_: name of the predicted column in ``df_``.

    Returns:
        The prediction(s) rescaled to the original units (same shape as
        ``y_pred_``).
    """
    p0 = df_.loc[index_at_p0, spredicted_col_]
    # BUG FIX: the original computed the inverted value but never returned
    # it, so the function always returned None.
    return p0 * (y_pred_ + 1)
def main():
    """Train/evaluate LSTM models according to ``config.json``.

    Two modes, chosen by ``configs['mode']['selection']``:

    * selection  -- train one shared model over every ticker ID listed in
      ID.csv, then score each ticker's test set with point-by-point
      prediction and write the ranked results to ``ranklist.csv``.
    * single task -- train (or load) a model for the single data file named
      in the config, predict point-by-point, score and plot the result.
    """
    # Use a context manager so the config file handle is always closed.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    #==================== selection =====================#
    if configs['mode']['selection']:
        if not os.path.exists(configs['model']['save_dir']):
            os.makedirs(configs['model']['save_dir'])

        #IDs = configs['data']['IDs']
        # NOTE(review): hard-coded absolute path -- consider moving it into
        # the config. Raw string avoids the invalid escape sequences
        # (\A, \m, \L, \d, \I) the original literal contained.
        with open(
                r'D:\ColumbiaCourses\Advanced Big Data Analytics 6895\milestone3\LSTM-Neural-Network-for-Time-Series-Prediction\data\ID.csv',
                newline='') as f:
            reader = csv.reader(f)
            IDs = list(reader)
        # Each CSV row is a one-element list; keep only the ID value.
        IDs = [x[0] for x in IDs]

        model = Model()
        if configs['mode']['train_new_model']:
            model.build_model(configs)
            print('[Model] Training Started')
            cnt = 0
            #===== train ====#
            for ID in IDs:
                cnt += 1
                filename = str(ID) + '.csv'
                data = DataLoader(filename=os.path.join('data', filename),
                                  split=configs['data']['train_test_split'],
                                  cols=configs['data']['columns'],
                                  test_only=False)
                # NOTE(review): x, y are unused (training is generator
                # based); kept in case get_train_data has side effects on
                # the loader -- TODO confirm and remove.
                x, y = data.get_train_data(
                    seq_len=configs['data']['sequence_length'],
                    normalise=configs['data']['normalise'])

                # NOTE(review): cnt % 1 == 0 is always true, so a
                # checkpoint is written for every ID; raise the modulus to
                # thin the checkpoints out.
                tocheckpoint = cnt % 1 == 0
                steps_per_epoch = math.ceil(
                    (data.len_train - configs['data']['sequence_length']) /
                    configs['training']['batch_size'])
                model.train_generator_all(
                    data_gen=data.generate_train_batch(
                        seq_len=configs['data']['sequence_length'],
                        batch_size=configs['training']['batch_size'],
                        normalise=configs['data']['normalise']),
                    epochs=configs['training']['epochs'],
                    batch_size=configs['training']['batch_size'],
                    steps_per_epoch=steps_per_epoch,
                    save_dir=configs['model']['save_dir'],
                    tocheckpoint=tocheckpoint,
                    ID=ID)
            print('[Model] Training All Finished')
        else:
            model.load_model(configs['mode']['train_file_path'])

        #===== predict =====#
        print('[Prediction]Start to predict and rank')
        ranklist = []
        for ID in IDs:
            # BUG FIX: the original used '%s'.format(ID), which printed
            # the literal text "predicting %s"; use a real {} placeholder.
            print('predicting {}'.format(ID))
            filename = str(ID) + '.csv'
            data = DataLoader(filename=os.path.join('data', filename),
                              split=configs['data']['train_test_split'],
                              cols=configs['data']['columns'],
                              test_only=False)
            x_test, y_test = data.get_test_data(
                seq_len=configs['data']['sequence_length'],
                normalise=configs['data']['normalise'])
            predictions = model.predict_point_by_point(x_test)
            test_score = score(y_true=y_test, y_pred=predictions)
            ranklist.append((ID, *test_score))
        # Rank by the first score component, ascending.
        ranklist.sort(key=lambda x: x[1])
        with open("ranklist.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(ranklist)
        return
    #====================================================#

    #==================== single task ===================#
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])
    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns'],
        configs['mode']['test_only']  # skip building the train split when only testing
    )

    model = Model()
    if configs['mode']['test_only']:
        model.load_model(configs['mode']['test_file_path'])
    else:
        if configs['mode']['train_new_model']:
            model.build_model(configs)
        else:
            model.load_model(configs['mode']['train_file_path'])

        x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                                   normalise=configs['data']['normalise'])
        # Out-of-memory generative training: stream batches from the loader
        # instead of holding the whole training set in memory.
        steps_per_epoch = math.ceil(
            (data.len_train - configs['data']['sequence_length']) /
            configs['training']['batch_size'])
        model.train_generator(data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']),
                              epochs=configs['training']['epochs'],
                              batch_size=configs['training']['batch_size'],
                              steps_per_epoch=steps_per_epoch,
                              save_dir=configs['model']['save_dir'],
                              mode=configs['mode'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_point_by_point(x_test)
    test_score = score(y_true=y_test, y_pred=predictions)

    plot_results(predictions, y_test)
def main():
    """Load the JSON config, build or restore the model, then run
    multi-sequence prediction on the test split and save the plot.
    """
    config_path = 'config/config.json'
    with open(config_path, 'r') as cfg_file:
        configs = json.load(cfg_file)
        logging.info("Loaded {}".format(config_path))

    logging.info("\n{}\n".format(configs))

    data_cfg = configs['data']
    data_path = data_cfg['filename']
    windowed_normalization = data_cfg['normalise']
    seq_len = data_cfg['sequence_length']

    loader = DataLoader(data_path,
                        data_cfg['train_test_split'],
                        data_cfg['columns'],
                        scaler_path=os.path.join(os.path.dirname(data_path),
                                                 "scaler"),
                        windowed_normalization=windowed_normalization,
                        dtypes=data_cfg.get('dtypes'))

    model = Model()

    pretrained = configs['model'].get('load_model')
    if pretrained:
        # Restore a previously trained model; plots go next to its weights.
        model_path = os.path.join(pretrained)
        logging.info("Loading {}".format(model_path))
        model.load_model(model_path, configs)
        plot_dir = os.path.join(os.path.dirname(model_path), "plots")
        os.makedirs(plot_dir, exist_ok=True)
    else:
        # Fresh training run; plots go under the configured save_dir.
        plot_dir = os.path.join(configs['model']['save_dir'], "plots")
        os.makedirs(plot_dir, exist_ok=True)
        model.build_model(configs)
        batch_size = configs['training']['batch_size']
        x, y = loader.get_train_data(
            seq_len=seq_len,
            windowed_normalization=windowed_normalization)
        # Out-of-memory (generator-based) training.
        steps_per_epoch = math.ceil((loader.len_train - seq_len) / batch_size)
        model.train_generator(
            data_gen=loader.generate_train_batch(
                seq_len=seq_len,
                batch_size=batch_size,
                windowed_normalization=windowed_normalization),
            epochs=configs['training']['epochs'],
            batch_size=batch_size,
            steps_per_epoch=steps_per_epoch,
            save_dir=configs['model']['save_dir'])

    x_test, y_test = loader.get_test_data(
        seq_len=seq_len,
        windowed_normalization=windowed_normalization)

    predictions_multiple = model.predict_sequences_multiple(
        x_test, seq_len, seq_len)
    plot_results_multiple(predictions_multiple,
                          y_test,
                          seq_len,
                          out_path=os.path.join(plot_dir, "multiple.png"))
Example #19
0
        plt.legend()
    plt.show()


# %% Build/Train the model
# Hoist the config lookups reused below into locals.
seq_len = configs['data']['sequence_length']
batch_size = configs['training']['batch_size']

model = Model()
model.build_model(configs)

# Out-of-memory generative training: batches are streamed from the loader
# rather than materialising the whole training set.
steps_per_epoch = math.ceil((data.len_train - seq_len) / batch_size)

model.train_generator(
    data_gen=data.generate_train_batch(
        seq_len=seq_len,
        batch_size=batch_size,
        normalise=configs['data']['normalise']),
    epochs=configs['training']['epochs'],
    batch_size=batch_size,
    steps_per_epoch=steps_per_epoch,
    save_dir=configs['model']['save_dir'])

x_test, y_test = data.get_test_data(seq_len=seq_len,
                                    normalise=configs['data']['normalise'])

predictions = model.predict_sequences_multiple(x_test, seq_len, seq_len)

plot_results_multiple(predictions, y_test, seq_len)
Example #20
0
def main():
    configs = json.load(open('config.json', 'r'))
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    # Get data and normalise it first
    # data_total = data.get_total_data(
    #     seq_len=configs['data']['sequence_length'],
    #     normalise=False)
    # last_ob = data_total[0][0]
    # print("last_ob: ", str(last_ob))
    # print("data_tatal shape: ", str(data_total.shape))
    # scaler, normalised_data = data.transform_data(data_total)
    # print("norm data shape: ", str(normalised_data.shape))
    # if plotData:
    #     plot_data(normalised_data)

    # data.update_data(normalised_data)

    # Get training and test data
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=False)
    print("x shape: ", str(x.shape))
    print("y shape: ", str(y.shape))
    if plotData:
        plot_data(y.flatten())

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'], normalise=False)
    print("x_test shape: ", str(x_test.shape))
    print("y_test shape: ", str(y_test.shape))
    if plotData:
        plot_data(y_test.flatten())

    # Build the model(s)
    model = MyModel()
    if useFuncModel:
        model.build_functional_model(configs)
    if useSeqModel:
        model.build_sequential_model(configs)

    # x, y = data.get_train_data(
    #     seq_len=configs['data']['sequence_length'],
    #     normalise=configs['data']['normalise']
    # )

    if plotData:
        print("y.shape: ", str(y.shape))
        print("y_test.shape: ", str(y_test.shape))
        print("data_total.shape: ", str(data_total.shape))
        plot_data(data_total)
        # plot_train_test_total(y, y_test, data_total + 1, configs['data']['sequence_length'])

    # in-memory training
    if useFuncModel:
        model.train(x,
                    y,
                    epochs=configs['training']['epochs'],
                    batch_size=configs['training']['batch_size'],
                    save_dir=configs['model']['save_dir'],
                    modelType=ModelType.FUNCTIONAL)
    if useSeqModel:
        model.train(x,
                    y,
                    epochs=configs['training']['epochs'],
                    batch_size=configs['training']['batch_size'],
                    save_dir=configs['model']['save_dir'],
                    modelType=ModelType.SEQUENTIAL)
    '''
    # Train the models: out-of memory generative training
    steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir'],
        modelType=ModelType.FUNCTIONAL
    )
    if useSeqModel:
        model.train_generator(
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']
            ),
            epochs=configs['training']['epochs'],
            batch_size=configs['training']['batch_size'],
            steps_per_epoch=steps_per_epoch,
            save_dir=configs['model']['save_dir'],
            modelType=ModelType.SEQUENTIAL
        )    
    '''

    # Visualize convolutional layer operations on raw training data
    if visualizeConvolution and useFuncModel:
        print("*****x shape: ", str(x.shape))
        conv_predictions = model.conv_layer_analysis(
            x, configs['data']['sequence_length'],
            configs['data']['sequence_length'])

    # Compare performance
    print("comparing models")
    func_train_perf = 1
    func_test_perf = 1
    if useFuncModel and evaluatePerformance:
        func_train_perf = model.eval_generator(
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']),
            batch_size=configs['training']['batch_size'],
            save_dir=configs['model']['save_dir'],
            modelType=ModelType.FUNCTIONAL)
        func_test_perf = model.eval(
            x=x_test,
            y=y_test,
            batch_size=configs['training']['batch_size'],
            modelType=ModelType.FUNCTIONAL)
    seq_train_perf = 1
    seq_test_perf = 1
    # print("FUNCTIONAL MODEL TRAIN PERF: ", str(func_train_perf))
    # print("FUNCTIONAL MODEL TEST PERF: ", str(func_test_perf))
    if useSeqModel and evaluatePerformance:
        print("Evaluate Sequential Model Performance")
        seq_train_perf = model.eval_generator(
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']),
            batch_size=configs['training']['batch_size'],
            save_dir=configs['model']['save_dir'],
            modelType=ModelType.SEQUENTIAL)
        seq_test_perf = model.eval(
            x=x_test,
            y=y_test,
            batch_size=configs['training']['batch_size'],
            modelType=ModelType.SEQUENTIAL)

    print("FUNCTIONAL MODEL TRAIN PERF: ", str(func_train_perf))
    print("SEQUENTIAL MODEL TRAIN PERF: ", str(seq_train_perf))
    print("FUNCTIONAL MODEL TEST PERF: ", str(func_test_perf))
    print("SEQUENTIAL MODEL TEST PERF: ", str(seq_test_perf))

    # Plot predictions on each of the models
    if plotPredictions:
        if useFuncModel:
            # Run predictions on Functional model (with conv layers)
            func_predictions = model.predict_sequences_multiple(
                x, configs['data']['sequence_length'],
                configs['data']['sequence_length'], ModelType.FUNCTIONAL)
            print("y.shape: ", str(y.shape))
            plot_results_multiple(func_predictions, y,
                                  configs['data']['sequence_length'], True,
                                  True, y0)
            # Run predictions on Functional model (with conv layers)
            func_predictions_test = model.predict_sequences_multiple(
                x_test, configs['data']['sequence_length'],
                configs['data']['sequence_length'], ModelType.FUNCTIONAL)
            plot_results_multiple_over_total(
                func_predictions, data_total,
                configs['data']['sequence_length'], True, 0)
            plot_results_multiple(func_predictions_test, y_test,
                                  configs['data']['sequence_length'], True,
                                  False, 0)

        # Run predictions on Sequential model
        if useSeqModel:
            seq_predictions = model.predict_sequences_multiple(
                x,
                configs['data']['sequence_length'],
                configs['data']['sequence_length'],
                ModelType.SEQUENTIAL,
            )
            plot_results_multiple_over_total(
                seq_predictions, data_total,
                configs['data']['sequence_length'], True, 0)