def main():
    """Train an LSTM on the configured time series and plot point-by-point predictions.

    Reads hyper-parameters from config.json, trains via the memory-friendly
    batch generator, then predicts one step ahead for each test window.
    """
    # Use a context manager so the config handle is closed; the original
    # json.load(open(...)) leaked the file object.
    with open("config.json", "r") as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs["model"]["save_dir"]):
        os.makedirs(configs["model"]["save_dir"])

    data = DataLoader(
        os.path.join("data", configs["data"]["filename"]),
        configs["data"]["train_test_split"],
        configs["data"]["columns"],
    )

    model = Model()
    model.build_model(configs)
    # x, y feed only the disabled in-memory path below; the generator path
    # re-derives batches itself.
    x, y = data.get_train_data(
        seq_len=configs["data"]["sequence_length"],
        normalise=configs["data"]["normalise"],
    )
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training: one step per batch of sliding
    # windows, rounded up so a partial final batch still counts.
    steps_per_epoch = math.ceil(
        (data.len_train - configs["data"]["sequence_length"])
        / configs["training"]["batch_size"]
    )
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs["data"]["sequence_length"],
            batch_size=configs["training"]["batch_size"],
            normalise=configs["data"]["normalise"],
        ),
        epochs=configs["training"]["epochs"],
        batch_size=configs["training"]["batch_size"],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs["model"]["save_dir"],
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs["data"]["sequence_length"],
        normalise=configs["data"]["normalise"],
    )

    # Other prediction modes kept for reference:
    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)
    # plot_results_multiple(predictions, y_test, configs["data"]["sequence_length"])
    plot_results(predictions, y_test)
def main():
    """Train an LSTM on the configured series and plot multi-sequence predictions."""
    # `with` closes the config handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    # Load the data.
    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    # Build the model.
    model = Model()
    model.build_model(configs)
    # x, y feed only the disabled in-memory path; the generator path below
    # produces its own batches.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training.
    # Number of forward/backward passes ("steps") per epoch.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)

    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
    # plot_results(predictions, y_test)


# Entry-point guard was disabled in the original; kept disabled to preserve
# module-level behavior.
# if __name__ == '__main__':
#     main()
def main():
    """Train an LSTM on symbol rate data, plot multi-sequence predictions, then pause."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    # Total number of rates to fetch: window length times window count.
    rates_count = (configs['data']['sequence_length'] *
                   configs['data']['number_sequences'])
    #os.path.join('data', configs['data']['filename']),
    # NOTE(review): this variant loads by symbol rather than from a CSV file.
    data = DataLoader(configs['data']['symbol'],
                      configs['data']['train_test_split'],
                      configs['data']['columns'], rates_count)

    model = Model()
    model.build_model(configs)
    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)
    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
    # plot_results(predictions, y_test)

    # Keep the console window open until the user acknowledges.
    input("Press Enter to continue...")
def main():
    """Train an LSTM and export point-by-point predictions and ground truth to CSV."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # Create folder for saved model parameters.
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )
    # Plot true data (disabled):
    # plot_results(data.data_train, True)

    # Train model.
    model = Model()
    model.build_model(configs)
    # NOTE(review): x, y are unused by the generator path below — presumably a
    # leftover from in-memory training; kept for behavior parity.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    # Batches per epoch: number of sliding windows over batch size, rounded up.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    predictions = model.predict_point_by_point(x_test)
    # plot_results(predictions, y_test)
    # print(predictions)

    # Persist predictions and ground truth for offline analysis.
    data1 = pd.DataFrame(predictions)
    data1.to_csv('predict.csv')
    data2 = pd.DataFrame(y_test)
    data2.to_csv('true.csv')
def main(model_name=None):
    """Train (or load) an LSTM and plot multi-sequence predictions.

    Parameters
    ----------
    model_name : str or None
        Filename of a saved model inside the configured save_dir. When given,
        the model is loaded and training is skipped; when None, a fresh model
        is built and trained.
    """
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    if model_name:
        saved_model = os.path.join(configs['model']['save_dir'], model_name)
        model.load_model(saved_model)
    else:
        model.build_model(configs)

    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training (skipped when a saved model was loaded).
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    if not model_name:
        model.train_generator(data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']),
                              epochs=configs['training']['epochs'],
                              batch_size=configs['training']['batch_size'],
                              steps_per_epoch=steps_per_epoch,
                              save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
def main():
    """Train an LSTM via the batch generator and plot multi-sequence predictions."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )

    model = Model()
    model.build_model(configs)
    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)
    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
def main():
    """Train an LSTM (no checkpointing) and plot a full-sequence prediction."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )

    model = Model()
    model.build_model(configs)
    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'])

    # Out-of-memory generative training.
    # steps_per_epoch = ceil(number of sliding windows / batch_size)
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)
    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(predictions, y_test)
def main():
    """Train an LSTM, de-normalise predictions via the window base price, and evaluate."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    model.build_model(configs)
    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'],
                          configs=configs)

    # p0: base values used to undo windowed normalisation (y = p0 * (y_norm + 1)).
    x_test, y_test, p0 = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)

    # Flatten before de-normalising; plot only the last 200 points.
    y_test = np.reshape(np.copy(y_test), -1)
    plot_results((p0 * (predictions + 1))[-200:], (p0 * (y_test + 1))[-200:])
    measure_performance(predictions, y_test)
def main():
    """Train an LSTM and write the next-period consumption forecast to output.txt."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )

    model = Model()
    model.build_model(configs)
    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )

    # NOTE(review): get_test_data here returns a third value ("onedot") that is
    # fed to the predictor instead of x_test — presumably the latest window;
    # confirm against this variant's DataLoader.
    x_test, y_test, onedot = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(onedot)

    # De-normalise the last prediction against the last raw observation and
    # persist the forecast.
    with open('output.txt', 'w') as f:
        f.write('预测下一时间的螺栓螺母消耗量为:' + str(
            int((predictions[-1] + 1) *
                data.last_raw_data(seq_len=configs['data']['sequence_length']))))
def main():
    """Train an LSTM using module-level CONFIG/DATA/MODEL paths and plot predictions."""
    # NOTE(review): CONFIG, DATA and MODEL are module-level constants defined
    # outside this block. Context manager closes the config handle; the
    # original json.load(open(...)) leaked it.
    with open(CONFIG, 'r') as config_file:
        configs = json.load(config_file)

    data = DataLoader(DATA, configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    model.build_model(configs)
    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          model_path=MODEL)

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)
    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
    # plot_results(predictions, y_test)
    sys.stdout.write("--END--")
def main():
    """Train an LSTM via the batch generator and plot point-by-point predictions."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    model.build_model(configs)
    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # Alternative in-memory training (heavier computation):
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # Predictions are made point by point with model.predict_point_by_point.
    predictions = model.predict_point_by_point(x_test)
    plot_results(predictions, y_test)
def main():
    """Train one LSTM per selected stock ID from a shared price dataframe."""
    # Context managers close both handles; the original leaked the config
    # handle (json.load(open(...))) and never closed the stockIDs file.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    dataframe = pd.read_csv(configs['data']['filename'])
    # NOTE(review): hard-coded absolute path — consider moving into config.
    with open('/Users/yucheng/Downloads/project2/stockIDs.txt', 'r') as f:
        stockIDs = [int(line.split('\n')[0]) for line in f.readlines()]

    # `stock_id` avoids shadowing the builtin `id` (keyword arg name unchanged).
    for stock_id in stockIDs[377:378]:
        # for stock_id in stockIDs[444:500]:
        print("index: ", stockIDs.index(stock_id))
        data = DataLoader(dataframe, stock_id,
                          configs['data']['train_test_split'],
                          configs['data']['columns'])

        model = Model()
        model.build_model(configs)
        # Only consumed by the disabled in-memory path below.
        x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                                   normalise=configs['data']['normalise'])
        # Alternative in-memory training:
        # model.train(x, y,
        #             epochs=configs['training']['epochs'],
        #             batch_size=configs['training']['batch_size'],
        #             save_dir=configs['model']['save_dir'])

        # Out-of-memory generative training.
        steps_per_epoch = math.ceil(
            (data.len_train - configs['data']['sequence_length']) /
            configs['training']['batch_size'])
        model.train_generator(id=stock_id,
                              data_gen=data.generate_train_batch(
                                  seq_len=configs['data']['sequence_length'],
                                  batch_size=configs['training']['batch_size'],
                                  normalise=configs['data']['normalise']),
                              epochs=configs['training']['epochs'],
                              batch_size=configs['training']['batch_size'],
                              steps_per_epoch=steps_per_epoch,
                              save_dir=configs['model']['save_dir'])
def main():
    """Fine-tune a saved LSTM and report the directional accuracy of its predictions."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    model = Model()
    # Resume from a checkpoint instead of building a fresh model.
    # model.build_model(configs)
    model.load_model("saved_models/dow_30_50%.h5")

    # Only needed for in-memory training; kept for behavior parity.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])
    print("x_test.shape")
    print(x_test.shape)

    predictions = model.predict_point_by_point(x_test)
    # Optional test loss (requires sklearn):
    # from sklearn.metrics import mean_squared_error
    # loss_final = mean_squared_error(predictions, y_test)
    # print("Testing Loss = " + str(loss_final))

    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    print(predictions.shape)
    print(y_test.shape)
    m = pd.DataFrame(predictions)
    n = pd.DataFrame(y_test)
    m.to_csv("predictions.csv")
    n.to_csv("y_test.csv")

    # Directional accuracy: a prediction counts as "good" when it moves in the
    # same direction from the previous true value as the actual series did
    # (positive product of the two deltas).
    count = 0
    t_1 = 0
    for a in range(len(predictions)):
        if a == 0:
            t_1 = y_test[a]
            continue
        p = predictions[a]
        t = y_test[a]
        if (t - t_1) * (p - t_1) > 0:
            count += 1
        t_1 = t
    # Original source had this string literal broken across lines; reconstructed.
    print("Good prediction rate = " + str(count / len(predictions)))
    plot_results(predictions, y_test)
def main():
    """Train an LSTM, evaluate MSE/RMSE via the Keras backend, plot, and log a run summary."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])
    if not os.path.exists(configs['data']['data picture save dir']):
        os.makedirs(configs['data']['data picture save dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'],
                      configs['data']['id'])

    model = Model()
    model.build_model(configs)
    # In-memory training path kept disabled, as in the original:
    # x, y = data.get_train_data(
    #     seq_len=configs['data']['sequence_length'],
    #     normalise=configs['data']['normalise']
    # )
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)

    # Evaluate with backend tensors; .eval() requires the session as default.
    sess = backend.get_session()
    rmsee = backend.mean(rmse(y_test, predictions), axis=0)
    msee = backend.mean(mse(y_test, predictions), axis=0)
    with sess.as_default():
        mse_val = msee.eval()
        rmse_val = rmsee.eval()
    print("mse:", mse_val)
    print("rmse:", rmse_val)

    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(predictions, y_test,
                 configs['data']['data picture save dir'],
                 configs['data']['id'])

    # Append a timestamped run summary to the experiment notebook file.
    with open("note.txt", 'a+') as f:
        f.write('\n%s-e%s.h5:\n' % (dt.datetime.now().strftime('%m%d-%H%M%S'),
                                    configs['data']['id']))
        f.write("data split:%f\n" % configs["data"]["train_test_split"])
        f.write("epochs:%d\n" % configs["training"]["epochs"])
        f.write("batch size:%d\n" % configs["training"]["batch_size"])
        f.write("mse:%f\n" % mse_val)
        f.write("rmse:%f\n" % rmse_val)
        f.write("notes:%s\n" % configs['data']['note'])
def main():
    """Train an LSTM, plot multi-sequence predictions, then compute RMSE on
    de-normalised test data against a date-indexed ground-truth frame.

    NOTE(review): `configs` is read but never defined in this function —
    presumably a module-level global; confirm against the rest of the file.
    """
    # instantiation
    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])
    model = Model()
    model.build_model(configs)
    # x, y are only needed by the disabled in-memory path below.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    '''
    # in-memory training
    model.train(
        x,
        y,
        epochs = configs['training']['epochs'],
        batch_size = configs['training']['batch_size']
    )
    '''
    # out-of memory generative training
    # Original author's note (translated): unclear why this must be computed by
    # hand rather than letting Keras infer it; too-large values raise
    # StopIteration inside fit_generator ("output of generator should be a
    # tuple (x, y, sample_weight) or (x, y). Found: None"), so the count is
    # manually reduced by 7 as a workaround when that error appears.
    # NOTE(review): the `- 7` is an empirical hack tied to this dataset — TODO
    # confirm it still matches the generator's actual batch count.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size']) - 7
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch)
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])
    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)
    y_true_train, y_true_test = data.get_split_data()
    # De-normalise the windowed predictions back to raw scale.
    unnorm_data = predict_unnorm_data(
        y_true_test,
        prediction_len=configs['data']['sequence_length'],
        predictions=predictions)
    # Compute RMSE and build the date-indexed dataframe.
    # NOTE(review): hard-coded evaluation start date — confirm it matches the
    # dataset's test-split boundary.
    begin_date = datetime(year=2018, month=9, day=18)
    end_date = begin_date + timedelta(
        days=(configs['data']['sequence_length'] - 1))
    # y_true_test: shape (301, 2) per the original author's note — verify.
    y_true_test = pd.DataFrame(y_true_test)
    file = pd.read_csv(os.path.join('data', configs['data']['filename']))
    # Take the 'time' column rows corresponding to the test split.
    file = file['time'][len(y_true_train):]
    file = pd.Series(file)
    # Original author's note (translated): assigning a Series directly failed
    # because DataFrame and Series each carry their own index; `.values`
    # extracts the raw values so positional assignment works.
    y_true_test['time'] = file.values
    y_true_test = y_true_test.set_index('time')
    y_true_test.index = pd.to_datetime(y_true_test.index)
    calc_RMSE(predicted_data=unnorm_data,
              y_test_true=y_true_test,
              begin_date=begin_date,
              end_date=end_date)
# NOTE(review): the statements below appear to be the tail of a training
# routine whose `def` lies outside this chunk — they reference `saved_dir`,
# `j`, `data`, `model`, `sequence_length`, `batch_size` and `epochs`, none of
# which are defined here. Kept verbatim (reformatted only).
save_filename = os.path.join(
    saved_dir, '%s-F%s.weights.{epoch:02d}-{val_loss:.6f}.hdf5' %
    (dt.datetime.now().strftime('%Y%m%d-%H%M%S'), str(j)))
# Stop after 2 stagnant epochs; checkpoint every epoch (not only the best).
callbacks = [
    EarlyStopping(monitor='val_loss', patience=2),
    ModelCheckpoint(filepath=save_filename,
                    monitor='val_loss',
                    save_best_only=False)
]
steps_per_epoch = (data.len_train - sequence_length) // batch_size
steps_per_epoch_val = (data.len_val - sequence_length) // batch_size
model.fit_generator(
    generator=data.generate_train_batch(seq_len=sequence_length,
                                        batch_size=batch_size,
                                        normalise=True),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=callbacks,
    workers=0,
    verbose=2,
    validation_data=data.generate_val_batch(seq_len=sequence_length,
                                            batch_size=batch_size,
                                            normalise=True),
    validation_steps=steps_per_epoch_val)


def invert_prediction(df_, y_pred_, index_at_p0, spredicted_col_):
    """Undo windowed normalisation: rescale by the window's base value p0,
    i.e. y = p0 * (y_norm + 1).

    Fix: the original computed the inverted series but never returned it,
    making the function a no-op for callers.
    """
    p0 = df_.loc[index_at_p0, spredicted_col_]
    y_pred_inverted_ = p0 * (y_pred_ + 1)
    return y_pred_inverted_
def main():
    """Run either batch "selection" mode (train and rank many tickers) or a
    single-task train/predict run, depending on config mode flags."""
    # Context manager closes the handle; the original json.load(open(...)) leaked it.
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    #==================== selection =====================#
    if configs['mode']['selection'] == True:
        if not os.path.exists(configs['model']['save_dir']):
            os.makedirs(configs['model']['save_dir'])
        #IDs = configs['data']['IDs']
        # NOTE(review): hard-coded absolute path — consider moving into config.
        # Raw string keeps the same characters while silencing invalid-escape
        # warnings; `with` closes the handle the original left open.
        with open(r'D:\ColumbiaCourses\Advanced Big Data Analytics 6895\milestone3\LSTM-Neural-Network-for-Time-Series-Prediction\data\ID.csv',
                  newline='') as f:
            reader = csv.reader(f)
            IDs = list(reader)
        IDs = [x[0] for x in IDs]

        model = Model()
        if configs['mode']['train_new_model'] == True:
            model.build_model(configs)
            print('[Model] Training Started')
            cnt = 0
            #===== train ====#
            for ID in IDs:
                cnt += 1
                filename = str(ID) + '.csv'
                data = DataLoader(filename=os.path.join('data', filename),
                                  split=configs['data']['train_test_split'],
                                  cols=configs['data']['columns'],
                                  test_only=False)
                x, y = data.get_train_data(
                    seq_len=configs['data']['sequence_length'],
                    normalise=configs['data']['normalise'])
                # cnt % 1 is always 0, so every iteration checkpoints; kept
                # as-is to preserve behavior (likely a tunable interval).
                tocheckpoint = cnt % 1 == 0
                steps_per_epoch = math.ceil(
                    (data.len_train - configs['data']['sequence_length']) /
                    configs['training']['batch_size'])
                model.train_generator_all(
                    data_gen=data.generate_train_batch(
                        seq_len=configs['data']['sequence_length'],
                        batch_size=configs['training']['batch_size'],
                        normalise=configs['data']['normalise']),
                    epochs=configs['training']['epochs'],
                    batch_size=configs['training']['batch_size'],
                    steps_per_epoch=steps_per_epoch,
                    save_dir=configs['model']['save_dir'],
                    tocheckpoint=tocheckpoint,
                    ID=ID)
            print('[Model] Training All Finished')
        else:
            model.load_model(configs['mode']['train_file_path'])

        #===== predict =====#
        print('[Prediction]Start to predict and rank')
        ranklist = []
        for ID in IDs:
            # Fix: the original used '%s'.format(ID), which printed the
            # literal "%s"; {} is the correct str.format placeholder.
            print('predicting {}'.format(ID))
            filename = str(ID) + '.csv'
            data = DataLoader(filename=os.path.join('data', filename),
                              split=configs['data']['train_test_split'],
                              cols=configs['data']['columns'],
                              test_only=False)
            x_test, y_test = data.get_test_data(
                seq_len=configs['data']['sequence_length'],
                normalise=configs['data']['normalise'])
            predictions = model.predict_point_by_point(x_test)
            test_score = score(y_true=y_test, y_pred=predictions)
            ranklist.append((ID, *test_score))
        # Rank by the first score component (ascending).
        ranklist.sort(key=lambda x: x[1])
        with open("ranklist.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(ranklist)
        return
    #====================================================#

    #==================== single task ===================#
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns'],
        configs['mode']['test_only']  #############################
    )

    model = Model()
    if configs['mode']['test_only'] == True:
        model.load_model(configs['mode']['test_file_path'])
    else:
        if configs['mode']['train_new_model'] == True:
            model.build_model(configs)
        else:
            model.load_model(configs['mode']['train_file_path'])

    # Only consumed by the disabled in-memory path below.
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # Alternative in-memory training:
    # model.train(x, y,
    #             epochs=configs['training']['epochs'],
    #             batch_size=configs['training']['batch_size'],
    #             save_dir=configs['model']['save_dir'])

    # Out-of-memory generative training. NOTE(review): train_generator
    # receives the whole mode dict — presumably it skips actual training when
    # test_only is set; confirm in Model.train_generator.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) /
        configs['training']['batch_size'])
    model.train_generator(data_gen=data.generate_train_batch(
        seq_len=configs['data']['sequence_length'],
        batch_size=configs['training']['batch_size'],
        normalise=configs['data']['normalise']),
                          epochs=configs['training']['epochs'],
                          batch_size=configs['training']['batch_size'],
                          steps_per_epoch=steps_per_epoch,
                          save_dir=configs['model']['save_dir'],
                          mode=configs['mode'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['prediction_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)
    test_score = score(y_true=y_test, y_pred=predictions)
    # plot_results_multiple(predictions, y_test, configs['data']['prediction_length'])
    plot_results(predictions, y_test)
def main():
    """Train (or load) the LSTM model described by config/config.json and plot
    multi-step-ahead predictions on the held-out test split.

    NOTE(review): the source formatting was collapsed; the nesting below
    (training only when no 'load_model' path is configured) is reconstructed
    from the logic — confirm against the original file.
    """
    config_path = 'config/config.json'
    with open(config_path, 'r') as f:
        configs = json.load(f)
    logging.info("Loaded {}".format(config_path))
    logging.info("\n{}\n".format(configs))

    # Data location and normalisation mode come straight from the config.
    data_path = configs['data']['filename']
    data_dir = os.path.dirname(data_path)
    dtypes = configs['data'].get('dtypes', None)  # optional; None lets DataLoader decide
    windowed_normalization = configs['data']['normalise']

    data = DataLoader(data_path,
                      configs['data']['train_test_split'],
                      configs['data']['columns'],
                      # scaler state is persisted next to the data file
                      scaler_path=os.path.join(data_dir, "scaler"),
                      windowed_normalization=windowed_normalization,
                      dtypes=dtypes)

    model = Model()
    if configs['model'].get('load_model'):
        # Reuse a previously trained model; plots are written next to it.
        model_path = os.path.join(configs['model']['load_model'])
        logging.info("Loading {}".format(model_path))
        model.load_model(model_path, configs)
        plot_dir = os.path.join(os.path.dirname(model_path), "plots")
        os.makedirs(plot_dir, exist_ok=True)
    else:
        # Fresh model: build it, then train out-of-memory via a batch generator.
        plot_dir = os.path.join(configs['model']['save_dir'], "plots")
        os.makedirs(plot_dir, exist_ok=True)
        model.build_model(configs)
        # x, y are only consumed by the disabled in-memory training path below.
        x, y = data.get_train_data(
            seq_len=configs['data']['sequence_length'],
            windowed_normalization=windowed_normalization)
        '''
        # in-memory training
        model.train(
            x,
            y,
            epochs = configs['training']['epochs'],
            batch_size = configs['training']['batch_size'],
            save_dir = configs['model']['save_dir']
        )
        '''
        # out-of-memory generative training:
        # one optimiser step per batch of sliding windows drawn from the
        # training span (len_train - sequence_length usable windows).
        steps_per_epoch = math.ceil(
            (data.len_train - configs['data']['sequence_length'])
            / configs['training']['batch_size'])
        model.train_generator(
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                windowed_normalization=windowed_normalization),
            epochs=configs['training']['epochs'],
            batch_size=configs['training']['batch_size'],
            steps_per_epoch=steps_per_epoch,
            save_dir=configs['model']['save_dir'])

    # Evaluate: predict sequence_length steps ahead from each test window,
    # then plot predictions against the ground truth and save the figure.
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        windowed_normalization=windowed_normalization)
    predictions_multiple = model.predict_sequences_multiple(
        x_test,
        configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    plot_results_multiple(predictions_multiple, y_test,
                          configs['data']['sequence_length'],
                          out_path=os.path.join(plot_dir, "multiple.png"))
plt.legend()
plt.show()

# %% Build/Train the model
model = Model()
model.build_model(configs)

# Out-of-memory generative training: windows are streamed from the
# DataLoader's generator instead of materialising the whole training set.
data_cfg = configs['data']
train_cfg = configs['training']
steps_per_epoch = math.ceil(
    (data.len_train - data_cfg['sequence_length']) / train_cfg['batch_size'])
train_batches = data.generate_train_batch(
    seq_len=data_cfg['sequence_length'],
    batch_size=train_cfg['batch_size'],
    normalise=data_cfg['normalise'])
model.train_generator(
    data_gen=train_batches,
    epochs=train_cfg['epochs'],
    batch_size=train_cfg['batch_size'],
    steps_per_epoch=steps_per_epoch,
    save_dir=configs['model']['save_dir'])

# %% Evaluate: multi-step predictions on the held-out split, plotted
# against the ground truth.
x_test, y_test = data.get_test_data(
    seq_len=data_cfg['sequence_length'],
    normalise=data_cfg['normalise'])

predictions = model.predict_sequences_multiple(
    x_test, data_cfg['sequence_length'], data_cfg['sequence_length'])
plot_results_multiple(predictions, y_test, data_cfg['sequence_length'])
def main():
    """Build, train, evaluate and plot one or both model variants
    (functional with conv layers, and sequential), driven by the
    module-level flags plotData / useFuncModel / useSeqModel /
    visualizeConvolution / evaluatePerformance / plotPredictions.

    NOTE(review): formatting was collapsed; block nesting is reconstructed.
    Also, `data_total` and `y0` are referenced below although the code that
    produces `data_total` is commented out and `y0` is not defined in this
    block — those paths will raise NameError unless defined at module level;
    verify before enabling the corresponding flags.
    """
    configs = json.load(open('config.json', 'r'))
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    # Get data and normalise it first
    # data_total = data.get_total_data(
    #     seq_len=configs['data']['sequence_length'],
    #     normalise=False)
    # last_ob = data_total[0][0]
    # print("last_ob: ", str(last_ob))
    # print("data_tatal shape: ", str(data_total.shape))
    # scaler, normalised_data = data.transform_data(data_total)
    # print("norm data shape: ", str(normalised_data.shape))
    # if plotData:
    #     plot_data(normalised_data)
    # data.update_data(normalised_data)

    # Get training and test data (raw values; normalise=False on both splits).
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=False)
    print("x shape: ", str(x.shape))
    print("y shape: ", str(y.shape))
    if plotData:
        plot_data(y.flatten())

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=False)
    print("x_test shape: ", str(x_test.shape))
    print("y_test shape: ", str(y_test.shape))
    if plotData:
        plot_data(y_test.flatten())

    # Build the model(s)
    model = MyModel()
    if useFuncModel:
        model.build_functional_model(configs)
    if useSeqModel:
        model.build_sequential_model(configs)

    # x, y = data.get_train_data(
    #     seq_len=configs['data']['sequence_length'],
    #     normalise=configs['data']['normalise']
    # )

    if plotData:
        print("y.shape: ", str(y.shape))
        print("y_test.shape: ", str(y_test.shape))
        # NOTE(review): data_total is undefined here (its producer is
        # commented out above) — this branch would raise NameError.
        print("data_total.shape: ", str(data_total.shape))
        plot_data(data_total)
        # plot_train_test_total(y, y_test, data_total + 1, configs['data']['sequence_length'])

    # In-memory training of whichever model variants are enabled.
    if useFuncModel:
        model.train(x, y,
                    epochs=configs['training']['epochs'],
                    batch_size=configs['training']['batch_size'],
                    save_dir=configs['model']['save_dir'],
                    modelType=ModelType.FUNCTIONAL)
    if useSeqModel:
        model.train(x, y,
                    epochs=configs['training']['epochs'],
                    batch_size=configs['training']['batch_size'],
                    save_dir=configs['model']['save_dir'],
                    modelType=ModelType.SEQUENTIAL)

    '''
    # Train the models: out-of memory generative training
    steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir'],
        modelType=ModelType.FUNCTIONAL
    )
    if useSeqModel:
        model.train_generator(
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']
            ),
            epochs=configs['training']['epochs'],
            batch_size=configs['training']['batch_size'],
            steps_per_epoch=steps_per_epoch,
            save_dir=configs['model']['save_dir'],
            modelType=ModelType.SEQUENTIAL
        )
    '''

    # Visualize convolutional layer operations on raw training data.
    if visualizeConvolution and useFuncModel:
        print("*****x shape: ", str(x.shape))
        # NOTE(review): conv_predictions is computed but never used below.
        conv_predictions = model.conv_layer_analysis(
            x, configs['data']['sequence_length'],
            configs['data']['sequence_length'])

    # Compare performance. Perf values default to 1 so the summary prints
    # something even when evaluation is disabled for a variant.
    print("comparing models")
    func_train_perf = 1
    func_test_perf = 1
    if useFuncModel and evaluatePerformance:
        func_train_perf = model.eval_generator(
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']),
            batch_size=configs['training']['batch_size'],
            save_dir=configs['model']['save_dir'],
            modelType=ModelType.FUNCTIONAL)
        func_test_perf = model.eval(
            x=x_test,
            y=y_test,
            batch_size=configs['training']['batch_size'],
            modelType=ModelType.FUNCTIONAL)

    seq_train_perf = 1
    seq_test_perf = 1
    # print("FUNCTIONAL MODEL TRAIN PERF: ", str(func_train_perf))
    # print("FUNCTIONAL MODEL TEST PERF: ", str(func_test_perf))
    if useSeqModel and evaluatePerformance:
        print("Evaluate Sequential Model Performance")
        seq_train_perf = model.eval_generator(
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']),
            batch_size=configs['training']['batch_size'],
            save_dir=configs['model']['save_dir'],
            modelType=ModelType.SEQUENTIAL)
        seq_test_perf = model.eval(
            x=x_test,
            y=y_test,
            batch_size=configs['training']['batch_size'],
            modelType=ModelType.SEQUENTIAL)

    print("FUNCTIONAL MODEL TRAIN PERF: ", str(func_train_perf))
    print("SEQUENTIAL MODEL TRAIN PERF: ", str(seq_train_perf))
    print("FUNCTIONAL MODEL TEST PERF: ", str(func_test_perf))
    print("SEQUENTIAL MODEL TEST PERF: ", str(seq_test_perf))

    # Plot predictions on each of the models
    if plotPredictions:
        if useFuncModel:
            # Run predictions on Functional model (with conv layers)
            func_predictions = model.predict_sequences_multiple(
                x, configs['data']['sequence_length'],
                configs['data']['sequence_length'],
                ModelType.FUNCTIONAL)
            print("y.shape: ", str(y.shape))
            # NOTE(review): y0 is not defined in this block — confirm it is a
            # module-level value before enabling this path.
            plot_results_multiple(func_predictions, y,
                                  configs['data']['sequence_length'],
                                  True, True, y0)
            # Run predictions on Functional model (with conv layers)
            func_predictions_test = model.predict_sequences_multiple(
                x_test, configs['data']['sequence_length'],
                configs['data']['sequence_length'],
                ModelType.FUNCTIONAL)
            # NOTE(review): data_total is undefined here (see note above).
            plot_results_multiple_over_total(
                func_predictions, data_total,
                configs['data']['sequence_length'], True, 0)
            plot_results_multiple(func_predictions_test, y_test,
                                  configs['data']['sequence_length'],
                                  True, False, 0)
        # Run predictions on Sequential model
        if useSeqModel:
            seq_predictions = model.predict_sequences_multiple(
                x, configs['data']['sequence_length'],
                configs['data']['sequence_length'],
                ModelType.SEQUENTIAL,
            )
            plot_results_multiple_over_total(
                seq_predictions, data_total,
                configs['data']['sequence_length'], True, 0)