def main():
    """Train an LSTM on the train file, then plot multi-sequence predictions on the test file."""
    # Context manager closes the config handle deterministically
    # (the original json.load(open(...)) leaked the file object).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    # Ensure output directories exist.
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])
    if not os.path.exists(configs['model']['log_dir']):
        os.makedirs(configs['model']['log_dir'])

    data_loader = DataLoader(
        os.path.join('data', configs['data']['filename_train']),
        configs['data']['train_test_split'],
        configs['data']['columns'],
        is_training=True)

    model = Model()
    model.build_model(configs)

    # Generator-based (out-of-memory) training: steps = windows / batch size.
    steps_per_epoch = math.ceil(
        (data_loader.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    validation_steps = math.ceil(
        (data_loader.len_val - configs['data']['sequence_length']) / configs['training']['batch_size'])

    model.train_generator(
        train_loader=data_loader.batch_generator(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise'],
            generator_type='train'),
        val_loader=data_loader.batch_generator(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise'],
            generator_type='val'),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        validation_steps=validation_steps,
        save_dir=configs['model']['save_dir'],
        log_dir=configs['model']['log_dir'])

    # Evaluate on the separate test file; split of 0 keeps every row as test data.
    test_data_loader = DataLoader(
        os.path.join('data', configs['data']['filename_test']),
        0,
        configs['data']['columns'],
        is_training=False)
    x_test, y_test = test_data_loader.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequences_full(x_test, configs['data']['sequence_length'])
    # predictions = model.predict_point_by_point(x_test)

    plot = Plot()
    plot.plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
def main():
    """Train via the batch generator, then run point-by-point prediction on the test split."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns'])
    model = Model()
    model.build_model(configs)
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])
    '''
    # in-memory training
    model.train(
        x,
        y,
        epochs = configs['training']['epochs'],
        batch_size = configs['training']['batch_size'],
        save_dir = configs['model']['save_dir']
    )
    '''
    # out-of memory generative training
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir'])

    ##
    # NOTE(review): `config_test` is not defined in this function — presumably a
    # module-level global; verify it exists (or whether `configs` was intended).
    data = DataLoader(
        os.path.join('data', config_test['data']['filename']),
        config_test['data']['train_test_split'],
        config_test['data']['columns'])
    x_test, y_test = data.get_test_data(
        seq_len=config_test['data']['sequence_length'],
        normalise=config_test['data']['normalise'])

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)

    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(predictions, y_test)
def predict():
    """Run predictions of the style selected by the module-global TYPE.

    Relies on module-level CONFIG, DATA, MODEL, TYPE and a cached global `model`.
    Writes an "--END--" sentinel to stdout for the consuming process.
    """
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open(CONFIG, 'r') as config_file:
        configs = json.load(config_file)
    data = DataLoader(DATA,
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    # Lazily load the model once and cache it in the module global.
    global model
    if model is None:  # fixed: identity comparison, not `== None` (PEP 8)
        model = Model()
        model.load_model(MODEL)

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    if TYPE == "sequence":
        predictions = model.predict_sequences_multiple(
            x_test, configs['data']['sequence_length'],
            configs['data']['sequence_length'])
        plot_results_multiple(predictions, y_test,
                              configs['data']['sequence_length'])
    if TYPE == "point" or TYPE == "predict":
        predictions = model.predict_point_by_point(x_test)
    if TYPE == "full":
        predictions = model.predict_sequence_full(
            x_test, configs['data']['sequence_length'])
    if TYPE == "full" or TYPE == "point":
        plot_results(predictions, y_test)

    if TYPE == "predict":
        # De-normalise the last prediction and hand the value to the caller.
        predicted_value = data.denormalize_windows(
            predictions[-1], configs['data']['sequence_length'])
        sys.stdout.write("--END--{}--END--\n".format(predicted_value))
    else:
        sys.stdout.write("--END--")
def main(train_after=False):
    """Train the LSTM in memory (optionally resuming a saved model) and plot predictions.

    Args:
        train_after: when True, load the previously saved model and continue
            training it; when False, build a fresh model from the config.
    """
    config_file = 'web_flask/LSTM/config.json'
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open(config_file, 'r') as fh:
        configs = json.load(fh)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(configs['data']['filename'],
                      configs['data']['train_test_split'],
                      configs['data']['columns'],
                      normalise_meth=configs['data']['normalise'])

    model = Model()
    # Explicit branch instead of a conditional expression used as a statement.
    if not train_after:
        model.build_model(configs)
    else:
        model.load_model(os.path.join(configs['model']['save_dir'],
                                      configs['model']['model_name']))

    history = LossHistory()
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # in-memory training
    model.train(x, y,
                epochs=configs['training']['epochs'],
                batch_size=configs['training']['batch_size'],
                save_dir=configs['model']['save_dir'],
                history=history,
                x_test=x_test,
                y_test=y_test)
    '''
    # out-of memory generative training
    steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )
    '''
    history.loss_plot('epoch')
    #loss, accuracy = model.model.evaluate(x_test, y_test)
    #print(loss,accuracy)
    #predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    #predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # NOTE(review): this predicts on x[0] (a single training window), not x_test —
    # looks like a debugging leftover; confirm before trusting the plot below.
    predictions = model.predict_point_by_point(x[0])  #_test)
    #plot_results_multiple(predictions, y, configs['data']['sequence_length'])
    plot_results(predictions, y)
def main():
    """Train (or load) the LSTM per config and plot point-by-point test predictions."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )
    model = Model()
    model.build_model(configs)
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )

    if not configs['training']['train']:
        # NOTE(review): hard-coded checkpoint path — consider moving into config.
        model.load_model(filepath='saved_models/02102019-164727-e2.h5')
    else:
        model.train(
            x,
            y,
            epochs=configs['training']['epochs'],
            batch_size=configs['training']['batch_size'],
            save_dir=configs['model']['save_dir']
        )

    # out-of memory generative training
    # steps_per_epoch = math.ceil(
    #     (data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    # model.train_generator(
    #     data_gen=data.generate_train_batch(
    #         seq_len=configs['data']['sequence_length'],
    #         batch_size=configs['training']['batch_size'],
    #         normalise=configs['data']['normalise']
    #     ),
    #     epochs=configs['training']['epochs'],
    #     batch_size=configs['training']['batch_size'],
    #     steps_per_epoch=steps_per_epoch,
    #     save_dir=configs['model']['save_dir']
    # )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'],
    #                                                configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)
    plot_results(predictions, y_test)
def main_sin():
    """Train the model on the sine-wave dataset described by config_sin.json."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open("config_sin.json", 'r') as config_file:
        config = json.load(config_file)

    data = DataLoader(os.path.join('data', config['data']['filename']),
                      config['data']['train_test_split'],
                      config['data']['columns'])
    x_train, y_train = data.get_train_data(config['data']['sequence_length'],
                                           config['data']['normalise'])
    # NOTE(review): get_test_data() is called without arguments here, unlike the
    # training call above — confirm the DataLoader provides suitable defaults.
    x_test, y_test = data.get_test_data()

    model = Model()
    model.build_model(config)
    model.train(x_train, y_train,
                config['training']['epochs'],
                config['training']['batch_size'])
def main():
    """Train via the batch generator and dump point predictions vs. truth to CSV files."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    # create folder for saved model params
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )
    # plot true data
    # plot_results(data.data_train,True)

    # train model
    model = Model()
    model.build_model(configs)
    # NOTE(review): x/y are loaded but unused (training goes through the generator
    # below); kept in case get_train_data has side effects the loader relies on.
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    predictions = model.predict_point_by_point(x_test)
    # plot_results(predictions, y_test)
    # print (predictions)
    # plot_results(predictions, y_test)

    # Persist predictions and ground truth for offline comparison.
    data1 = pd.DataFrame(predictions)
    data1.to_csv('predict.csv')
    data2 = pd.DataFrame(y_test)
    data2.to_csv('true.csv')
def predict(test):
    """Run the saved tracker model over `test` and plot point-by-point predictions.

    Uses the module-level `configs` and the `cleaner` preprocessing helper.
    Returns "OK" so web callers get a simple success marker.
    """
    # Refresh/clean the raw data first, then load `test` with a split of 0
    # so every row lands in the test partition.
    cleaner.main_func()
    loader = DataLoader(test, 0, configs['data']['columns'])
    features, targets = loader.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=False)

    net = Model()
    net.load_model('saved_models/tracker.h5')

    point_predictions = net.predict_point_by_point(features)
    plot_results(point_predictions, targets)
    return "OK"
def main():
    """Train the LSTM in memory and plot point-by-point test predictions."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])
    model = Model()
    model.build_model(configs)
    # To reuse an already-saved model (and skip model.train below), load it here:
    # model.load_model(r'saved_models/15102019-155115-e2.h5')

    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    # print('x shape: {0}'.format(x.shape))  # (3942, 49, 2)
    # print('y shape: {0}'.format(y.shape))  # (3942, 1)

    # in-memory training
    model.train(x, y,
                epochs=configs['training']['epochs'],
                batch_size=configs['training']['batch_size'],
                save_dir=configs['model']['save_dir'])
    '''
    # out-of memory generative training
    steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )
    '''
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)
    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(predictions, y_test)
def main():
    """Train via the batch generator and write a one-step consumption forecast to output.txt."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )
    model = Model()
    model.build_model(configs)
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    '''
    # in-memory training
    model.train(
        x,
        y,
        epochs = configs['training']['epochs'],
        batch_size = configs['training']['batch_size'],
        save_dir = configs['model']['save_dir']
    )
    '''
    # out-of memory generative training
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']
        ),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir']
    )

    x_test, y_test, onedot = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    #predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    #predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(onedot)

    # De-normalise the last prediction and write the forecast to a report file.
    with open('output.txt', 'w') as f:
        f.write('预测下一时间的螺栓螺母消耗量为:' + str(int((predictions[-1] + 1) * data.last_raw_data(seq_len=configs['data']['sequence_length']))))
def main(choice):
    """Build (and, unless choice == 'info', train) the model, then plot per `choice`.

    choice: "multi" -> multi-sequence plot, "seq" -> full-sequence plot,
            anything else -> point-by-point plot. Uses the module-level `configs`.
    """
    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])
    model = Model()
    model.build_model(configs)

    if choice != 'info':
        train_x, train_y = data.get_train_data(
            seq_len=configs['data']['sequence_length'],
            normalise=configs['data']['normalise'])
        # in-memory training
        model.train(train_x, train_y,
                    epochs=configs['training']['epochs'],
                    batch_size=configs['training']['batch_size'])
        # out-of memory generative training
        # steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
        # model.train_generator(
        #     data_gen = data.generate_train_batch(
        #         seq_len = configs['data']['sequence_length'],
        #         batch_size = configs['training']['batch_size'],
        #         normalise = configs['data']['normalise']
        #     ),
        #     epochs = configs['training']['epochs'],
        #     batch_size = configs['training']['batch_size'],
        #     steps_per_epoch = steps_per_epoch
        # )

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    if choice == "multi":
        preds = model.predict_sequences_multiple(
            x_test, configs['data']['sequence_length'],
            configs['data']['sequence_length'])
        plot_results_multiple(preds, y_test, configs['data']['sequence_length'])
    elif choice == "seq":
        preds = model.predict_sequence_full(
            x_test, configs['data']['sequence_length'])
        plot_results(preds, y_test)
    else:
        preds = model.predict_point_by_point(x_test)
        plot_results(preds, y_test)
def main():
    """Train via the batch generator, plot multi-sequence predictions, signal completion.

    Relies on module-level CONFIG, DATA and MODEL paths; writes an "--END--"
    sentinel to stdout for the consuming process.
    """
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open(CONFIG, 'r') as config_file:
        configs = json.load(config_file)
    data = DataLoader(DATA,
                      configs['data']['train_test_split'],
                      configs['data']['columns'])
    model = Model()
    model.build_model(configs)
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    '''
    # in-memory training
    model.train(
        x,
        y,
        epochs = configs['training']['epochs'],
        batch_size = configs['training']['batch_size']
    )
    '''
    # out-of memory generative training
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        model_path=MODEL)

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'],
        configs['data']['sequence_length'])
    #predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    #predictions = model.predict_point_by_point(x_test)

    plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    #plot_results(predictions, y_test)
    sys.stdout.write("--END--")
def main():
    """Build the model, train it in memory, and plot point predictions on the test set."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    model = Model()
    my_model = model.build_model(configs)
    # Fixed: 'output\model.png' only behaved as a path on Windows — on POSIX it
    # produced a file literally named "output\model.png"; os.path.join is portable.
    plot_model(my_model, to_file=os.path.join('output', 'model.png'), show_shapes=True)

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )
    x, y = data.get_train_data(
        configs['data']['sequence_length'],
        configs['data']['normalise']
    )
    print(x.shape)
    print(y.shape)
    print(configs['training']['batch_size'])
    print(configs['model']['save_dir'])

    model.train(x, y,
                configs['training']['epochs'],
                configs['training']['batch_size'],
                configs['model']['save_dir']
                )

    x_test, y_test = data.get_test_data(
        configs['data']['sequence_length'],
        configs['data']['normalise']
    )
    # predictions = model.predict_sequences_multiplt(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequences_full(x_test, configs['data']['sequence_length'])
    prediction_point = model.predict_point_by_point(x_test)
    # print(prediction_point)
    # print(np.array(predictions).shape)
    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(prediction_point, y_test)
def openfile(self): # read csv file self.filename = filedialog.askopenfilename() self.dataloader = DataLoader(self.filename, ["Close","Volume","Open", "High","Low","Rocr100","Plus_dm"]) print(self.filename) # load true data without normalise seq_len = int(self.scale2.get()) x_test_true, self.y_test_true =self.dataloader.get_test_data(seq_len, normalise=False) print("load the data successfully") # make sure the data len is valid self.scale1.configure(to=self.dataloader.len_test - 50) self.scale1.set(self.dataloader.len_test - 50) self.ax.cla() self.ax.grid() self.ax.plot(self.y_test_true, label='True Data') self.graph.draw()
def main():
    """Train the RNN from ./data/config.json and visualise test predictions."""
    # load parameters; context manager closes the handle (json.load(open(...)) leaked it)
    with open('./data/config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns'],
    )

    # create RNN model
    model = Model()
    model.build_model(configs)

    # loading training data
    x, y = data.get_train_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    print(x.shape)
    print(y.shape)

    # training model
    model.train(
        x,
        y,
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        save_dir=configs['model']['save_dir']
    )

    # test results
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'],
    )

    # results visualization
    predictions_multiseq = model.predict_sequences_multiple(
        x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    predictions_pointbypoint = model.predict_point_by_point(x_test)
    plot_results_multiple(predictions_multiseq, y_test, configs['data']['sequence_length'])
    plot_results(predictions_pointbypoint, y_test)
def main():
    """Train in memory on a DataLoader without explicit columns and plot predictions."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
    )
    model = Model()
    model.build_model(configs)

    # get train data
    x, y = data.get_train_data()
    #x=x.squeeze()

    # in-memory training
    model.train(x, y,
                epochs=configs['training']['epochs'],
                batch_size=configs['training']['batch_size'],
                save_dir=configs['model']['save_dir'])

    # # out-of memory generative training
    # steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    # model.train_generator(
    #     data_gen=data.generate_train_batch(
    #         batch_size=configs['training']['batch_size'],
    #     ),
    #     epochs=configs['training']['epochs'],
    #     batch_size=configs['training']['batch_size'],
    #     steps_per_epoch=steps_per_epoch,
    #     save_dir=configs['model']['save_dir']
    # )

    # testing model
    x_test, y_test = data.get_test_data()
    #x_test=x_test.squeeze()
    predictions = model.predict_point_by_point(x_test)
    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(predictions, y_test)
def main():
    """Train the RNN and plot multi-sequence predictions (also saves a model diagram)."""
    # load parameters; context manager closes the handle (json.load(open(...)) leaked it)
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    # load data
    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])

    # build the RNN model and save a diagram of it
    model = Model()
    mymodel = model.build_model(configs)
    plot_model(mymodel, to_file='model.png', show_shapes=True)

    # load training data
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])
    print(x.shape)
    print(y.shape)

    # train the model
    model.train(x, y,
                epochs=configs['training']['epochs'],
                batch_size=configs['training']['batch_size'],
                save_dir=configs['model']['save_dir'])

    # test results
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    # visualise predictions
    predictions = model.predict_sequences_multiple(
        x_test,
        configs['data']['sequence_length'],
        configs['data']['sequence_length'],
        debug=False)
    print(np.array(predictions).shape)
    plot_results_multiple(predictions, y_test,
                          configs['data']['sequence_length'])
def main():
    """Train one model per stock ID (currently a single-ID slice) via the batch generator."""
    # Context managers close both handles (config and ID list were previously leaked).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    dataframe = pd.read_csv(configs['data']['filename'])
    # NOTE(review): absolute user-specific path — consider moving into config.
    with open('/Users/yucheng/Downloads/project2/stockIDs.txt', 'r') as f:
        stockIDs = [int(line.split('\n')[0]) for line in f.readlines()]

    # `stock_id` renamed from `id`, which shadowed the builtin.
    for stock_id in stockIDs[377:378]:
        # for stock_id in stockIDs[444:500]:
        print("index: ", stockIDs.index(stock_id))
        data = DataLoader(dataframe, stock_id,
                          configs['data']['train_test_split'],
                          configs['data']['columns'])
        model = Model()
        model.build_model(configs)
        x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                                   normalise=configs['data']['normalise'])
        '''
        # in-memory training
        model.train(
            x,
            y,
            epochs = configs['training']['epochs'],
            batch_size = configs['training']['batch_size'],
            save_dir = configs['model']['save_dir']
        )
        '''
        # out-of memory generative training
        steps_per_epoch = math.ceil(
            (data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
        model.train_generator(
            id=stock_id,
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']),
            epochs=configs['training']['epochs'],
            batch_size=configs['training']['batch_size'],
            steps_per_epoch=steps_per_epoch,
            save_dir=configs['model']['save_dir'])
def main():
    """Load a saved model and plot its point-by-point predictions on the test set."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('config.json', 'r') as config_file:
        configs = json.load(config_file)

    model = Model()
    # NOTE(review): hard-coded checkpoint path — consider moving into config.
    model.load_model("./saved_models/model2.h5")

    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns']
    )
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise']
    )
    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    predictions = model.predict_point_by_point(x_test)
    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    plot_results(predictions, y_test)
def main_plot():
    """Evaluate the newest (or a fallback) saved model on the test split and plot it.

    Uses module-level `config_file` and `newest_model`.
    """
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open(config_file, 'r') as fh:
        configs = json.load(fh)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'],
                      normalise_meth=configs['data']['normalise'])
    x, y = data.get_test_data(seq_len=configs['data']['sequence_length'],
                              normalise=configs['data']['normalise'])

    model = Model()
    # Prefer the most recently trained checkpoint, falling back to a fixed path.
    global newest_model
    if newest_model:
        model_way = newest_model
    else:
        model_way = '/home/bf/Documents/Projects/helpplay/HelpPlay/train/LSTM-Neural-Network-for-Time-Series-Prediction/saved_models/10062019-163648-e40.h5'
    model.load_model(model_way)

    print(model.model.evaluate(x, y))
    pre_y = model.predict_point_by_point(x)
    print(x)
    plot_results(pre_y, y)
############################################################################### # Train model on data ############################################################################### train_validate_df, test_df = np.split(df.sample(frac=1), [int(trainval__test_split * len(df))]) print('train/validate on %s elements at %s percent, test on %s elements' % (len(train_validate_df), (1 - train_val_split) * 100, len(test_df))) assert len(train_validate_df) > len(test_df) X = np.arange(len(train_validate_df)) ss = ShuffleSplit(n_splits=nfolds, test_size=train_val_split, random_state=0) folds = list(ss.split(X)) for j, (train_idx, val_idx) in enumerate(folds): assert len(train_idx) > len(val_idx) data = DataLoader(df, train_idx, val_idx, cols=cols, ipredicted_col=ipredicted_col) save_filename = os.path.join( saved_dir, '%s-F%s.weights.{epoch:02d}-{val_loss:.6f}.hdf5' % (dt.datetime.now().strftime('%Y%m%d-%H%M%S'), str(j))) callbacks = [ EarlyStopping(monitor='val_loss', patience=2), ModelCheckpoint(filepath=save_filename, monitor='val_loss', save_best_only=False) ] steps_per_epoch = (data.len_train - sequence_length) // batch_size steps_per_epoch_val = (data.len_val - sequence_length) // batch_size
# NOTE: `os` was missing from the imports even though os.path.join is used below
# (it raised NameError at import time).
import os

from core.data_processor import DataLoader
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt

plt.style.use('./plot.mplstyle')

# %% Load data
df = pd.read_csv('./data/sp500.csv')
# Context manager closes the config handle (json.load(open(...)) leaked it).
with open('model_config.json', 'r') as _config_fh:
    configs = json.load(_config_fh)
data = DataLoader(os.path.join('data', configs['data']['filename']),
                  configs['data']['train_test_split'],
                  configs['data']['columns'])


def plot_results_multiple(predicted_data, true_data, prediction_len):
    """Plot the true series plus each predicted sequence shifted to its start offset."""
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad the list of predictions to shift it in the graph to it's correct start
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
        plt.legend()
    plt.show()
def main():
    """Train the crops LSTM, report RMSE of point predictions, then forecast ahead
    with a small secondary LSTM seeded from the model's own forecasts."""
    # Context manager closes the config handle (json.load(open(...)) leaked it).
    with open('configcrops.json', 'r') as config_file:
        configs = json.load(config_file)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])
    model = Model()
    model.build_model(configs)
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'])

    # in-memory training
    model.train(x, y,
                epochs=configs['training']['epochs'],
                batch_size=configs['training']['batch_size'],
                save_dir=configs['model']['save_dir'])

    # Yogyakarta: Kulon progo, bantul, gunung kidul, sleman, DIY
    # Jawa Barat: Bandung, Tasikmalaya, Majalengka, Cirebon, Kuningan, Garut, Sumedang, Cianjut, Subang, Purwakarta, Indramayu
    # Ciamis, Sukabumi, Bogor, Bekasi, Karawang

    # # out-of memory generative training
    # steps_per_epoch = math.ceil((data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size'])
    # model.train_generator(
    #     data_gen=data.generate_train_batch(
    #         seq_len=configs['data']['sequence_length'],
    #         batch_size=configs['training']['batch_size'],
    #         normalise=configs['data']['normalise']
    #     ),
    #     epochs=configs['training']['epochs'],
    #     batch_size=configs['training']['batch_size'],
    #     steps_per_epoch=steps_per_epoch,
    #     save_dir=configs['model']['save_dir']
    # )
    # # save_dir = configs['model']['save_dir']

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])
    # print(x_test)
    # print(y_test)

    # predictions = model.predict_sequences_multiple(x_test, configs['data']['sequence_length'], configs['data']['sequence_length'])
    predictions_point = model.predict_point_by_point(x_test)
    print(len(predictions_point))
    plot_results(predictions_point, y_test)
    # plot_results_multiple(predictions, y_test, configs['data']['sequence_length'])
    # predictions_full = model.predict_sequence_full(x_test, configs['data']['sequence_length'])
    # plot_results(predictions_full, y_test)
    # NOTE(review): the line above is reconstructed as commented — `predictions_full`
    # is only ever assigned in the (commented) line before it, so executing the
    # call would raise NameError.

    # RMSE between ground truth and the point predictions.
    groundtrue = data._groundtruths(1)
    groundtrue = (groundtrue.ravel())
    print(len(groundtrue))
    RMSElist = []
    for i in range(len(groundtrue)):
        errorrate = groundtrue[i] - predictions_point[i]
        hasilkuadrat = errorrate * errorrate
        RMSElist.append(hasilkuadrat)
    RMSE = sum(RMSElist) / (len(predictions_point) - 2)
    RMSE = RMSE**(1 / 2)
    print(RMSE)

    # Forecast `total_prediksi` future steps starting from the last windows.
    getdataforecast = data._forecasting(5, 1)
    total_prediksi = 5
    takefrom = 5
    forecast_result = model.forecast(total_prediksi, getdataforecast, takefrom)
    # print(forecast_result[0])
    # forecast_result=np.append(forecast_result,[0.0])
    # print(forecast_result)

    # Fit a small auxiliary LSTM on the forecast series and extend it further.
    n_steps = 8
    # split into samples
    X, y = split_sequence(forecast_result, n_steps)
    # reshape from [samples, timesteps] into [samples, timesteps, features]
    n_features = 1
    # print(X)
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    # define model (note: rebinds `model`, shadowing the trained wrapper above)
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    # fit model
    model.fit(X, y, epochs=200, verbose=0)
    # demonstrate prediction
    for j in range(total_prediksi):
        getxlastnumber = array(forecast_result[(-n_steps - 1):-1])
        x_input = getxlastnumber
        # print(x_input)
        x_input = x_input.reshape((1, n_steps, n_features))
        yhat = model.predict(x_input, verbose=0)
        # print(yhat[0][0])
        forecast_result = np.append(forecast_result, yhat[0])
        # prediction_point=np.append(prediction_point,yhat[0])
    plot_results_onlypredicted(forecast_result)
# NOTE(review): this chunk has lost its original line structure. The opening
# statements (fig/ax/plot/legend/show on `true_data`/`predicted_data`) look like
# the body of a plotting helper whose `def` header is not visible here, followed
# by a top-level script that restores the DataLoader's scalers and prepares
# input sequences from sys.argv config/model paths. Structure left as-is rather
# than guessed; confirm against the original file before reformatting.
# NOTE(review): json.load(open(sys.argv[1], 'r')) leaks the file handle — prefer
# a `with open(...)` block when this is restored to proper formatting.
fig = plt.figure(facecolor='white') ax = fig.add_subplot(111) ax.plot(true_data, label='True Data') plt.plot(predicted_data, label='Prediction') plt.legend() plt.show() configs = json.load(open(sys.argv[1], 'r')) if not os.path.exists(configs['model']['save_dir']): os.makedirs(configs['model']['save_dir']) model_id = configs['model']['model_id'] save_dir = configs['model']['save_dir'] dataloader = DataLoader() x_scaler_filename = save_dir + "/" + model_id + "-x.scaler" y_scaler_filename = save_dir + "/" + model_id + "-y.scaler" dataloader.restore_scalers(x_scaler_filename, y_scaler_filename) filename = os.path.join('data', configs['data']['filename']) dataframe = pandas.read_csv(filename, sep=',', encoding='utf-8') dataframe.index.name = 'fecha' x_data = dataframe.get(configs['data']['x_cols'], ).values in_seq_len = configs['data']['input_sequence_length'] x_data = x_data[:, :] # pick three sequences to make predictions input_data = dataloader.prepare_input_data(x_data, in_seq_len) print("Input vector shape: " + str(x_data.shape)) model_filename = sys.argv[2]
def main():
    """Train a 1-D CNN one-step-ahead forecaster and report MAE/RMSE.

    Reads hyper-parameters from config.json, fits on the training split,
    predicts one step ahead on the test split, de-normalises, plots the
    result, prints MAE/RMSE and saves the model with a timestamped name.
    """
    configs = json.load(open('config.json', 'r'))
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    # -- Data preparation: --
    loader = DataLoader(os.path.join('../data', configs['data']['filename']),
                        configs['data']['train_test_split'],
                        configs['data']['columns'])
    seq_len = configs['data']['sequence_length']
    norm = configs['data']['normalise']
    x, y = loader.get_train_data(seq_len=seq_len, normalise=norm)
    # p0_vec carries the window base values needed to undo normalisation.
    x_test, y_test, p0_vec = loader.get_test_data(seq_len=seq_len,
                                                  normalise=norm)

    # -- Init and fit CNN model: --
    n_features = x.shape[2]
    n_steps = seq_len - 1
    model = Sequential()
    for layer in (
            Conv1D(filters=128, kernel_size=2, activation='linear',
                   input_shape=(n_steps, n_features)),
            MaxPooling1D(pool_size=2),
            Flatten(),
            Dense(50, activation='linear'),
            Dropout(0.2),
            Dense(1)):
        model.add(layer)
    model.compile(optimizer=configs['model']['optimizer'],
                  loss=configs['model']['loss'])

    # Fit model (timed).
    clock = Timer()
    clock.start()
    print('[Model] Training Started')
    model.fit(x, y,
              epochs=configs['training']['epochs'],
              batch_size=configs['training']['batch_size'])
    clock.stop()

    print('[Model] Predicting one step ahead...')
    yhat = model.predict(x_test, verbose=0)

    # Denormalize & plot — back to original (de-normalised) units.
    p_pred, p_true = denorm_transform(p0_vec, yhat, y_test)
    plot_results(p_pred, p_true)

    # Compute evaluation metrics on the de-normalised series.
    metrics = EvalMetrics(p_true, p_pred)
    MAE = metrics.get_MAE()
    RMSE = metrics.get_RMSE()
    print("MAE on validation set is: %f" % MAE)
    print("RMSE on validation set is: %f" % RMSE)

    # Save model under a timestamped filename.
    save_dir = configs['model']['save_dir']
    save_fname = os.path.join(
        save_dir, '%s_cnn.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S')))
    model.save(save_fname)
def main():
    """Train an LSTM via the batch generator, evaluate MSE/RMSE on the
    test split, plot the predictions, and append a run summary to note.txt.
    """
    # Fix: use a context manager so the config handle is closed
    # (the original json.load(open(...)) leaked the file object).
    with open('config.json', 'r') as cfg:
        configs = json.load(cfg)
    # exist_ok avoids the check-then-create race of the original code.
    os.makedirs(configs['model']['save_dir'], exist_ok=True)
    os.makedirs(configs['data']['data picture save dir'], exist_ok=True)

    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'],
                      configs['data']['id'])

    model = Model()
    model.build_model(configs)

    # Out-of-memory generative training: the generator yields batches so
    # the whole training set never has to sit in memory at once.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length'])
        / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])

    predictions = model.predict_point_by_point(x_test)

    # Build MSE/RMSE as Keras-backend tensors (TF1 graph style), then
    # evaluate them to concrete numbers inside the backend session.
    sess = backend.get_session()
    rmsee = backend.mean(rmse(y_test, predictions), axis=0)
    msee = backend.mean(mse(y_test, predictions), axis=0)
    with sess.as_default():
        mse_val = msee.eval()
        rmse_val = rmsee.eval()
    print("mse:", mse_val)
    print("rmse:", rmse_val)

    plot_results(predictions, y_test,
                 configs['data']['data picture save dir'],
                 configs['data']['id'])

    # Append a human-readable summary of this run to the shared notes file.
    with open("note.txt", 'a+') as f:
        f.write('\n%s-e%s.h5:\n' % (dt.datetime.now().strftime('%m%d-%H%M%S'),
                                    configs['data']['id']))
        f.write("data split:%f\n" % configs["data"]["train_test_split"])
        f.write("epochs:%d\n" % configs["training"]["epochs"])
        f.write("batch size:%d\n" % configs["training"]["batch_size"])
        f.write("mse:%f\n" % mse_val)
        f.write("rmse:%f\n" % rmse_val)
        f.write("notes:%s\n" % configs['data']['note'])
def main():
    """Entry point: either rank many instruments ("selection" mode) or run
    a single train/predict task, both driven by config.json.
    """
    # Fix: context manager closes the config file
    # (json.load(open(...)) leaked the handle).
    with open('config.json', 'r') as cfg:
        configs = json.load(cfg)

    #==================== selection =====================#
    if configs['mode']['selection'] == True:
        if not os.path.exists(configs['model']['save_dir']):
            os.makedirs(configs['model']['save_dir'])
        # Raw string so Windows backslashes can never collide with escape
        # sequences; the literal's value is unchanged.
        # NOTE(review): absolute machine-specific path — should move into config.
        with open(
                r'D:\ColumbiaCourses\Advanced Big Data Analytics 6895\milestone3\LSTM-Neural-Network-for-Time-Series-Prediction\data\ID.csv',
                newline='') as f:
            reader = csv.reader(f)
            IDs = list(reader)
        IDs = [x[0] for x in IDs]

        model = Model()
        if configs['mode']['train_new_model'] == True:
            model.build_model(configs)
            print('[Model] Training Started')
            cnt = 0
            #===== train ====#
            for ID in IDs:
                cnt += 1
                filename = str(ID) + '.csv'
                data = DataLoader(filename=os.path.join('data', filename),
                                  split=configs['data']['train_test_split'],
                                  cols=configs['data']['columns'],
                                  test_only=False)
                # cnt % 1 == 0 is always true: checkpoint after every
                # instrument (raise the modulus to checkpoint less often).
                tocheckpoint = (cnt % 1 == 0)
                steps_per_epoch = math.ceil(
                    (data.len_train - configs['data']['sequence_length'])
                    / configs['training']['batch_size'])
                model.train_generator_all(
                    data_gen=data.generate_train_batch(
                        seq_len=configs['data']['sequence_length'],
                        batch_size=configs['training']['batch_size'],
                        normalise=configs['data']['normalise']),
                    epochs=configs['training']['epochs'],
                    batch_size=configs['training']['batch_size'],
                    steps_per_epoch=steps_per_epoch,
                    save_dir=configs['model']['save_dir'],
                    tocheckpoint=tocheckpoint,
                    ID=ID)
            print('[Model] Training All Finished')
        else:
            model.load_model(configs['mode']['train_file_path'])

        #===== predict =====#
        print('[Prediction]Start to predict and rank')
        ranklist = []
        for ID in IDs:
            # Bug fix: original used 'predicting %s'.format(ID), which
            # printed the literal "%s" — str.format needs {} placeholders.
            print('predicting {}'.format(ID))
            filename = str(ID) + '.csv'
            data = DataLoader(filename=os.path.join('data', filename),
                              split=configs['data']['train_test_split'],
                              cols=configs['data']['columns'],
                              test_only=False)
            x_test, y_test = data.get_test_data(
                seq_len=configs['data']['sequence_length'],
                normalise=configs['data']['normalise'])
            predictions = model.predict_point_by_point(x_test)
            test_score = score(y_true=y_test, y_pred=predictions)
            ranklist.append((ID, *test_score))
        # Rank instruments by their first score component (ascending).
        ranklist.sort(key=lambda x: x[1])
        with open("ranklist.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(ranklist)
        return
    #====================================================#

    #==================== single task ===================#
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])
    data = DataLoader(
        os.path.join('data', configs['data']['filename']),
        configs['data']['train_test_split'],
        configs['data']['columns'],
        configs['mode']['test_only'])
    model = Model()
    if configs['mode']['test_only'] == True:
        model.load_model(configs['mode']['test_file_path'])
    else:
        if configs['mode']['train_new_model'] == True:
            model.build_model(configs)
        else:
            model.load_model(configs['mode']['train_file_path'])
        # Out-of-memory generative training.
        steps_per_epoch = math.ceil(
            (data.len_train - configs['data']['sequence_length'])
            / configs['training']['batch_size'])
        model.train_generator(
            data_gen=data.generate_train_batch(
                seq_len=configs['data']['sequence_length'],
                batch_size=configs['training']['batch_size'],
                normalise=configs['data']['normalise']),
            epochs=configs['training']['epochs'],
            batch_size=configs['training']['batch_size'],
            steps_per_epoch=steps_per_epoch,
            save_dir=configs['model']['save_dir'],
            mode=configs['mode'])
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])
    predictions = model.predict_point_by_point(x_test)
    test_score = score(y_true=y_test, y_pred=predictions)
    plot_results(predictions, y_test)
def _directional_hit_rate(predictions, y_test):
    """Return the fraction of steps where the prediction moved in the same
    direction as the true series, relative to the previous true value.

    A step counts as a hit when (t - t_1) and (p - t_1) share a sign,
    i.e. prediction and reality both rose or both fell versus the prior
    point. The first element has no prior point and is never counted,
    but the rate is still divided by the full length (matching the
    original in-line loop's behaviour).
    """
    count = 0
    t_1 = 0
    for a in range(len(predictions)):
        if a == 0:
            t_1 = y_test[a]
            continue
        p = predictions[a]
        t = y_test[a]
        if (t - t_1) * (p - t_1) > 0:
            count += 1
        t_1 = t
    return count / len(predictions)


def main():
    """Resume a saved model, fine-tune it with the batch generator, dump
    predictions/targets to CSV, and report the directional hit rate.
    """
    # Fix: context manager closes the config handle (was leaked).
    with open('config.json', 'r') as cfg:
        configs = json.load(cfg)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])
    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])
    model = Model()
    # Resume from a checkpoint instead of building a fresh network.
    model.load_model("saved_models/dow_30_50%.h5")

    # Out-of-memory generative training.
    steps_per_epoch = math.ceil(
        (data.len_train - configs['data']['sequence_length'])
        / configs['training']['batch_size'])
    model.train_generator(
        data_gen=data.generate_train_batch(
            seq_len=configs['data']['sequence_length'],
            batch_size=configs['training']['batch_size'],
            normalise=configs['data']['normalise']),
        epochs=configs['training']['epochs'],
        batch_size=configs['training']['batch_size'],
        steps_per_epoch=steps_per_epoch,
        save_dir=configs['model']['save_dir'])

    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'])
    print("x_test.shape")
    print(x_test.shape)
    predictions = model.predict_point_by_point(x_test)

    print(predictions.shape)
    print(y_test.shape)
    # Persist predictions and targets for offline inspection.
    pd.DataFrame(predictions).to_csv("predictions.csv")
    pd.DataFrame(y_test).to_csv("y_test.csv")

    print("Good prediction rate = "
          + str(_directional_hit_rate(predictions, y_test)))
    plot_results(predictions, y_test)
def main():
    """Train an LSTM to predict `day_prediction` days ahead, compare a
    hand-computed sum-of-squares loss against Keras' evaluate() loss, and
    plot the de-normalised predictions against the de-normalised truth.
    """
    # Fix: context manager closes the config handle (was leaked).
    with open('config.json', 'r') as cfg:
        configs = json.load(cfg)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])
    data = DataLoader(os.path.join('data', configs['data']['filename']),
                      configs['data']['train_test_split'],
                      configs['data']['columns'])
    lossesMINE = []
    lossesKERAS = []
    # for day_prediction in [1, 2, 3, 4, 5, 10, 50]:  # sweep kept for reference
    day_prediction = 10
    print("Predicting %i days..." % day_prediction)
    model = Model()
    model.build_model(configs)
    x, y = data.get_train_data(seq_len=configs['data']['sequence_length'],
                               normalise=configs['data']['normalise'],
                               day_pred=day_prediction)
    # in-memory training
    model.train(x, y,
                epochs=configs['training']['epochs'],
                batch_size=configs['training']['batch_size'],
                save_dir=configs['model']['save_dir'])
    x_test, y_test = data.get_test_data(
        seq_len=configs['data']['sequence_length'],
        normalise=configs['data']['normalise'],
        day_pred=day_prediction)
    predictions = model.predict_point_by_point(x_test)

    npPredictions = np.asarray(predictions)
    # Hand-rolled sum of squared errors, element by element (summed, not
    # averaged — so it differs from Keras' mean loss by a factor of n).
    loss = 0
    for i in range(len(npPredictions)):
        loss += (npPredictions[i] - y_test[i]) ** 2
    print(loss)
    keras_loss = model.model.evaluate(x_test, y_test)
    print(keras_loss)
    lossesMINE.append(loss)
    lossesKERAS.append(keras_loss)

    # De-normalise: y = (y_norm + 1) * p0 (see the commented-out
    # unnormalization code this replaces). Fix: hoisted the conversion —
    # the original called np.asarray(data.denormalization_vals) four times.
    denorm = np.asarray(data.denormalization_vals)
    real_y = np.reshape(y_test, (y_test.shape[0],)) * denorm + denorm
    real_pred = predictions * denorm + denorm
    data.denormalization_vals = []  # reset so a later run starts fresh
    plot_results(real_pred, real_y)
    print(lossesMINE)
    print(lossesKERAS)
def main():
    """Stream live sensor readings from an Arduino over serial, predict the
    next window with the LSTM, plot continuously, and periodically retrain
    on the accumulated readings. Runs forever (real-time loop).
    """
    # Fix: context manager closes the config handle (was leaked).
    with open('config.json', 'r') as cfg:
        configs = json.load(cfg)
    if not os.path.exists(configs['model']['save_dir']):
        os.makedirs(configs['model']['save_dir'])

    model = Model()
    model.build_model(configs)

    # Get live sensor data from Arduino and predict next sensor data.
    sensor_port = serial.Serial('COM7', 9600)
    sensor_port.close()  # close/reopen to discard any stale port state
    sensor_port.open()

    # Fix: the original had `seq_len = configs[...],` — the trailing comma
    # made it a 1-tuple that then had to be unwrapped as seq_len[0].
    seq_len = configs['data']['sequence_length']
    sensor_data = []
    predictions_data = []
    # Fix: np.arange() returns an *integer* array, so assigning float
    # readings into it silently truncated them; allocate a float buffer.
    live_data = np.zeros(seq_len - 1)
    plt.ion()  # real time graph
    while True:
        i = 0
        while i < seq_len - 1:
            # store incoming data to testing data array
            b = sensor_port.readline()  # read a byte string
            live_data[i] = float(b.decode())
            sensor_data.append(live_data[i])
            i += 1
        # construct live data for LSTM: shape (1, seq_len - 1, 1)
        sensor_struct_data = live_data[np.newaxis, :, np.newaxis]
        # Shift the window by 1 new prediction each time, re-run
        # predictions on new window.
        predictions = model.predict_sequence_live(
            sensor_struct_data, configs['data']['sequence_length'])
        predictions_data.append(predictions)
        plot_results(predictions_data[-120:], sensor_data[-100:])
        plt.show()
        plt.pause(0.1)  # critical to display continous img

        # Train every 10 * seq_len collected samples.
        if len(sensor_data) > 10 * seq_len:
            # Raw string: identical value, but immune to backslash escapes.
            np.savetxt(r'data\sensor.csv', sensor_data, delimiter=',',
                       header='sensor_value')
            # load data for training
            data = DataLoader(
                os.path.join('data', configs['data']['filename']),
                configs['data']['train_test_split'],
                configs['data']['columns'])
            x, y = data.get_train_data(
                seq_len=configs['data']['sequence_length'],
                normalise=configs['data']['normalise'])
            # in-memory training
            model.train(x, y,
                        epochs=configs['training']['epochs'],
                        batch_size=configs['training']['batch_size'],
                        save_dir=configs['model']['save_dir'])
            sensor_data = sensor_data[-100:]