def test(model, test_dataloader):
    """Run `model` over the test set and return stacked predictions.

    NOTE(review): this chunk was whitespace-mangled; the original `def`
    header and the `result = []` initialization were not visible, so the
    signature is reconstructed from the call in the __main__ block below —
    confirm against the original file.

    Args:
        model: trained torch module already on `device`.
        test_dataloader: DataLoader yielding (x, y) batches; y is unused
            here because the ground truth is read from test_data[1] below.

    Returns:
        np.ndarray of per-batch model outputs (moved to CPU).
    """
    result = []
    with torch.no_grad():  # inference only — no autograd bookkeeping
        for x, y in tqdm(test_dataloader, ascii=True,
                         total=len(test_dataloader)):
            x = x.to(device)
            out = model(x)
            # .detach() is redundant under no_grad() but harmless
            result.append(out.cpu().detach().numpy())
    return np.array(result)


if __name__ == "__main__":
    # SECURITY: pickle.load and torch.load execute arbitrary code from the
    # file — only run this on trusted, locally produced artifacts.
    with open(test_data_path, 'rb') as f:
        test_data = pickle.load(f)
    test_dataset = StockDataset(test_data)
    test_dataloader = DataLoader(
        test_dataset, shuffle=False, batch_size=1, num_workers=1
    )
    model = torch.load(model_path).to(device)
    result = test(model, test_dataloader)
    y = test_data[1]  # ground-truth series saved alongside the features
    print(compute_difference(result, y))
    draw.draw_result(result, y)
# Set network hyperparameters.
TRAIN = 0.8
WINDOW_SIZE = 50
# 0 = use Simple Moving Average, 1 = use Exponential Moving Average,
# any other number = don't use either SMA or EMA
SMA_OR_EMA = 2
SMOOTHING_WINDOW_SIZE = 26

# Set up the model and load pre-trained weights.
model = StockPredictor(hidden_size=MODEL_HIDDEN_SIZE).to(device)
model.load_state_dict(torch.load(os.path.join("models", MODEL_LOAD_NAME)))

# Determine which out-of-distribution (OOD) stocks to use.
ood_stock_fns = ["acbi.us.txt"]  # , "hscz.us.txt", "qvcb.us.txt", "qsr.us.txt"

# Preprocess the dataset into fixed-size windows.
stock_windows = StockPreprocessor(
    stock_fns=ood_stock_fns,
    window_size=WINDOW_SIZE,
    train=TRAIN,
    sma_or_ema=SMA_OR_EMA,
    smoothing_window_size=SMOOTHING_WINDOW_SIZE).get_all_data()
dataset = StockDataset(stock_windows=stock_windows)

# Set up evaluation hyperparameters.
loss_func = nn.L1Loss(reduction='mean').to(device)
loader = data.DataLoader(dataset, batch_size=1, shuffle=False)

# Test the model.
avg_loss = 0.0
predictions = []
ground_truth = []
for batch_id, samples in enumerate(loader):  # iterate over batches
    # Input prices and the ground-truth price prediction.
    prices = samples['prices'].to(device)
    labels = samples['labels'].to(device)
    # make predictions and calculate loss
    # NOTE(review): the loop body continues beyond this mangled chunk.
print(f"epoch {epoch+1} loss: {epoch_loss:.4f}") print("save model.....") torch.save(model, 'best.model') print("end") return epoch_loss_h if __name__ == "__main__": with open(data_path, 'rb') as f: train_data = pickle.load(f) train_dataset = StockDataset(train_data) train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=10, num_workers=1) model = Model(input_dim=5, hidden_dim=64, num_layers=2, output_dim=1, dropout=0.5).to(device) print(model) print([name for name, _ in model.named_parameters()])
# train = 0.8, window = 25, epochs = 20, batch size = 1, hidden size = 200, lr = 0.0005, no moving average, smoothing window = N/A, normalization window size = 100, 6 stocks => 0.058 L1 train loss, 0.058 test loss ('sum' reduction) # train = 0.8, window = 25, epochs = 20, batch size = 1, hidden size = 200, lr = 0.0005, no moving average, smoothing window = N/A, normalization window size = 1500, 6 stocks => 0.058 L1 train loss, 0.058 test loss ('sum' reduction) # set up dataset and model stock_fns = [ "aa.us.txt", "msft.us.txt", "goog.us.txt", "gpic.us.txt", "rfdi.us.txt", "aal.us.txt" ] # chosen somewhat randomly model = StockPredictor(hidden_size=HIDDEN_SIZE).to(device) train_windows, test_windows = StockPreprocessor( stock_fns=stock_fns, window_size=WINDOW_SIZE, train=TRAIN, sma_or_ema=SMA_OR_EMA, smoothing_window_size=SMOOTHING_WINDOW_SIZE).get_splits() train_dataset = StockDataset(stock_windows=train_windows) test_dataset = StockDataset(stock_windows=test_windows) # (OPTIONAL) uncomment to plot the stock data -- NOTE: only plots the first stock's history if a list of stocks is provided #train_dataset.plot_stock_raw() #test_dataset.plot_stock_raw() # load pre-trained model weights if MODEL_LOAD_NAME is not None: model.load_state_dict(torch.load(os.path.join("models", MODEL_LOAD_NAME))) # set up hyperparameters optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE) loss_func = nn.L1Loss(reduction='mean').to( device) #nn.MSELoss(reduction = 'sum').to(device) train_loader = data.DataLoader(train_dataset,
def export_metrics(self, data, model, result_df, period):
    """Evaluate `model` on the train/valid/test splits and export CSVs.

    Writes three families of artifacts under the configured checkpoint
    directory for `period`: raw predictions/labels/probabilities, per-split
    classification reports, and per-split confusion matrices.

    Fix: `os.mkdir` raised FileExistsError on re-runs and FileNotFoundError
    when a parent directory was missing — replaced with
    `os.makedirs(..., exist_ok=True)`.

    Args:
        data: ((X_train, y_train), (X_test, y_test), (X_valid, y_valid)).
        model: trained model passed through to `self.model_passing`.
        result_df: (train_df, valid_df, test_df) frames for model_passing.
        period: sub-directory name for this export.
    """
    (X_train, y_train), (X_test, y_test), (X_valid, y_valid) = data
    train_df, valid_df, test_df = result_df

    train_data_loader = DataLoader(StockDataset(X_train, y_train))
    valid_data_loader = DataLoader(StockDataset(X_valid, y_valid))
    test_data_loader = DataLoader(StockDataset(X_test, y_test))

    # Pass each split through the model.
    train_pred_df, train_label_df, train_pred_proba_df = self.model_passing(
        train_data_loader, train_df, model)
    valid_pred_df, valid_label_df, valid_pred_proba_df = self.model_passing(
        valid_data_loader, valid_df, model)
    test_pred_df, test_label_df, test_pred_proba_df = self.model_passing(
        test_data_loader, test_df, model)

    base_dir = os.path.join(self.config.checkpoint_dir, self.config.directory,
                            period)

    # Export raw model outputs (filenames match the original script).
    raw_output_dir = os.path.join(base_dir, self.config.raw_output_dir)
    os.makedirs(raw_output_dir, exist_ok=True)
    raw_frames = {
        'train_pred_df': train_pred_df,
        'valid_pred_df': valid_pred_df,
        'test_pred_df': test_pred_df,
        'train_label_df': train_label_df,
        'valid_label_df': valid_label_df,
        'test_label_df': test_label_df,
        'train_pred_proba_df': train_pred_proba_df,
        'valid_pred_proba_df': valid_pred_proba_df,
        'test_pred_proba_df': test_pred_proba_df,
    }
    for name, frame in raw_frames.items():
        frame.to_csv(os.path.join(raw_output_dir, name + '.csv'))

    # Flatten predictions/labels to 1-D int arrays for the sklearn metrics.
    flat = {
        split: (pred_df.values.flatten().astype(int),
                label_df.values.flatten().astype(int))
        for split, (pred_df, label_df) in {
            'train': (train_pred_df, train_label_df),
            'valid': (valid_pred_df, valid_label_df),
            'test': (test_pred_df, test_label_df),
        }.items()
    }

    # Export classification reports.
    report_dir = os.path.join(base_dir, self.config.report_dir)
    os.makedirs(report_dir, exist_ok=True)
    for split, (pred, label) in flat.items():
        report = pd.DataFrame(
            classification_report(label, pred, output_dict=True)).transpose()
        report.to_csv(os.path.join(report_dir, '%s_report.csv' % split))

    # Export confusion matrices.
    conf_dir = os.path.join(base_dir, self.config.confusion_mat_dir)
    os.makedirs(conf_dir, exist_ok=True)
    for split, (pred, label) in flat.items():
        conf = pd.DataFrame(confusion_matrix(label, pred)).transpose()
        conf.to_csv(os.path.join(conf_dir, '%s_conf.csv' % split))
def main():
    """Entry point: build the dataset, train, then evaluate the best
    checkpoint on the test split and append a summary line to the log."""
    config = get_args()
    logger = set_logger(config)

    dataset = StockDataset(config)
    config.num_relations = dataset.num_relations
    config.num_companies = dataset.num_companies

    run_config = tf.ConfigProto()
    run_config.gpu_options.allow_growth = True

    model_name = config.model_type
    # Experiment directory name encodes the run's hyperparameters.
    exp_name = '_'.join([
        config.data_type, model_name,
        str(config.test_phase), str(config.test_size),
        str(config.train_proportion), str(config.lr),
        str(config.dropout), str(config.lookback),
    ])
    exp_dir = os.path.join(config.save_dir, exp_name)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    sess = tf.Session(config=run_config)
    model = init_prediction_model(config)
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess.run(init)

    def model_summary(logger):
        # Print a per-variable breakdown of trainable parameters.
        model_vars = tf.trainable_variables()
        slim.model_analyzer.analyze_vars(model_vars, print_info=True)

    model_summary(logger)

    # Training
    evaluator = Evaluator(config, logger)
    trainer = Trainer(sess, model, dataset, config, logger, evaluator)
    trainer.train()

    # Testing: restore the best checkpoint saved during training.
    loader = tf.train.Saver(max_to_keep=None)
    loader.restore(sess, tf.train.latest_checkpoint(exp_dir))
    print("load best evaluation model")

    test_loss, report_all, report_topk = evaluator.evaluate(
        sess, model, dataset, 'test', trainer.best_f1['neighbors'])

    def log_report(tag, report):
        # Both report tuples share the same layout; log them identically.
        pred_rate, acc, cpt_acc, mac_f1, mic_f1, exp_rt = report
        logger.info(
            ('EPOCH {} TEST ' + tag + ' \nloss : {:2.4f} accuracy : {:2.4f}'
             ' hit ratio : {:2.4f} pred_rate : {} macro f1 : {:2.4f}'
             ' micro f1 : {:2.4f} expected return : {:2.4f}').format(
                 trainer.best_f1['epoch'], test_loss, acc, cpt_acc,
                 pred_rate, mac_f1, mic_f1, exp_rt))

    log_report('ALL', report_all)
    log_report('TopK', report_topk)

    # Print Log: append a tab-separated summary for this test phase.
    with open('%s_log.log' % model_name, 'a') as out_:
        out_.write("%d phase\n" % (config.test_phase))
        out_.write(
            "%f\t%f\t%f\t%f\t%f\t%s\t%f\t%f\t%f\t%f\t%f\t%s\t%d\n" %
            (report_all[1], report_all[2], report_all[3], report_all[4],
             report_all[5], str(report_all[0]), report_topk[1],
             report_topk[2], report_topk[3], report_topk[4], report_topk[5],
             str(report_topk[0]), trainer.best_f1['epoch']))