def get_model(model_name, device):
    if model_name == "LSTM":
        model = LSTM(input_size=NUM_FEAT, hidden_size=500,
                     output_size=len(classes), num_layers=2, bi=False).to(device)
    elif model_name == "BiLSTM":
        model = LSTM(input_size=NUM_FEAT, hidden_size=500,
                     output_size=len(classes), num_layers=2, bi=True).to(device)
    elif model_name == "GRU":
        model = GRU(input_size=NUM_FEAT, hidden_size=500,
                    output_size=len(classes), num_layers=2, bi=False).to(device)
    elif model_name == "BiGRU":
        model = GRU(input_size=NUM_FEAT, hidden_size=500,
                    output_size=len(classes), num_layers=2, bi=True).to(device)
    elif model_name == "NN":
        model = NN(input_size=NUM_FEAT * SEQ_LENGTH,
                   output_size=len(classes)).to(device)
    else:
        raise ValueError("Unknown model name: {}".format(model_name))
    # Resume from saved weights if they exist; otherwise initialize from scratch.
    weights_path = os.path.join(WEIGHTS_DIR, model_name + ".pt")
    if os.path.exists(weights_path):
        model.load_state_dict(torch.load(weights_path))
    else:
        model.apply(init_weights)
    return model
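# `init_weights` is referenced above via `model.apply(init_weights)` but not
# defined in this snippet. A minimal sketch, assuming plain Xavier
# initialization for linear layers (hypothetical, not the original helper):
def init_weights(m):
    # Apply Xavier initialization to every nn.Linear submodule.
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)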
def get_best_model(model_type: str) -> nn.Module:
    if model_type == 'RNN':
        model = RNN(emb_size=200, hidden_size=1500, seq_len=35, batch_size=20,
                    vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.35)
        model.load_state_dict(
            torch.load('./4_1_a/best_params.pt', map_location=device))
    elif model_type == 'GRU':
        model = GRU(emb_size=200, hidden_size=1500, seq_len=35, batch_size=20,
                    vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.35)
        model.load_state_dict(
            torch.load('./4_1_b/best_params.pt', map_location=device))
    elif model_type == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size, n_units=512, n_blocks=6,
                            dropout=1. - 0.9)
        # These attributes do not affect the forward pass; they are only
        # used by run_epoch.
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
        model.load_state_dict(
            torch.load('./4_1_c/best_params.pt', map_location=device))
    else:
        raise ValueError("Unknown model type: {}".format(model_type))
    return model
def _load_model(model_type):
    emb_size = 200
    hidden_size = 1500
    seq_len = 35  # 70
    batch_size = 20
    vocab_size = 10000
    num_layers = 2
    dp_keep_prob = 0.35
    # Load model (change model_type to 'RNN' if you want the RNN to predict)
    if model_type == 'RNN':
        model = RNN(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
        PATH = os.path.join("RNN_ADAM_0", "best_params.pt")
    else:
        model = GRU(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
        PATH = os.path.join("GRU_SGD_LR_SCHEDULE_0", "best_params.pt")
    if torch.cuda.is_available():
        # load_state_dict returns a result object, not the model, so .cuda()
        # must be called on the model itself.
        model.load_state_dict(torch.load(PATH))
        model.cuda()
    else:
        model.load_state_dict(torch.load(PATH, map_location='cpu'))
    model.eval()
    return model
def __init__(self, trajectories, model_dir=None, **kwargs):
    super().__init__(trajectories, **kwargs)
    DATA_PATH = Path(os.getenv("DATA_PATH", "/home/stud/grimmalex/datasets/"))
    OUT_PATH = Path(os.getenv("OUT_PATH", "/home/stud/grimmalex/thesis/output/"))
    if model_dir is None:
        model_dir = OUT_PATH / "gru-sim/first/ml-1m/gru/2"
    print("model dir is {}".format(model_dir))
    data, agent, seed = str(model_dir).split("/")[-3:]
    if "ml" in data:
        data = "ml/{}".format(data)
    data_dir = DATA_PATH / data
    trajectory_file = data_dir / "test.csv"
    config = get_base_config(trajectory_file, Path(model_dir), 1)
    with open(model_dir / "hyperparameters.yaml", "r") as f:
        hyperparameters = yaml.load(f, yaml.Loader)
    main_key = list(hyperparameters.keys())[0]
    config.hyperparameters = hyperparameters[main_key]
    w2v_path = config.hyperparameters["Embedding"]["w2v_context_path"]
    if str(DATA_PATH) not in w2v_path:
        end_path = str(w2v_path).split("datasets/")[-1]
        w2v_path = DATA_PATH / end_path
    config.hyperparameters["Embedding"]["w2v_context_path"] = w2v_path
    agent = GRU(config)
    path = agent.model_saver.get_last_checkpoint_path()
    agent.load_pretrained_models(path)
    self.agent = agent
    self.reward_type = "list"
def generate_sequences(id_2_word, num_samples, model_type, emb_size, hidden_size,
                       seq_len, batch_size, num_layers, dp_keep_prob, vocab_size,
                       path):
    if model_type == 'RNN':
        model = RNN(emb_size=emb_size, hidden_size=hidden_size, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size,
                    num_layers=num_layers, dp_keep_prob=dp_keep_prob)
    else:
        model = GRU(emb_size=emb_size, hidden_size=hidden_size, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size,
                    num_layers=num_layers, dp_keep_prob=dp_keep_prob)
    model.load_state_dict(torch.load(path))
    model = model.to(device)
    # Initial hidden state; no gradient tracking is needed for generation.
    hidden = torch.zeros(num_layers, num_samples, hidden_size).to(device)
    # Draw the first token of each sample uniformly over the vocabulary
    # (torch.multinomial normalizes the weights, so their scale is irrelevant).
    weights = torch.ones(vocab_size)
    inp = torch.multinomial(weights, num_samples).to(device)
    output = model.generate(inp, hidden, seq_len)
    with open(model_type + '_generated_sequences.txt', 'w') as f:
        for i in range(num_samples):
            for j in range(seq_len):
                f.write(id_2_word.get(output[j, i].item()) + ' ')
            f.write('\n')
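# Hedged usage sketch for generate_sequences above: the checkpoint path and
# sample count are illustrative, the hyperparameters mirror the values used
# elsewhere in this section, and id_2_word is assumed to come from a
# PTB-style vocabulary loader.
generate_sequences(id_2_word, num_samples=10, model_type='GRU', emb_size=200,
                   hidden_size=1500, seq_len=35, batch_size=20, num_layers=2,
                   dp_keep_prob=0.35, vocab_size=10000, path='best_params.pt')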
def train():
    fluid.enable_dygraph(device)
    processor = SentaProcessor(data_dir=args.data_dir,
                               vocab_path=args.vocab_path,
                               random_seed=args.random_seed)
    num_labels = len(processor.get_labels())
    num_train_examples = processor.get_num_examples(phase="train")
    max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
    train_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='train',
        epoch=args.epoch,
        shuffle=False)
    eval_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='dev',
        epoch=args.epoch,
        shuffle=False)
    if args.model_type == 'cnn_net':
        model = CNN(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model = BOW(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model = GRU(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
    optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr,
                                        parameter_list=model.parameters())
    inputs = [Input([None, None], 'int64', name='doc')]
    labels = [Input([None, 1], 'int64', name='label')]
    model.prepare(optimizer,
                  CrossEntropy(),
                  Accuracy(topk=(1, )),
                  inputs,
                  labels,
                  device=device)
    model.fit(train_data=train_data_generator,
              eval_data=eval_data_generator,
              batch_size=args.batch_size,
              epochs=args.epoch,
              save_dir=args.checkpoints,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq)
def create_model():
    if args.model_type == 'cnn_net':
        model = CNN(args.vocab_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model = BOW(args.vocab_size, args.padding_size)
    elif args.model_type == 'lstm_net':
        model = LSTM(args.vocab_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model = GRU(args.vocab_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
    else:
        raise ValueError("Unknown model type!")
    return model
def infer():
    fluid.enable_dygraph(device)
    processor = SentaProcessor(data_dir=args.data_dir,
                               vocab_path=args.vocab_path,
                               random_seed=args.random_seed)
    infer_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        padding_size=args.padding_size,
        places=device,
        phase='infer',
        epoch=1,
        shuffle=False)
    if args.model_type == 'cnn_net':
        model_infer = CNN(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bow_net':
        model_infer = BOW(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'gru_net':
        model_infer = GRU(args.vocab_size, args.batch_size, args.padding_size)
    elif args.model_type == 'bigru_net':
        model_infer = BiGRU(args.vocab_size, args.batch_size, args.padding_size)
    print('Do inferring ...... ')
    inputs = [Input([None, None], 'int64', name='doc')]
    model_infer.prepare(None,
                        CrossEntropy(),
                        Accuracy(topk=(1, )),
                        inputs,
                        device=device)
    model_infer.load(args.checkpoints, reset_optimizer=True)
    preds = model_infer.predict(test_data=infer_data_generator)
    preds = np.array(preds[0]).reshape((-1, 2))
    if args.output_dir:
        with open(os.path.join(args.output_dir, 'predictions.json'), 'w') as w:
            for p in range(len(preds)):
                # Cast to a Python int so json can serialize it.
                label = int(np.argmax(preds[p]))
                result = json.dumps({
                    'index': p,
                    'label': label,
                    'probs': preds[p].tolist()
                })
                w.write(result + '\n')
        print('Predictions saved at ' +
              os.path.join(args.output_dir, 'predictions.json'))
def _load_model(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                num_layers, dp_keep_prob, PATH, model_type):
    # Load model (change model_type to 'RNN' if you want the RNN to predict)
    if model_type == 'RNN':
        model = RNN(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
    else:
        model = GRU(emb_size, hidden_size, seq_len, batch_size, vocab_size,
                    num_layers, dp_keep_prob)
    if torch.cuda.is_available():
        # load_state_dict returns a result object, not the model, so .cuda()
        # must be called on the model itself.
        model.load_state_dict(torch.load(PATH))
        model.cuda()
    else:
        model.load_state_dict(torch.load(PATH, map_location='cpu'))
    model.eval()
    return model
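# Hedged usage sketch for _load_model above; the hyperparameters mirror the
# fixed values used by the no-argument variant earlier in this section, and
# the checkpoint path is illustrative.
model = _load_model(emb_size=200, hidden_size=1500, seq_len=35, batch_size=20,
                    vocab_size=10000, num_layers=2, dp_keep_prob=0.35,
                    PATH='best_params.pt', model_type='GRU')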
def make_my_model(model_name, device, seq_len=35, batch_size=20, pt=None):
    # Best configurations found for each architecture:
    # --model=RNN --optimizer=ADAM --initial_lr=0.0001 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
    # --model=GRU --optimizer=SGD_LR_SCHEDULE --initial_lr=10 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
    # --model=TRANSFORMER --optimizer=SGD_LR_SCHEDULE --initial_lr=20 --batch_size=128 --seq_len=35 --hidden_size=512 --num_layers=6 --dp_keep_prob=0.9 --save_best
    if model_name == 'RNN':
        model = RNN(emb_size=200, hidden_size=1500, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size, num_layers=2,
                    dp_keep_prob=0.35)
    elif model_name == 'GRU':
        model = GRU(emb_size=200, hidden_size=1500, seq_len=seq_len,
                    batch_size=batch_size, vocab_size=vocab_size, num_layers=2,
                    dp_keep_prob=0.35)
    elif model_name == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size, n_units=512, n_blocks=6,
                            dropout=1. - 0.9)
        # These 3 attributes don't affect the Transformer's computations;
        # they are only used in run_epoch.
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
    else:
        raise ValueError("ERROR: Model type not recognized.")
    # Move model to device
    model = model.to(device)
    # Load checkpoint if one is given
    if pt is not None:
        model.load_state_dict(torch.load(pt, map_location=device))
    return model
def load_model(model_info, device, vocab_size, emb_size=200, load_on_device=True):
    params_path = model_info.get_params_path()
    if model_info.model == 'RNN':
        model = RNN(emb_size=emb_size, hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len, batch_size=model_info.batch_size,
                    vocab_size=vocab_size, num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    elif model_info.model == 'GRU':
        model = GRU(emb_size=emb_size, hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len, batch_size=model_info.batch_size,
                    vocab_size=vocab_size, num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    else:
        model = TRANSFORMER(vocab_size=vocab_size, n_units=model_info.hidden_size,
                            n_blocks=model_info.num_layers,
                            dropout=1. - model_info.dp_keep_prob)
        model.batch_size = model_info.batch_size
        model.seq_len = model_info.seq_len
        model.vocab_size = vocab_size
    if load_on_device:
        model = model.to(device)
    model.load_state_dict(torch.load(params_path, map_location=device))
    return model
def __init__(self, input_size, st_size, hidden_size, output_size, bounded=-1,
             lr=.001):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.st_size = st_size
    self.lr = lr
    # Two fully connected layers feed a recurrent core, whose state is mapped
    # back to the output by two more fully connected layers.
    self.fc0_layer = nn.Linear(input_size, hidden_size)
    self.fc1_layer = nn.Linear(hidden_size, hidden_size)
    self.rnn_layer = GRU(hidden_size, st_size)
    self.fc3_layer = nn.Linear(st_size, hidden_size)
    self.fc4_layer = nn.Linear(hidden_size, output_size)
    self.optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
    self.bounded = bounded
def predict(id_2_word, seq_length=35, batch_size=20, load_GRU=True):
    if load_GRU:
        # (emb_size=200, hidden_size=1500, seq_len=35, batch_size=20,
        #  vocab_size=10000, num_layers=2, dp_keep_prob=0.35)
        model = GRU(200, 1500, 35, 20, 10000, 2, 0.35)
        model.load_state_dict(torch.load(os.path.join("model", "best_GRU.pt")))
        model.eval()
        filename = os.path.join("predictions", "GRU_" + str(seq_length) + ".txt")
    else:
        model = RNN(200, 1500, 35, 20, 10000, 2, 0.35)
        model.load_state_dict(torch.load(os.path.join("model", "best_RNN.pt")))
        model.eval()
        filename = os.path.join("predictions", "RNN_" + str(seq_length) + ".txt")
    hidden = model.init_hidden()
    random_input = torch.randint(10000, (batch_size,))
    samples = model.generate(random_input, hidden, seq_length, batch_size)
    sequence = [[" " for j in range(seq_length)] for i in range(batch_size)]
    for i in range(batch_size):
        for j in range(seq_length):
            sequence[i][j] = id_2_word[samples[j, i].item()]
    save_prediction(sequence, seq_length, filename)
    return sequence
for m in range(len(model_types)):
    print("\n########## Running Main Loop ##########################")
    train_ppls = []
    train_losses = []
    val_ppls = []
    val_losses = []
    best_val_so_far = np.inf
    times = []
    if model_types[m] == 'RNN':
        model = RNN(emb_size=embSize[m], hidden_size=hiddenSize[m],
                    seq_len=seqLen[m], batch_size=batchSize[m],
                    vocab_size=vocab_size, num_layers=numLayers[m],
                    dp_keep_prob=dropOut[m])
    elif model_types[m] == 'GRU':
        model = GRU(emb_size=embSize[m], hidden_size=hiddenSize[m],
                    seq_len=seqLen[m], batch_size=batchSize[m],
                    vocab_size=vocab_size, num_layers=numLayers[m],
                    dp_keep_prob=dropOut[m])
    else:
        model = TRANSFORMER(vocab_size=vocab_size, n_units=hiddenSize[m],
                            n_blocks=numLayers[m], dropout=1 - dropOut[m])
    model.load_state_dict(torch.load(path[m]))
    model.batch_size = batchSize[m]
    model.seq_len = seqLen[m]
    model.vocab_size = vocab_size
    model = model.to(device)
    # MAIN LOOP
    val_loss = run_epoch(model, valid_data, model_types[m])
    total_loss[m, :] = val_loss
    time = np.arange(1, seqLen[m] + 1)
    print('Plotting graph...')
def main(args):
    # Fix Seed for Reproducibility #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Samples, Weights, and Plots Path #
    paths = [args.weights_path, args.plots_path, args.numpy_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = load_data(args.combined_path, args.which_data, args.preprocess,
                     args.resample)[[args.feature]]
    id = args.which_data.split('_')[0]
    print("Data of {} is successfully Loaded!".format(args.which_data))

    # Plot Time-Series Data #
    if args.plot:
        plot_full(args.plots_path, data, id, args.feature)
        plot_split(args.plots_path, data, id, args.valid_start, args.test_start,
                   args.feature)

    # Min-Max Scaler #
    scaler = MinMaxScaler()
    data[args.feature] = scaler.fit_transform(data)

    # Split the Dataset #
    copied_data = data.copy()
    if args.multi_step:
        X, y = split_sequence_multi_step(copied_data, args.window,
                                         args.output_size)
    else:
        X, y = split_sequence_uni_step(copied_data, args.window)

    # Get Data Loader #
    train_loader, val_loader, test_loader = get_data_loader(
        X, y, args.train_split, args.test_split, args.batch_size)

    # Constants #
    best_val_loss = 100
    best_val_improv = 0

    # Lists #
    train_losses, val_losses = list(), list()
    val_maes, val_mses, val_rmses, val_mapes, val_mpes, val_r2s = \
        list(), list(), list(), list(), list(), list()
    test_maes, test_mses, test_rmses, test_mapes, test_mpes, test_r2s = \
        list(), list(), list(), list(), list(), list()

    # Prepare Network #
    if args.network == 'dnn':
        model = DNN(args.window, args.hidden_size, args.output_size).to(device)
    elif args.network == 'cnn':
        model = CNN(args.window, args.hidden_size, args.output_size).to(device)
    elif args.network == 'rnn':
        model = RNN(args.input_size, args.hidden_size, args.num_layers,
                    args.output_size).to(device)
    elif args.network == 'lstm':
        model = LSTM(args.input_size, args.hidden_size, args.num_layers,
                     args.output_size, args.bidirectional).to(device)
    elif args.network == 'gru':
        model = GRU(args.input_size, args.hidden_size, args.num_layers,
                    args.output_size).to(device)
    elif args.network == 'recursive':
        model = RecursiveLSTM(args.input_size, args.hidden_size, args.num_layers,
                              args.output_size).to(device)
    elif args.network == 'attentional':
        model = AttentionalLSTM(args.input_size, args.qkv, args.hidden_size,
                                args.num_layers, args.output_size,
                                args.bidirectional).to(device)
    else:
        raise NotImplementedError

    if args.mode == 'train':
        # If fine-tuning #
        if args.transfer_learning:
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        args.weights_path,
                        'BEST_{}_Device_ID_12.pkl'.format(
                            model.__class__.__name__))))
            for param in model.parameters():
                param.requires_grad = True

        # Loss Function #
        criterion = torch.nn.MSELoss()

        # Optimizer #
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     betas=(0.5, 0.999))
        optimizer_scheduler = get_lr_scheduler(optimizer, args)

        # Train and Validation #
        print("Training {} started with total epoch of {} using Driver ID of {}."
              .format(model.__class__.__name__, args.num_epochs, id))
        for epoch in range(args.num_epochs):
            # Train #
            for i, (data, label) in enumerate(train_loader):
                # Data Preparation #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)
                # Forward Data #
                pred = model(data)
                # Calculate Loss #
                train_loss = criterion(pred, label)
                # Back Propagation and Update #
                optimizer.zero_grad()
                train_loss.backward()
                optimizer.step()
                # Add items to Lists #
                train_losses.append(train_loss.item())

            print("Epoch [{}/{}]".format(epoch + 1, args.num_epochs))
            print("Train")
            print("Loss : {:.4f}".format(np.average(train_losses)))
            optimizer_scheduler.step()

            # Validation #
            with torch.no_grad():
                for i, (data, label) in enumerate(val_loader):
                    # Data Preparation #
                    data = data.to(device, dtype=torch.float32)
                    label = label.to(device, dtype=torch.float32)
                    # Forward Data #
                    pred_val = model(data)
                    # Calculate Loss #
                    val_loss = criterion(pred_val, label)
                    val_mae = mean_absolute_error(label.cpu(), pred_val.cpu())
                    val_mse = mean_squared_error(label.cpu(), pred_val.cpu(),
                                                 squared=True)
                    val_rmse = mean_squared_error(label.cpu(), pred_val.cpu(),
                                                  squared=False)
                    # val_mpe = mean_percentage_error(label.cpu(), pred_val.cpu())
                    # val_mape = mean_absolute_percentage_error(label.cpu(), pred_val.cpu())
                    val_r2 = r2_score(label.cpu(), pred_val.cpu())
                    # Add item to Lists #
                    val_losses.append(val_loss.item())
                    val_maes.append(val_mae.item())
                    val_mses.append(val_mse.item())
                    val_rmses.append(val_rmse.item())
                    # val_mpes.append(val_mpe.item())
                    # val_mapes.append(val_mape.item())
                    val_r2s.append(val_r2.item())

            # Print Statistics #
            print("Validation")
            print("Loss : {:.4f}".format(np.average(val_losses)))
            print(" MAE : {:.4f}".format(np.average(val_maes)))
            print(" MSE : {:.4f}".format(np.average(val_mses)))
            print("RMSE : {:.4f}".format(np.average(val_rmses)))
            # print(" MPE : {:.4f}".format(np.average(val_mpes)))
            # print("MAPE : {:.4f}".format(np.average(val_mapes)))
            print(" R^2 : {:.4f}".format(np.average(val_r2s)))

            # Save the model only if validation loss decreased #
            curr_val_loss = np.average(val_losses)
            if curr_val_loss < best_val_loss:
                best_val_loss = min(curr_val_loss, best_val_loss)
                if args.transfer_learning:
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            args.weights_path,
                            'BEST_{}_Device_ID_{}_transfer.pkl'.format(
                                model.__class__.__name__, id)))
                else:
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            args.weights_path,
                            'BEST_{}_Device_ID_{}.pkl'.format(
                                model.__class__.__name__, id)))
                print("Best model is saved!\n")
                best_val_improv = 0
            elif curr_val_loss >= best_val_loss:
                best_val_improv += 1
                print("Best Validation has not improved for {} epochs.\n"
                      .format(best_val_improv))
                if best_val_improv == 10:
                    break

    elif args.mode == 'test':
        # Prepare Network #
        if args.transfer_learning:
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        args.weights_path,
                        'BEST_{}_Device_ID_{}.pkl'.format(
                            model.__class__.__name__, id))))
        else:
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        args.weights_path,
                        'BEST_{}_Device_ID_{}.pkl'.format(
                            model.__class__.__name__, id))))
        print("{} for Device ID {} is successfully loaded!".format(
            model.__class__.__name__, id))

        with torch.no_grad():
            for i, (data, label) in enumerate(test_loader):
                # Data Preparation #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)
                # Forward Data #
                pred_test = model(data)
                # Convert to Original Value Range #
                pred_test = pred_test.data.cpu().numpy()
                label = label.data.cpu().numpy()
                if not args.multi_step:
                    label = label.reshape(-1, 1)
                pred_test = scaler.inverse_transform(pred_test)
                label = scaler.inverse_transform(label)
                # Calculate Loss #
                test_mae = mean_absolute_error(label, pred_test)
                test_mse = mean_squared_error(label, pred_test, squared=True)
                test_rmse = mean_squared_error(label, pred_test, squared=False)
                # test_mpe = mean_percentage_error(label, pred_test)
                # test_mape = mean_absolute_percentage_error(label, pred_test)
                test_r2 = r2_score(label, pred_test)
                # Add item to Lists #
                test_maes.append(test_mae.item())
                test_mses.append(test_mse.item())
                test_rmses.append(test_rmse.item())
                # test_mpes.append(test_mpe.item())
                # test_mapes.append(test_mape.item())
                test_r2s.append(test_r2.item())

            # Print Statistics #
            print("Test {}".format(model.__class__.__name__))
            print(" MAE : {:.4f}".format(np.average(test_maes)))
            print(" MSE : {:.4f}".format(np.average(test_mses)))
            print("RMSE : {:.4f}".format(np.average(test_rmses)))
            # print(" MPE : {:.4f}".format(np.average(test_mpes)))
            # print("MAPE : {:.4f}".format(np.average(test_mapes)))
            print(" R^2 : {:.4f}".format(np.average(test_r2s)))

            # Derive Metric and Plot #
            # (the transfer branch should pass transfer_learning=True;
            #  both branches passed False in the original)
            if args.transfer_learning:
                test_plot(pred_test, label, args.plots_path, args.feature, id,
                          model, transfer_learning=True)
            else:
                test_plot(pred_test, label, args.plots_path, args.feature, id,
                          model, transfer_learning=False)
# This is where your model code will be called. You may modify this code
# if required for your implementation, but it should not typically be necessary,
# and you must let the TAs know if you do so.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # Also, the Transformer has other hyperparameters
        # (such as the number of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
    clip = 2925.4042227640757
elif model_name == 'lstm':
    hidden_size = 200
    num_layers = 1
    init_forget_bias = 1
    lr = 0.00016654418947982137
    weight_decay = 7.040822706204121e-05
    dropout = 0.18404592540409914
    clip = 4389.748805208904

# Create the model instance
if model_name == 'sru':
    model = SRU(input_size, phi_size, r_size, cell_out_size, output_size,
                dropout=dropout, gpu=gpu)
    model.initWeight()
elif model_name == 'gru':
    model = GRU(input_size, hidden_size, output_size, num_layers, dropout,
                gpu=gpu)
    model.initWeight(init_forget_bias)
elif model_name == 'lstm':
    model = LSTM(input_size, hidden_size, output_size, num_layers, dropout,
                 gpu=gpu)
    model.initWeight(init_forget_bias)

if gpu == True:
    model.cuda()

# Define the loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

''' Training '''
n_epochs = 400
def main():
    parser = argparse.ArgumentParser(description='Start training MoCoGAN.....')
    parser.add_argument('--batch-size', type=int, default=16,
                        help='set batch_size')
    parser.add_argument('--epochs', type=int, default=60000,
                        help='set num of iterations')
    parser.add_argument('--pre-train', type=int, default=-1,
                        help='set 1 when you use pre-trained models')
    parser.add_argument('--img_size', type=int, default=96,
                        help='set the input image size of frame')
    parser.add_argument('--data', type=str, default='data',
                        help='set the path for the directory containing dataset')
    parser.add_argument('--channel', type=int, default=3,
                        help='set the no. of channels of the frame')
    parser.add_argument('--hidden', type=int, default=100,
                        help='set the hidden layer size for gru')
    parser.add_argument('--dc', type=int, default=50,
                        help='set the size of motion vector')
    parser.add_argument('--de', type=int, default=10,
                        help='set the size of randomly generated epsilon')
    # lr and beta are floats; the original declared them type=int, which
    # would reject any value passed on the command line.
    parser.add_argument('--lr', type=float, default=0.0002,
                        help='set the learning rate')
    parser.add_argument('--beta', type=float, default=0.5,
                        help='set the beta for the optimizer')
    parser.add_argument('--trained_path', type=str, default='trained_models',
                        help='set the path where trained models are saved')
    parser.add_argument('--T', type=int, default=16,
                        help='set the no. of frames to be selected')
    args = parser.parse_args()

    batch_size = args.batch_size
    pre_train = args.pre_train
    img_size = args.img_size
    channel = args.channel
    d_E = args.de
    hidden_size = args.hidden
    d_C = args.dc

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    args.device = torch.device('cuda:0') if torch.cuda.is_available() else 'cpu'
    cuda = 1 if torch.cuda.is_available() else -1

    # Make the required folders
    if not os.path.exists('./generated_videos'):
        os.makedirs('./generated_videos')
    if not os.path.exists('./trained_models'):
        os.makedirs('./trained_models')
    if not os.path.exists('./resized_data'):
        os.makedirs('./resized_data')

    T = args.T
    start_epoch = 1
    seed = 0
    np.random.seed(seed)
    if cuda == True:
        torch.cuda.set_device(0)

    videos, current_path = preprocess(args)
    num_vid = len(videos)
    d_M = d_E
    nz = d_C + d_M
    criterion = nn.BCELoss()

    # Set up the models #
    dis_i = Image_Discriminator(channel)
    dis_v = Video_Discriminator()
    gen_i = Generator(channel, nz)
    gru = GRU(d_E, hidden_size, gpu=cuda)
    gru.initWeight()

    # Set up the optimizers #
    lr = args.lr
    beta = args.beta
    optim_Di = optim.Adam(dis_i.parameters(), lr=lr, betas=(beta, 0.999))
    optim_Dv = optim.Adam(dis_v.parameters(), lr=lr, betas=(beta, 0.999))
    optim_Gi = optim.Adam(gen_i.parameters(), lr=lr, betas=(beta, 0.999))
    optim_GRU = optim.Adam(gru.parameters(), lr=lr, betas=(beta, 0.999))

    if cuda == True:
        dis_i.cuda()
        dis_v.cuda()
        gen_i.cuda()
        gru.cuda()
        criterion.cuda()

    trained_path = os.path.join(current_path, args.trained_path)
    video_lengths = [video.shape[1] for video in videos]

    if pre_train == True:
        checkpoint = torch.load(trained_path + '/last_state')
        start_epoch = checkpoint['epoch']
        Gi_loss = checkpoint['Gi']
        Gv_loss = checkpoint['Gv']
        Dv_loss = checkpoint['Dv']
        Di_loss = checkpoint['Di']
        dis_i.load_state_dict(torch.load(trained_path + '/Image_Discriminator.model'))
        dis_v.load_state_dict(torch.load(trained_path + '/Video_Discriminator.model'))
        gen_i.load_state_dict(torch.load(trained_path + '/Generator.model'))
        gru.load_state_dict(torch.load(trained_path + '/GRU.model'))
        optim_Di.load_state_dict(torch.load(trained_path + '/Image_Discriminator.state'))
        optim_Dv.load_state_dict(torch.load(trained_path + '/Video_Discriminator.state'))
        optim_Gi.load_state_dict(torch.load(trained_path + '/Generator.state'))
        optim_GRU.load_state_dict(torch.load(trained_path + '/GRU.state'))
        print("Using Pre-trained model")

    def checkpoint(model, optimizer, epoch):
        state = {'epoch': epoch + 1, 'Gi': Gi_loss, 'Gv': Gv_loss,
                 'Dv': Dv_loss, 'Di': Di_loss}
        torch.save(state, os.path.join(trained_path, 'last_state'))
        filename = os.path.join(trained_path, '%s' % (model.__class__.__name__))
        torch.save(model.state_dict(), filename + '.model')
        torch.save(optimizer.state_dict(), filename + '.state')

    def generate_z(num_frame):
        eps = Variable(torch.randn(batch_size, d_E))
        z_c = Variable(torch.randn(batch_size, 1, d_C))
        z_c = z_c.repeat(1, num_frame, 1)
        if cuda == True:
            z_c, eps = z_c.cuda(), eps.cuda()
        # Initialize the hidden state for the GRU
        gru.initHidden(batch_size)
        z_m = gru(eps, num_frame).transpose(1, 0)
        z = torch.cat((z_m, z_c), 2)  # (batch_size, num_frame, nz)
        return z

    if pre_train == -1:
        Gi_loss = []
        Gv_loss = []
        Di_loss = []
        Dv_loss = []

    for epoch in range(start_epoch, args.epochs + 1):
        start_time = time.time()
        # (batch_size, channel, T, img_size, img_size)
        real_videos = Variable(randomVideo(videos, batch_size, T))
        if cuda == True:
            real_videos = real_videos.cuda()
        real_imgs = real_videos[:, :, np.random.randint(0, T), :, :]
        num_frame = video_lengths[np.random.randint(0, num_vid)]
        # Generate Z with num_frame frames
        Z = generate_z(num_frame).view(batch_size, num_frame, nz, 1, 1)
        Z = sample(Z, T).contiguous().view(batch_size * T, nz, 1, 1)
        # So that conv layers map (nz, 1, 1) noise to a
        # (channel, img_size, img_size) image frame
        fake_vid = gen_i(Z).view(batch_size, T, channel, img_size, img_size)
        fake_vid = fake_vid.transpose(2, 1)
        # Sample a fake image from the fake_vid frames
        fake_img = fake_vid[:, :, np.random.randint(0, T), :, :]

        r_label = Variable(torch.FloatTensor(batch_size, 1).fill_(0.9)).to(args.device)
        f_label = Variable(torch.FloatTensor(batch_size, 1).fill_(0.0)).to(args.device)

        # Train the discriminators #
        # Video discriminator
        dis_v.zero_grad()
        outputs = dis_v(real_videos)
        loss = criterion(outputs, r_label)
        loss.backward()
        real_loss = loss
        outputs = dis_v(fake_vid.detach())
        loss = criterion(outputs, f_label)
        loss.backward()
        fake_loss = loss
        dv_loss = real_loss + fake_loss
        optim_Dv.step()

        # Image discriminator
        dis_i.zero_grad()
        r_outputs = dis_i(real_imgs)
        lossi = criterion(r_outputs, r_label)
        lossi.backward()
        real_lossi = lossi
        f_outputs = dis_i(fake_img.detach())
        fake_lossi = criterion(f_outputs, f_label)
        fake_lossi.backward()
        di_loss = real_lossi + fake_lossi
        optim_Di.step()

        # Train the generator and GRU #
        gen_i.zero_grad()
        gru.zero_grad()
        gen_outputs = dis_v(fake_vid)
        gv_loss = criterion(gen_outputs, r_label)
        gv_loss.backward(retain_graph=True)
        gen_out = dis_i(fake_img)
        gi_loss = criterion(gen_out, r_label)
        gi_loss.backward()
        optim_Gi.step()
        optim_GRU.step()

        Gi_loss.append(gi_loss.item())
        Gv_loss.append(gv_loss.item())
        Dv_loss.append(dv_loss.item())
        Di_loss.append(di_loss.item())
        end_time = time.time()

        if epoch % 100 == 0:
            print('[%d/%d] Time_taken: %f || Gi loss: %.3f || Gv loss: %.3f || Di loss: %.3f || Dv loss: %.3f'
                  % (epoch, args.epochs, end_time - start_time,
                     gi_loss, gv_loss, di_loss, dv_loss))
        if epoch % 5000 == 0:
            checkpoint(dis_i, optim_Di, epoch)
            checkpoint(dis_v, optim_Dv, epoch)
            checkpoint(gen_i, optim_Gi, epoch)
            checkpoint(gru, optim_GRU, epoch)
        if epoch % 1000 == 0:
            save_video(fake_vid[0].data.cpu().numpy().transpose(1, 2, 3, 0),
                       epoch, current_path)

    # Plot the losses
    plt.plot(Gi_loss, label='Image Generator')
    plt.plot(Gv_loss, label='Video Generator')
    plt.plot(Di_loss, label='Image Discriminator')
    plt.plot(Dv_loss, label='Video Discriminator')
    plt.legend()
    plt.savefig("plot.png")
print("Generation:") raw_data = ptb_raw_data(data_path=DATAPATH) train_data, valid_data, test_data, word_to_id, id_2_word = raw_data vocab_size = len(word_to_id) RNN = RNN(emb_size=200, hidden_size=1500, seq_len=0, batch_size=20, num_layers=2, vocab_size=vocab_size, dp_keep_prob=0.35).to(device) GRU = GRU(emb_size=200, hidden_size=1500, seq_len=0, batch_size=20, num_layers=2, vocab_size=vocab_size, dp_keep_prob=0.35).to(device) for seq_len in seq_lens: print("Sequence length: ", seq_len) #RNN output #Load "Best params model" RNN.seq_len = seq_len RNN.load_state_dict( torch.load(RNN_bestparams_path, map_location=device)) RNN_generation = generation(RNN, train_data, valid_data, test_data, word_to_id, id_2_word, seq_len, BatchSize) # print("RNN generated:") # print(RNN_generation) with open(os.path.join(OUTPUTPATH, 'RNN_%s_samples.txt' % (seq_len)),
def objective(args):
    global count
    count += 1
    print('-------------------------------------------------------------------')
    print('Trial %d' % count)
    print(args)

    lr = args['l_rate']
    weight_decay = args['weight_decay']
    dropout = args['dropout']
    clip = args['clip']
    if mode == 'full':
        if model_name == 'sru':
            phi_size = int(args['phi_size'])
            r_size = int(args['r_size'])
            cell_out_size = int(args['cell_out_size'])
        elif model_name in ['gru', 'lstm']:
            hidden_size = int(args['hidden_size'])
            num_layers = int(args['num_layers'])
            init_forget_bias = args['init_forget_bias']
    elif mode == 'limited':
        if model_name == 'sru':
            phi_size = 200
            r_size = 60
            cell_out_size = 200
        elif model_name in ['gru', 'lstm']:
            hidden_size = 200
            num_layers = 1
            init_forget_bias = 1

    train_X, test_X, train_y, test_y = load_mnist()
    input_size = train_X.shape[2]
    output_size = np.unique(train_y).size

    # Create the model instance
    if model_name == 'sru':
        model = SRU(input_size, phi_size, r_size, cell_out_size, output_size,
                    dropout=dropout, gpu=gpu)
        model.initWeight()
    elif model_name == 'gru':
        model = GRU(input_size, hidden_size, output_size, num_layers, dropout,
                    gpu=gpu)
        model.initWeight(init_forget_bias)
    elif model_name == 'lstm':
        model = LSTM(input_size, hidden_size, output_size, num_layers, dropout,
                     gpu=gpu)
        model.initWeight(init_forget_bias)
    if gpu == True:
        model.cuda()

    # Define the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)

    ''' Training '''
    n_batches = train_X.shape[0] // batch_size
    n_batches_test = test_X.shape[0] // batch_size
    all_cost, all_acc = [], []
    start_time = time.time()
    stop_count = 0
    for epoch in range(n_epochs):
        train_cost, test_cost, train_acc, test_acc = 0, 0, 0, 0
        train_X, train_y = shuffle(train_X, train_y, random_state=seed)

        # Training
        model.train()
        # Convert to X.shape => (seq_len, n_samples, n_features)
        train_X_t = np.transpose(train_X, (1, 0, 2))
        for i in range(n_batches):
            start = i * batch_size
            end = start + batch_size
            inputs, labels = train_X_t[:, start:end, :], train_y[start:end]
            inputs, labels = Variable(torch.from_numpy(inputs)), Variable(
                torch.from_numpy(labels))
            if gpu == True:
                inputs, labels = inputs.cuda(), labels.cuda()
            cost, accuracy = train(model, inputs, labels, optimizer, criterion,
                                   clip)
            train_cost += cost / n_batches
            train_acc += accuracy / n_batches

        # Validation
        model.eval()
        test_X_t = np.transpose(test_X, (1, 0, 2))
        for i in range(n_batches_test):
            start = i * batch_size
            end = start + batch_size
            inputs, labels = test_X_t[:, start:end, :], test_y[start:end]
            inputs, labels = Variable(torch.from_numpy(inputs)), Variable(
                torch.from_numpy(labels))
            if gpu == True:
                inputs, labels = inputs.cuda(), labels.cuda()
            cost, accuracy = test(model, inputs, labels, criterion)
            test_cost += cost / n_batches_test
            test_acc += accuracy / n_batches_test

        print('EPOCH:: %i, (%s) train_cost: %.3f, test_cost: %.3f, '
              'train_acc: %.3f, test_acc: %.3f'
              % (epoch + 1, timeSince(start_time), train_cost, test_cost,
                 train_acc, test_acc))

        # Stop training when the cost explodes (NaN or extremely high)
        if test_cost != test_cost or test_cost > 100000:
            print('Stop learning due to the extremely high cost')
            all_acc.append(test_acc)
            break

        # Early stopping when test_cost has not decreased for 5 consecutive epochs
        if len(all_cost) > 0 and test_cost >= all_cost[-1]:
            stop_count += 1
        else:
            stop_count = 0
        if stop_count == 5:
            print('Early stopping observing no learning')
            all_acc.append(test_acc)
            break

        # Save the model only when test_acc exceeds every previous epoch
        if len(all_acc) == 0 or test_acc > max(all_acc):
            checkpoint(model, optimizer, test_acc * 10000)

        all_cost.append(test_cost)
        all_acc.append(test_acc)

    print('max test_acc: %.3f' % max(all_acc))
    # Let hyperopt minimize the negative of the best test accuracy
    return -max(all_acc)
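# Hedged driver sketch for the objective above: assumes hyperopt is installed
# and shows only the four search keys shared by every model; the bounds are
# illustrative, not the ones used in the original experiments.
from hyperopt import fmin, hp, tpe

space = {
    'l_rate': hp.loguniform('l_rate', -9, -3),
    'weight_decay': hp.loguniform('weight_decay', -12, -6),
    'dropout': hp.uniform('dropout', 0.0, 0.5),
    'clip': hp.loguniform('clip', 0, 9),
}
# fmin minimizes; objective returns -max(test_acc), so this maximizes accuracy.
best = fmin(objective, space, algo=tpe.suggest, max_evals=50)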
def main():
    # Print the config args
    print(config.transfer_learning)
    print(config.mode)
    print(config.input_size)

    # Fix Seed for Reproducibility #
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(config.seed)

    # Samples, Weights, and Plots Path #
    paths = [config.weights_path, config.plots_path, config.numpy_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = load_data(config.combined_path, config.which_data, config.preprocess,
                     config.resample)
    # id = config.which_data.split('_')[0]
    id = 12  # BOON added
    print("Data of {} is successfully Loaded!".format(config.which_data))
    print(type(data))
    print(data.shape)

    # Plot Time-Series Data #
    if config.plot:
        plot_full(config.plots_path, data, id, config.feature)
        plot_split(config.plots_path, data, id, config.valid_start,
                   config.test_start, config.feature)

    # Min-Max Scaler #
    scaler = MinMaxScaler()
    data.iloc[:, :] = scaler.fit_transform(data)
    print(type(data))

    # Split the Dataset #
    train_X, train_Y, val_X, val_Y, test_X, test_Y, test_shifted = \
        get_time_series_data_(data, config.valid_start, config.test_start,
                              config.feature, config.label, config.window)
    print(train_X.shape)
    print(train_Y.shape)

    # Get Data Loader #
    train_loader, val_loader, test_loader = \
        get_data_loader(train_X, train_Y, val_X, val_Y, test_X, test_Y,
                        config.batch_size)

    # Constants #
    best_val_loss = 100
    best_val_improv = 0

    # Lists #
    train_losses, val_losses = list(), list()
    val_maes, val_mses, val_rmses, val_mapes, val_mpes, val_r2s = \
        list(), list(), list(), list(), list(), list()

    # Prepare Network #
    if config.network == 'dnn':
        model = DNN(config.window, config.hidden_size,
                    config.output_size).to(device)
    elif config.network == 'cnn':
        model = CNN(config.window, config.hidden_size,
                    config.output_size).to(device)
    elif config.network == 'rnn':
        model = RNN(config.input_size, config.hidden_size, config.num_layers,
                    config.output_size).to(device)
    elif config.network == 'lstm':
        model = LSTM(config.input_size, config.hidden_size, config.num_layers,
                     config.output_size, config.bidirectional).to(device)
    elif config.network == 'gru':
        model = GRU(config.input_size, config.hidden_size, config.num_layers,
                    config.output_size).to(device)
    elif config.network == 'recursive':
        model = RecursiveLSTM(config.input_size, config.hidden_size,
                              config.num_layers, config.output_size).to(device)
    elif config.network == 'attentional':
        model = AttentionalLSTM(config.input_size, config.key, config.query,
                                config.value, config.hidden_size,
                                config.num_layers, config.output_size,
                                config.bidirectional).to(device)
    else:
        raise NotImplementedError

    if config.mode == 'train':
        # If fine-tuning #
        print('config.TL = {}'.format(config.transfer_learning))
        if config.transfer_learning:
            print('TL: True')
            model.load_state_dict(
                torch.load(
                    os.path.join(config.weights_path,
                                 'BEST_{}_Device_ID_12.pkl'.format(config.network))))
            for param in model.parameters():
                param.requires_grad = True

        # Loss Function #
        criterion = torch.nn.MSELoss()

        # Optimizer #
        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr,
                                     betas=(0.5, 0.999))
        optimizer_scheduler = get_lr_scheduler(config.lr_scheduler, optimizer,
                                               config)

        # Train and Validation #
        print("Training {} started with total epoch of {} using Driver ID of {}."
              .format(config.network, config.num_epochs, id))
        for epoch in range(config.num_epochs):
            # Train #
            for i, (data, label) in enumerate(train_loader):
                # Data Preparation #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)
                # Forward Data #
                pred = model(data)
                # Calculate Loss #
                train_loss = criterion(pred, label)
                # Back Propagation and Update #
                optimizer.zero_grad()
                train_loss.backward()
                optimizer.step()
                # Add items to Lists #
                train_losses.append(train_loss.item())

            print("Epoch [{}/{}]".format(epoch + 1, config.num_epochs))
            print("Train")
            print("Loss : {:.4f}".format(np.average(train_losses)))
            optimizer_scheduler.step()

            # Validation #
            with torch.no_grad():
                for i, (data, label) in enumerate(val_loader):
                    # Data Preparation #
                    data = data.to(device, dtype=torch.float32)
                    label = label.to(device, dtype=torch.float32)
                    # Forward Data #
                    pred_val = model(data)
                    # Calculate Loss #
                    val_loss = criterion(pred_val, label)
                    val_mae = mean_absolute_error(label.cpu(), pred_val.cpu())
                    val_mse = mean_squared_error(label.cpu(), pred_val.cpu(),
                                                 squared=True)
                    val_rmse = mean_squared_error(label.cpu(), pred_val.cpu(),
                                                  squared=False)
                    val_mpe = mean_percentage_error(label.cpu(), pred_val.cpu())
                    val_mape = mean_absolute_percentage_error(label.cpu(),
                                                              pred_val.cpu())
                    val_r2 = r2_score(label.cpu(), pred_val.cpu())
                    # Add item to Lists #
                    val_losses.append(val_loss.item())
                    val_maes.append(val_mae.item())
                    val_mses.append(val_mse.item())
                    val_rmses.append(val_rmse.item())
                    val_mpes.append(val_mpe.item())
                    val_mapes.append(val_mape.item())
                    val_r2s.append(val_r2.item())

            # Print Statistics #
            print("Validation")
            print("Loss : {:.4f}".format(np.average(val_losses)))
            print(" MAE : {:.4f}".format(np.average(val_maes)))
            print(" MSE : {:.4f}".format(np.average(val_mses)))
            print("RMSE : {:.4f}".format(np.average(val_rmses)))
            print(" MPE : {:.4f}".format(np.average(val_mpes)))
            print("MAPE : {:.4f}".format(np.average(val_mapes)))
            print(" R^2 : {:.4f}".format(np.average(val_r2s)))

            # Save the model only if validation loss decreased #
            curr_val_loss = np.average(val_losses)
            if curr_val_loss < best_val_loss:
                best_val_loss = min(curr_val_loss, best_val_loss)
                # if config.transfer_learning:
                #     torch.save(model.state_dict(), os.path.join(config.weights_path, 'BEST_{}_Device_ID_{}_transfer.pkl'.format(config.network, id)))
                # else:
                #     torch.save(model.state_dict(), os.path.join(config.weights_path, 'BEST_{}_Device_ID_{}.pkl'.format(config.network, id)))
                if config.transfer_learning:
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            config.weights_path,
                            'BEST_{}_Device_ID_{}_transfer_BOON_reshaped.pkl'.format(
                                config.network, id)))
                else:
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            config.weights_path,
                            'BEST_{}_Device_ID_{}_BOON_reshaped.pkl'.format(
                                config.network, id)))
                print("Best model is saved!\n")
                best_val_improv = 0
            elif curr_val_loss >= best_val_loss:
                best_val_improv += 1
                print("Best Validation has not improved for {} epochs.\n"
                      .format(best_val_improv))
                if best_val_improv == 10:
                    break

    elif config.mode == 'test':
        # Prepare Network #
        if config.transfer_learning:
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        config.weights_path,
                        'BEST_{}_Device_ID_{}_transfer_BOON_reshaped.pkl'.format(
                            config.network, id))))
        else:
            model.load_state_dict(
                torch.load(
                    os.path.join(
                        config.weights_path,
                        'BEST_{}_Device_ID_{}_BOON_reshaped.pkl'.format(
                            config.network, id))))
        print("{} for Device ID {} is successfully loaded!".format(
            (config.network).upper(), id))

        with torch.no_grad():
            pred_test, labels = list(), list()
            for i, (data, label) in enumerate(test_loader):
                # Data Preparation #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)
                # Forward Data #
                pred = model(data)
                # Add items to Lists #
                pred_test += pred
                labels += label

        # Derive Metric and Plot #
        if config.transfer_learning:
            pred, actual = test(config.plots_path, id, config.network, scaler,
                                pred_test, labels, test_shifted,
                                transfer_learning=True)
        else:
            pred, actual = test(config.plots_path, id, config.network, scaler,
                                pred_test, labels, test_shifted)
# MODEL SETUP #
###############################################################################
if args["model"] == 'RNN':
    model = RNN(emb_size=args["emb_size"], hidden_size=args["hidden_size"],
                seq_len=args["seq_len"], batch_size=args["batch_size"],
                vocab_size=vocab_size, num_layers=args["num_layers"],
                dp_keep_prob=args["dp_keep_prob"])
elif args["model"] == 'GRU':
    model = GRU(emb_size=args["emb_size"], hidden_size=args["hidden_size"],
                seq_len=args["seq_len"], batch_size=args["batch_size"],
                vocab_size=vocab_size, num_layers=args["num_layers"],
                dp_keep_prob=args["dp_keep_prob"])
elif args["model"] == 'TRANSFORMER':
    if args["debug"]:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # Also, the Transformer has other hyperparameters
        # (such as the number of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args["hidden_size"],
def main(config):
    # Fix Seed #
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)

    # Weights and Plots Path #
    paths = [config.weights_path, config.plots_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = load_data(config.which_data)[[config.feature]]
    data = data.copy()

    # Plot Time-Series Data #
    if config.plot_full:
        plot_full(config.plots_path, data, config.feature)

    scaler = MinMaxScaler()
    data[config.feature] = scaler.fit_transform(data)

    train_loader, val_loader, test_loader = \
        data_loader(data, config.seq_length, config.train_split,
                    config.test_split, config.batch_size)

    # Lists #
    train_losses, val_losses = list(), list()
    val_maes, val_mses, val_rmses, val_mapes, val_mpes, val_r2s = \
        list(), list(), list(), list(), list(), list()
    test_maes, test_mses, test_rmses, test_mapes, test_mpes, test_r2s = \
        list(), list(), list(), list(), list(), list()

    # Constants #
    best_val_loss = 100
    best_val_improv = 0

    # Prepare Network #
    if config.network == 'dnn':
        model = DNN(config.seq_length, config.hidden_size,
                    config.output_size).to(device)
    elif config.network == 'cnn':
        model = CNN(config.seq_length, config.batch_size).to(device)
    elif config.network == 'rnn':
        model = RNN(config.input_size, config.hidden_size, config.num_layers,
                    config.output_size).to(device)
    elif config.network == 'lstm':
        model = LSTM(config.input_size, config.hidden_size, config.num_layers,
                     config.output_size, config.bidirectional).to(device)
    elif config.network == 'gru':
        model = GRU(config.input_size, config.hidden_size, config.num_layers,
                    config.output_size).to(device)
    elif config.network == 'recursive':
        model = RecursiveLSTM(config.input_size, config.hidden_size,
                              config.num_layers, config.output_size).to(device)
    elif config.network == 'attention':
        model = AttentionLSTM(config.input_size, config.key, config.query,
                              config.value, config.hidden_size,
                              config.num_layers, config.output_size,
                              config.bidirectional).to(device)
    else:
        raise NotImplementedError

    # Loss Function #
    criterion = torch.nn.MSELoss()

    # Optimizer #
    optim = torch.optim.Adam(model.parameters(), lr=config.lr,
                             betas=(0.5, 0.999))
    optim_scheduler = get_lr_scheduler(config.lr_scheduler, optim)

    # Train and Validation #
    if config.mode == 'train':
        # Train #
        print("Training {} started with total epoch of {}.".format(
            model.__class__.__name__, config.num_epochs))
        for epoch in range(config.num_epochs):
            for i, (data, label) in enumerate(train_loader):
                # Prepare Data #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)
                # Forward Data #
                pred = model(data)
                # Calculate Loss #
                train_loss = criterion(pred, label)
                # Initialize Optimizer, Back Propagation and Update #
                optim.zero_grad()
                train_loss.backward()
                optim.step()
                # Add item to Lists #
                train_losses.append(train_loss.item())

            # Print Statistics #
            if (epoch + 1) % config.print_every == 0:
                print("Epoch [{}/{}]".format(epoch + 1, config.num_epochs))
                print("Train Loss {:.4f}".format(np.average(train_losses)))

            # Learning Rate Scheduler #
            optim_scheduler.step()

            # Validation #
            with torch.no_grad():
                for i, (data, label) in enumerate(val_loader):
                    # Prepare Data #
                    data = data.to(device, dtype=torch.float32)
                    label = label.to(device, dtype=torch.float32)
                    # Forward Data #
                    pred_val = model(data)
                    # Calculate Loss #
                    val_loss = criterion(pred_val, label)
                    val_mae = mean_absolute_error(label.cpu(), pred_val.cpu())
                    val_mse = mean_squared_error(label.cpu(), pred_val.cpu(),
                                                 squared=True)
                    val_rmse = mean_squared_error(label.cpu(), pred_val.cpu(),
                                                  squared=False)
                    val_mpe = mean_percentage_error(label.cpu(), pred_val.cpu())
                    val_mape = mean_absolute_percentage_error(label.cpu(),
                                                              pred_val.cpu())
                    val_r2 = r2_score(label.cpu(), pred_val.cpu())
                    # Add item to Lists #
                    val_losses.append(val_loss.item())
                    val_maes.append(val_mae.item())
                    val_mses.append(val_mse.item())
                    val_rmses.append(val_rmse.item())
                    val_mpes.append(val_mpe.item())
                    val_mapes.append(val_mape.item())
                    val_r2s.append(val_r2.item())

            if (epoch + 1) % config.print_every == 0:
                # Print Statistics #
                print("Val Loss {:.4f}".format(np.average(val_losses)))
                print("Val MAE : {:.4f}".format(np.average(val_maes)))
                print("Val MSE : {:.4f}".format(np.average(val_mses)))
                print("Val RMSE : {:.4f}".format(np.average(val_rmses)))
                print("Val MPE : {:.4f}".format(np.average(val_mpes)))
                print("Val MAPE : {:.4f}".format(np.average(val_mapes)))
                print("Val R^2 : {:.4f}".format(np.average(val_r2s)))

            # Save the model only if validation loss decreased #
            curr_val_loss = np.average(val_losses)
            if curr_val_loss < best_val_loss:
                best_val_loss = min(curr_val_loss, best_val_loss)
                torch.save(model.state_dict(),
                           os.path.join(config.weights_path,
                                        'BEST_{}.pkl'.format(
                                            model.__class__.__name__)))
                print("Best model is saved!\n")
                best_val_improv = 0
            elif curr_val_loss >= best_val_loss:
                best_val_improv += 1
                print("Best Validation has not improved for {} epochs.\n"
                      .format(best_val_improv))

    elif config.mode == 'test':
        # Load the Model Weight #
        model.load_state_dict(
            torch.load(os.path.join(config.weights_path,
                                    'BEST_{}.pkl'.format(
                                        model.__class__.__name__))))

        # Test #
        with torch.no_grad():
            for i, (data, label) in enumerate(test_loader):
                # Prepare Data #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)
                # Forward Data #
                pred_test = model(data)
                # Convert to Original Value Range #
                pred_test = pred_test.data.cpu().numpy()
                label = label.data.cpu().numpy().reshape(-1, 1)
                pred_test = scaler.inverse_transform(pred_test)
                label = scaler.inverse_transform(label)
                # Calculate Loss #
                test_mae = mean_absolute_error(label, pred_test)
                test_mse = mean_squared_error(label, pred_test, squared=True)
                test_rmse = mean_squared_error(label, pred_test, squared=False)
                test_mpe = mean_percentage_error(label, pred_test)
                test_mape = mean_absolute_percentage_error(label, pred_test)
                test_r2 = r2_score(label, pred_test)
                # Add item to Lists #
                test_maes.append(test_mae.item())
                test_mses.append(test_mse.item())
                test_rmses.append(test_rmse.item())
                test_mpes.append(test_mpe.item())
                test_mapes.append(test_mape.item())
                test_r2s.append(test_r2.item())

            # Print Statistics #
            print("Test {}".format(model.__class__.__name__))
            print("Test MAE : {:.4f}".format(np.average(test_maes)))
            print("Test MSE : {:.4f}".format(np.average(test_mses)))
            print("Test RMSE : {:.4f}".format(np.average(test_rmses)))
            print("Test MPE : {:.4f}".format(np.average(test_mpes)))
            print("Test MAPE : {:.4f}".format(np.average(test_mapes)))
            print("Test R^2 : {:.4f}".format(np.average(test_r2s)))

            # Plot Figure #
            plot_pred_test(pred_test, label, config.plots_path, config.feature,
                           model)
                                 train_dataloader, valid_dataloader,
                                 learning_rate=learning_rate, patience=5)
test_result = TestModel(model, test_dataloader, max_speed)
StoreData(result_dict, model_name, train_result, test_result, directory, model,
          random_seed, save_model)

# GRU
importlib.reload(models)
from models import GRU
importlib.reload(utils)
from utils import TrainModel, TestModel
model_name = 'GRU'
print(model_name)
gru = GRU(A.shape[0])
gru, train_result = TrainModel(gru, train_dataloader, valid_dataloader,
                               learning_rate=learning_rate, patience=5)
test_result = TestModel(gru, test_dataloader, max_speed)
StoreData(result_dict, model_name, train_result, test_result, directory, model,
          random_seed, save_model)

# GRU-I
importlib.reload(models)
from models import GRU
importlib.reload(utils)
from utils import TrainModel, TestModel
model_name = 'GRUI'
word_to_id, id_2_word = _build_vocab(train_path)
vocab_size = len(word_to_id)

# Create the models
rnn = RNN(emb_size=argsdict["RNN_emb_size"],
          hidden_size=argsdict["RNN_hidden_size"],
          seq_len=argsdict["seq_len"], batch_size=argsdict["batch_size"],
          vocab_size=vocab_size, num_layers=argsdict["RNN_num_layers"],
          dp_keep_prob=1)
gru = GRU(emb_size=argsdict["GRU_emb_size"],
          hidden_size=argsdict["GRU_hidden_size"],
          seq_len=argsdict["seq_len"], batch_size=argsdict["batch_size"],
          vocab_size=vocab_size, num_layers=argsdict["GRU_num_layers"],
          dp_keep_prob=1)

# Load the model weights
rnn.load_state_dict(torch.load(args.RNN_path))
gru.load_state_dict(torch.load(args.GRU_path))
rnn.eval()
gru.eval()

# Initialize the hidden states
hidden = [rnn.init_hidden(), gru.init_hidden()]

# Set the random seed manually for reproducibility.
# Load data
training_set = SignalDataset_iq(path, train=True)
train_loader = torch.utils.data.DataLoader(training_set, **params_dataloader)
test_set = SignalDataset_iq(path, train=False)
test_loader = torch.utils.data.DataLoader(test_set, **params_dataloader)

# Get num_classes from the training data set
num_classes = training_set.num_classes

# Init model
if arch == "rnn":
    model = RNN(**params_model, output_size=num_classes).to(device=device)
elif arch == "gru":
    model = GRU(**params_model, output_size=num_classes).to(device=device)
elif arch == "lstm":
    model = LSTM(**params_model, output_size=num_classes).to(device=device)
else:
    raise Exception(
        "Only 'rnn', 'gru', and 'lstm' are available model options.")

print("Model size: {0}".format(count_parameters(model)))

criterion = nn.NLLLoss()
op = torch.optim.SGD(model.parameters(), **params_op)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(op, patience=4,
                                                       factor=0.5, verbose=True)
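# `count_parameters` is used in the print above but not defined in this
# snippet. A minimal sketch, assuming it counts trainable parameters only:
def count_parameters(model):
    # Sum the element counts of all parameters that require gradients.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)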
def main(args):
    """Train or evaluate a univariate time-series forecaster chosen by args.model."""
    # Fix Seed #
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Weights and Plots Path #
    paths = [args.weights_path, args.plots_path, args.numpy_path]
    for path in paths:
        make_dirs(path)

    # Prepare Data #
    data = load_data(args.which_data)[[args.feature]]
    data = data.copy()

    # Plot Time-Series Data #
    if args.plot_full:
        plot_full(args.plots_path, data, args.feature)

    scaler = MinMaxScaler()
    data[args.feature] = scaler.fit_transform(data)

    # Split the Dataset #
    copied_data = data.copy().values

    if args.multi_step:
        X, y = split_sequence_multi_step(copied_data, args.seq_length,
                                         args.output_size)
        step = 'MultiStep'
    else:
        X, y = split_sequence_uni_step(copied_data, args.seq_length)
        step = 'SingleStep'

    train_loader, val_loader, test_loader = data_loader(
        X, y, args.train_split, args.test_split, args.batch_size)

    # Lists #
    train_losses, val_losses = list(), list()
    val_maes, val_mses, val_rmses, val_mapes, val_mpes, val_r2s = \
        list(), list(), list(), list(), list(), list()
    test_maes, test_mses, test_rmses, test_mapes, test_mpes, test_r2s = \
        list(), list(), list(), list(), list(), list()
    pred_tests, labels = list(), list()

    # Constants #
    best_val_loss = float('inf')  # guarantees the first validation result is saved
    best_val_improv = 0

    # Prepare Network #
    if args.model == 'dnn':
        model = DNN(args.seq_length, args.hidden_size,
                    args.output_size).to(device)
    elif args.model == 'cnn':
        model = CNN(args.seq_length, args.batch_size,
                    args.output_size).to(device)
    elif args.model == 'rnn':
        model = RNN(args.input_size, args.hidden_size, args.num_layers,
                    args.output_size).to(device)
    elif args.model == 'lstm':
        model = LSTM(args.input_size, args.hidden_size, args.num_layers,
                     args.output_size, args.bidirectional).to(device)
    elif args.model == 'gru':
        model = GRU(args.input_size, args.hidden_size, args.num_layers,
                    args.output_size).to(device)
    elif args.model == 'attentional':
        model = AttentionalLSTM(args.input_size, args.qkv, args.hidden_size,
                                args.num_layers, args.output_size,
                                args.bidirectional).to(device)
    else:
        raise NotImplementedError

    # Loss Function #
    criterion = torch.nn.MSELoss()

    # Optimizer #
    optim = torch.optim.Adam(model.parameters(), lr=args.lr,
                             betas=(0.5, 0.999))
    optim_scheduler = get_lr_scheduler(args.lr_scheduler, optim)

    # Train and Validation #
    if args.mode == 'train':
        # Train #
        print("Training {} using {} started with total epoch of {}.".format(
            model.__class__.__name__, step, args.num_epochs))

        for epoch in range(args.num_epochs):
            for i, (data, label) in enumerate(train_loader):
                # Prepare Data #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred = model(data)

                # Calculate Loss #
                train_loss = criterion(pred, label)

                # Initialize Optimizer, Back Propagation and Update #
                optim.zero_grad()
                train_loss.backward()
                optim.step()

                # Add item to Lists #
                train_losses.append(train_loss.item())

            # Print Statistics #
            if (epoch + 1) % args.print_every == 0:
                print("Epoch [{}/{}]".format(epoch + 1, args.num_epochs))
                print("Train Loss {:.4f}".format(np.average(train_losses)))

            # Learning Rate Scheduler #
            optim_scheduler.step()

            # Validation #
            with torch.no_grad():
                for i, (data, label) in enumerate(val_loader):
                    # Prepare Data #
                    data = data.to(device, dtype=torch.float32)
                    label = label.to(device, dtype=torch.float32)

                    # Forward Data #
                    pred_val = model(data)

                    # Calculate Loss #
                    val_loss = criterion(pred_val, label)

                    if args.multi_step:
                        pred_val = np.mean(pred_val.detach().cpu().numpy(),
                                           axis=1)
                        label = np.mean(label.detach().cpu().numpy(), axis=1)
                    else:
                        pred_val, label = pred_val.cpu(), label.cpu()

                    # Calculate Metrics #
                    val_mae = mean_absolute_error(label, pred_val)
                    val_mse = mean_squared_error(label, pred_val, squared=True)
                    val_rmse = mean_squared_error(label, pred_val,
                                                  squared=False)
                    val_mpe = mean_percentage_error(label, pred_val)
                    val_mape = mean_absolute_percentage_error(label, pred_val)
                    val_r2 = r2_score(label, pred_val)

                    # Add item to Lists #
                    val_losses.append(val_loss.item())
                    val_maes.append(val_mae.item())
                    val_mses.append(val_mse.item())
                    val_rmses.append(val_rmse.item())
                    val_mpes.append(val_mpe.item())
                    val_mapes.append(val_mape.item())
                    val_r2s.append(val_r2.item())

            if (epoch + 1) % args.print_every == 0:
                # Print Statistics #
                print("Val Loss {:.4f}".format(np.average(val_losses)))
                print(" MAE : {:.4f}".format(np.average(val_maes)))
                print(" MSE : {:.4f}".format(np.average(val_mses)))
                print("RMSE : {:.4f}".format(np.average(val_rmses)))
                print(" MPE : {:.4f}".format(np.average(val_mpes)))
                print("MAPE : {:.4f}".format(np.average(val_mapes)))
                print(" R^2 : {:.4f}".format(np.average(val_r2s)))

                # Save the model only if validation loss decreased #
                curr_val_loss = np.average(val_losses)

                if curr_val_loss < best_val_loss:
                    best_val_loss = curr_val_loss
                    torch.save(
                        model.state_dict(),
                        os.path.join(
                            args.weights_path,
                            'BEST_{}_using_{}.pkl'.format(
                                model.__class__.__name__, step)))
                    print("Best model is saved!\n")
                    best_val_improv = 0
                else:
                    best_val_improv += 1
                    print("Best Validation has not improved for {} epochs.\n"
                          .format(best_val_improv))

    elif args.mode == 'test':
        # Load the Model Weight #
        model.load_state_dict(
            torch.load(
                os.path.join(
                    args.weights_path,
                    'BEST_{}_using_{}.pkl'.format(model.__class__.__name__,
                                                  step))))

        # Test #
        with torch.no_grad():
            for i, (data, label) in enumerate(test_loader):
                # Prepare Data #
                data = data.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                # Forward Data #
                pred_test = model(data)

                # Convert to Original Value Range #
                pred_test = pred_test.detach().cpu().numpy()
                label = label.detach().cpu().numpy()

                pred_test = scaler.inverse_transform(pred_test)
                label = scaler.inverse_transform(label)

                if args.multi_step:
                    pred_test = np.mean(pred_test, axis=1)
                    label = np.mean(label, axis=1)

                pred_tests += pred_test.tolist()
                labels += label.tolist()

                # Calculate Loss #
                test_mae = mean_absolute_error(label, pred_test)
                test_mse = mean_squared_error(label, pred_test, squared=True)
                test_rmse = mean_squared_error(label, pred_test, squared=False)
                test_mpe = mean_percentage_error(label, pred_test)
                test_mape = mean_absolute_percentage_error(label, pred_test)
                test_r2 = r2_score(label, pred_test)

                # Add item to Lists #
                test_maes.append(test_mae.item())
                test_mses.append(test_mse.item())
                test_rmses.append(test_rmse.item())
                test_mpes.append(test_mpe.item())
                test_mapes.append(test_mape.item())
                test_r2s.append(test_r2.item())

        # Print Statistics #
        print("Test {} using {}".format(model.__class__.__name__, step))
        print(" MAE : {:.4f}".format(np.average(test_maes)))
        print(" MSE : {:.4f}".format(np.average(test_mses)))
        print("RMSE : {:.4f}".format(np.average(test_rmses)))
        print(" MPE : {:.4f}".format(np.average(test_mpes)))
        print("MAPE : {:.4f}".format(np.average(test_mapes)))
        print(" R^2 : {:.4f}".format(np.average(test_r2s)))

        # Plot Figure #
        plot_pred_test(pred_tests[:args.time_plot], labels[:args.time_plot],
                       args.plots_path, args.feature, model, step)

        # Save Numpy files #
        np.save(
            os.path.join(
                args.numpy_path,
                '{}_using_{}_TestSet.npy'.format(model.__class__.__name__,
                                                 step)),
            np.asarray(pred_tests))
        np.save(
            os.path.join(args.numpy_path,
                         'TestSet_using_{}.npy'.format(step)),
            np.asarray(labels))

    else:
        raise NotImplementedError
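# --- Usage sketch (not part of the original script) -----------------------
# A minimal, hypothetical entry point for main(args) above. The flag names
# simply mirror the attributes that main() reads (args.mode, args.model,
# args.seq_length, ...); every default value here is an illustrative
# assumption, not the project's actual configuration.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, default='train',
                        choices=['train', 'test'])
    parser.add_argument('--model', type=str, default='gru',
                        choices=['dnn', 'cnn', 'rnn', 'lstm', 'gru',
                                 'attentional'])
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--which_data', type=str, default='./data/series.csv')
    parser.add_argument('--feature', type=str, default='Close')
    parser.add_argument('--seq_length', type=int, default=30)
    parser.add_argument('--input_size', type=int, default=1)
    parser.add_argument('--hidden_size', type=int, default=64)
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--output_size', type=int, default=1)
    parser.add_argument('--bidirectional', action='store_true')
    parser.add_argument('--qkv', type=int, default=5)
    parser.add_argument('--multi_step', action='store_true')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--train_split', type=float, default=0.8)
    parser.add_argument('--test_split', type=float, default=0.5)
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--print_every', type=int, default=10)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--lr_scheduler', type=str, default='cosine')
    parser.add_argument('--plot_full', action='store_true')
    parser.add_argument('--time_plot', type=int, default=100)
    parser.add_argument('--weights_path', type=str, default='./results/weights')
    parser.add_argument('--plots_path', type=str, default='./results/plots')
    parser.add_argument('--numpy_path', type=str, default='./results/numpy')
    main(parser.parse_args())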
device = 'cuda'

# Video (temporal) discriminator
dis_v = Discriminator_V(nc, ndf, T=T, ngpu=ngpu)

# Image discriminator / generator: either the default pair or a
# resolution-configurable pair selected via args.size
if args.model == 'default':
    dis_i = Discriminator_I(nc, ndf, ngpu=ngpu)
    gen_i = Generator_I(nc, ngf, nz, ngpu=ngpu)
else:
    args.latent = 512
    args.n_mlp = 8
    dis_i = Discriminator(
        args.size, channel_multiplier=args.channel_multiplier).to(device)
    gen_i = Generator(args.size, args.latent, args.n_mlp,
                      channel_multiplier=args.channel_multiplier).to(device)

# Recurrent generator for the per-frame latent codes
gru = GRU(d_E, hidden_size, gpu=cuda)
gru.initWeight()

''' prepare for train '''

label = torch.FloatTensor()


def timeSince(since):
    """Format the wall-clock time elapsed since `since` as 'Dd Hh Mm Ss'."""
    now = time.time()
    s = now - since
    d = math.floor(s / ((60 ** 2) * 24))
    h = math.floor(s / (60 ** 2)) - d * 24
    m = math.floor(s / 60) - h * 60 - d * 24 * 60
    s = s - m * 60 - h * (60 ** 2) - d * 24 * (60 ** 2)
    return '%dd %dh %dm %ds' % (d, h, m, s)
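# Quick standalone sanity check for timeSince() above; the offset is an
# illustrative assumption (1 day, 1 hour, 1 minute, 1 second ago).
import time

start = time.time() - (24 * 3600 + 3600 + 60 + 1)
print(timeSince(start))  # -> '1d 1h 1m 1s'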
def main():
    """ Main Function """
    args = Config(yaml_file='./config.yaml')
    args.build()
    args.Print()

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    place = set_device("gpu" if args.use_cuda else "cpu")
    fluid.enable_dygraph(place)

    processor = EmoTectProcessor(data_dir=args.data_dir,
                                 vocab_path=args.vocab_path,
                                 random_seed=args.random_seed)
    num_labels = args.num_labels

    if args.model_type == 'cnn_net':
        model = CNN(args.vocab_size, args.max_seq_len)
    elif args.model_type == 'bow_net':
        model = BOW(args.vocab_size, args.max_seq_len)
    elif args.model_type == 'lstm_net':
        model = LSTM(args.vocab_size, args.max_seq_len)
    elif args.model_type == 'gru_net':
        model = GRU(args.vocab_size, args.max_seq_len)
    elif args.model_type == 'bigru_net':
        model = BiGRU(args.vocab_size, args.batch_size, args.max_seq_len)
    else:
        raise ValueError("Unknown model type!")

    inputs = [Input([None, args.max_seq_len], 'int64', name='doc')]
    optimizer = None
    labels = None

    if args.do_train:
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            places=place,
            phase='train',
            epoch=args.epoch,
            padding_size=args.max_seq_len)

        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size + 1

        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        labels = [Input([None, 1], 'int64', name='label')]
        optimizer = fluid.optimizer.Adagrad(
            learning_rate=args.lr, parameter_list=model.parameters())

        # Evaluate on the dev split during training, if requested
        test_data_generator = None
        if args.do_val:
            test_data_generator = processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=1,
                places=place,
                padding_size=args.max_seq_len)
    elif args.do_val:
        # Standalone evaluation runs against the test split
        test_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='test',
            epoch=1,
            places=place,
            padding_size=args.max_seq_len)
    elif args.do_infer:
        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='infer',
            epoch=1,
            places=place,
            padding_size=args.max_seq_len)

    model.prepare(optimizer,
                  CrossEntropy(),
                  Accuracy(topk=(1, )),
                  inputs,
                  labels,
                  device=place)

    if args.do_train:
        if args.init_checkpoint:
            model.load(args.init_checkpoint)
    elif args.do_val or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or inference!")
        model.load(args.init_checkpoint, reset_optimizer=True)

    if args.do_train:
        model.fit(train_data=train_data_generator,
                  eval_data=test_data_generator,
                  batch_size=args.batch_size,
                  epochs=args.epoch,
                  save_dir=args.checkpoints,
                  eval_freq=args.eval_freq,
                  save_freq=args.save_freq)
    elif args.do_val:
        eval_result = model.evaluate(eval_data=test_data_generator,
                                     batch_size=args.batch_size)
        print("Final eval result: acc: {:.4f}, loss: {:.4f}".format(
            eval_result['acc'], eval_result['loss'][0]))
    elif args.do_infer:
        preds = model.predict(test_data=infer_data_generator)
        preds = np.array(preds[0]).reshape((-1, args.num_labels))

        if args.output_dir:
            with open(os.path.join(args.output_dir, 'predictions.json'),
                      'w') as w:
                for p in range(len(preds)):
                    label = np.argmax(preds[p])
                    result = json.dumps({
                        'index': p,
                        'label': int(label),  # cast: np.int64 is not JSON-serializable
                        'probs': preds[p].tolist()
                    })
                    w.write(result + '\n')
            print('Predictions saved at ' +
                  os.path.join(args.output_dir, 'predictions.json'))
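# Sketch for consuming the line-delimited predictions.json written above.
# The field names ('index', 'label', 'probs') come from the writer loop;
# the output directory path is an assumption.
import json
import os

with open(os.path.join('output', 'predictions.json')) as f:
    for line in f:
        rec = json.loads(line)
        # index of the example, predicted class id, and its top probability
        print(rec['index'], rec['label'], max(rec['probs']))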