def setup():
    args = parse_args()
    config = parse_config(args)

    # Seed all RNGs for reproducibility
    np.random.seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.manual_seed(config.seed)

    paper_dataset = NIPS2015Dataset(data_folder='datasets')
    codec = get_codec()
    model = GPT2(config)

    # Download the pretrained GPT-2 checkpoint if it is not cached locally
    if not os.path.exists('gpt2-pytorch_model.bin'):
        print("Downloading GPT-2 checkpoint...")
        url = 'https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin'
        r = requests.get(url, allow_redirects=True)
        open('gpt2-pytorch_model.bin', 'wb').write(r.content)

    model = load_weight(
        model, torch.load('gpt2-pytorch_model.bin', map_location=device))
    model = model.to(device)
    model.eval()

    if not os.path.exists('submit'):
        os.makedirs('submit')
    return codec, model, paper_dataset, config
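# The load_weight helper used above is not defined in this snippet. A minimal
# sketch of what it might do (an assumption, not the original implementation):
# copy matching tensors from the downloaded state dict into the model,
# skipping keys whose names or shapes do not line up.
def load_weight(model, state_dict):
    own_state = model.state_dict()
    for name, param in state_dict.items():
        if name in own_state and own_state[name].shape == param.shape:
            own_state[name].copy_(param)  # in-place copy of the pretrained tensor
    model.load_state_dict(own_state)
    return model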
def main(args: Namespace) -> None:
    if args.seed_everything:
        seed_everything(0)  # For reproducibility

    # Initialize the tokenizer the same way we did when training (in MemesDataModule)
    tokenizer = GPT2TokenizerFast.from_pretrained(args.gpt2_model_type)
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})

    # Validate
    memes_module = MemesDataModule(args)
    model = GPT2.load_from_checkpoint(args.checkpoint, args=args, tokenizer=tokenizer)
    Trainer().test(model, datamodule=memes_module)
def main(args: Namespace) -> None:
    if args.seed_everything:
        seed_everything(0)  # For reproducibility

    datamodule = MemesDataModule(args)
    model = GPT2(args=args, tokenizer=datamodule.tokenizer)
    trainer = Trainer.from_argparse_args(
        args,
        callbacks=[
            ProgressBar(),
            # Save a checkpoint after every epoch
            ModelCheckpoint(monitor='train_loss',
                            save_top_k=args.max_epochs,
                            save_weights_only=True)
        ])
    trainer.tune(model, datamodule=datamodule)
    trainer.fit(model, datamodule)
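# A hypothetical entry point showing how the main() above could be wired up.
# Trainer.add_argparse_args is the pre-2.0 PyTorch Lightning way to expose
# Trainer flags such as --max_epochs, which the ModelCheckpoint above relies
# on; the other flag names are assumptions based on the attributes main() reads.
if __name__ == '__main__':
    from argparse import ArgumentParser
    from pytorch_lightning import Trainer

    parser = ArgumentParser()
    parser.add_argument('--seed_everything', action='store_true')
    parser.add_argument('--gpt2_model_type', type=str, default='gpt2')
    parser = Trainer.add_argparse_args(parser)  # adds --max_epochs, --gpus, ...
    main(parser.parse_args())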
def load_model(model_path):
    """Load GPT2 model from TF2 save file"""
    # Load hyperparameters
    with open(path_join(model_path, "hparams.json"), "r") as file:
        hparams = json.load(file)

    # Initialize the GPT2 model
    gpt2 = GPT2(hparams["n_layer"], hparams["n_head"], hparams["n_vocab"],
                hparams["n_ctx"], hparams["n_embd"])

    # Load weights
    gpt2.load_weights(path_join(model_path, "weights"))
    return gpt2
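# Hypothetical usage of load_model. The path and token ids are illustrative;
# the directory is assumed to hold hparams.json plus the "weights" checkpoint
# written earlier via gpt2.save_weights.
gpt2 = load_model("models/124M")
logits = gpt2(tf.constant([[464, 3290]], dtype=tf.int32))  # (batch=1, seq=2)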
def setup():
    args = parse_args()
    config = parse_config(args)
    np.random.seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)
    torch.manual_seed(config.seed)

    codec = get_codec()
    model = GPT2(config)
    model = load_weight(
        model, torch.load('gpt2-pytorch_model.bin', map_location=device))
    model = model.to(device)
    model.eval()

    if not os.path.exists('submit'):
        os.makedirs('submit')
    return codec, model, config
def v1_to_v2(model_path, save_path):
    """Load the GPT2 model from TF1 checkpoint file and save it using TF2"""
    # Load hyperparameters
    with open(path_join(model_path, "hparams.json"), "r") as file:
        hparams = json.load(file)

    # Initialize the GPT2 model
    gpt2 = GPT2(hparams["n_layer"], hparams["n_head"], hparams["n_vocab"],
                hparams["n_ctx"], hparams["n_embd"])

    # Build the model using fake input
    fake_input = tf.constant([0], shape=[1, 1], dtype=tf.int32)
    _ = gpt2(fake_input)

    # Get the checkpoint containing the variables, and a reader for it
    ckpt = tf.train.latest_checkpoint(model_path)
    reader = py_checkpoint_reader.NewCheckpointReader(ckpt)

    # Load the variables
    load_weights("model", ["wte", "wpe"], gpt2.word_embedder, reader)
    load_weights("model/ln_f", ["g", "b"], gpt2.final_norm, reader)
    for layer_index in range(hparams["n_layer"]):
        load_weights("model/h%d/attn/c_attn" % layer_index, ["w", "b"],
                     gpt2.blocks[layer_index].attn.expander, reader)
        load_weights("model/h%d/attn/c_proj" % layer_index, ["w", "b"],
                     gpt2.blocks[layer_index].attn.compressor, reader)
        load_weights("model/h%d/ln_1" % layer_index, ["g", "b"],
                     gpt2.blocks[layer_index].attn_norm, reader)
        load_weights("model/h%d/mlp/c_fc" % layer_index, ["w", "b"],
                     gpt2.blocks[layer_index].position_wise.dense1, reader)
        load_weights("model/h%d/mlp/c_proj" % layer_index, ["w", "b"],
                     gpt2.blocks[layer_index].position_wise.dense2, reader)
        load_weights("model/h%d/ln_2" % layer_index, ["g", "b"],
                     gpt2.blocks[layer_index].position_wise_norm, reader)

    # Save the model in TF2 format
    save_model(gpt2, save_path)
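# load_weights is assumed above but not shown. A minimal sketch of what it
# might do (an assumption, not the original): read "<scope>/<name>" tensors
# from the TF1 checkpoint reader and assign them, in order, to the target
# layer's variables.
def load_weights(scope, names, layer, reader):
    for variable, name in zip(layer.weights, names):
        tensor = reader.get_tensor(f"{scope}/{name}")
        # TF1 conv1d-style weights may carry a leading singleton dim; reshape to fit.
        variable.assign(tf.reshape(tensor, variable.shape))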
def main():
    config = Settings()  # |TODO| go to Setting()
    train_filename = config.train_file
    # train_filename_1 = config.train_file_1
    # train_filename_2 = config.train_file_2
    test_filename = config.test_file
    dataset_path = os.path.join(os.getcwd(), config.path)

    if not os.path.exists(config.exp_dir):
        os.mkdir(config.exp_dir)
    model_dir = os.path.join(config.exp_dir, config.model_name)
    logger = SummaryWriter(model_dir)

    if config.data_type == 'success':
        # with open(os.path.join(dataset_path, train_filename), 'rb') as f:
        #     train_dataset = pickle.load(f)
        # with open(os.path.join(dataset_path, test_filename), 'rb') as f:
        #     test_dataset = pickle.load(f)
        dataset = glob.glob(f'{dataset_path}/{train_filename}/*.pickle')
        # test_dataset = glob.glob(f'{dataset_path}/{test_filename}/*.pickle')
        # train_dataset = dataset[:1500000]
        # test_dataset = dataset[-200000:]
        train_dataset = dataset[:-20000]
        test_dataset = dataset[-20000:]
        print('#trajectories of train_dataset:', len(train_dataset))
        print('#trajectories of test_dataset:', len(test_dataset))
    elif config.data_type == 'mcts':
        dataset = glob.glob(f'{dataset_path}/{train_filename}/*.pickle')
        train_dataset = dataset[:-20000]
        test_dataset = dataset[-20000:]
        # train_dataset = glob.glob(f'{dataset_path}/{train_filename}/*.pickle')
        # test_dataset = glob.glob(f'{dataset_path}/{test_filename}/*.pickle')

        if config.filter:
            # Keep only trajectories whose total reward exceeds config.filter
            filtered_data_train = []
            filtered_data_test = []
            total_reward_filt = []
            total_reward_not_filt = []
            avg_total_reward_not_filt = 0
            avg_total_reward_filt = 0
            for data in train_dataset:
                with open(data, 'rb') as f:
                    traj = pickle.load(f)
                avg_total_reward_not_filt += traj[-1]
                total_reward_not_filt.append(traj[-1])
                if traj[-1] > config.filter:
                    filtered_data_train.append(data)
                    avg_total_reward_filt += traj[-1]
                    total_reward_filt.append(traj[-1])
            for data in test_dataset:
                with open(data, 'rb') as f:
                    traj = pickle.load(f)
                if traj[-1] > config.filter:
                    filtered_data_test.append(data)

            total_reward_not_filt_std = np.std(np.asarray(total_reward_not_filt))
            total_reward_filt_std = np.std(np.asarray(total_reward_filt))
            print('Average of total reward (not filtered):',
                  avg_total_reward_not_filt / len(train_dataset))
            print('Std of total reward (not filtered):', total_reward_not_filt_std)
            print('Average of total reward (filtered):',
                  avg_total_reward_filt / len(filtered_data_train))
            print('Std of total reward (filtered):', total_reward_filt_std)

            train_dataset = filtered_data_train
            test_dataset = filtered_data_test
            print('#trajectories of train_dataset:', len(train_dataset))
            print('#trajectories of test_dataset:', len(test_dataset))

    # # For a mixed dataset
    # train_dataset_1 = glob.glob(f'{dataset_path}/{train_filename_1}/*.pickle')
    # dataset_2 = glob.glob(f'{dataset_path}/{train_filename_2}/*.pickle')
    # train_dataset_2 = dataset_2[:100000]
    # test_dataset = dataset_2[100000:]
    # if config.filter:
    #     filtered_data_train = []
    #     filtered_data_test = []
    #     total_reward_filt = []
    #     total_reward_not_filt = []
    #     avg_total_reward_not_filt = 0
    #     avg_total_reward_filt = 0
    #     for data in train_dataset_2:
    #         with open(data, 'rb') as f:
    #             traj = pickle.load(f)
    #         avg_total_reward_not_filt += traj[-1]
    #         total_reward_not_filt.append(traj[-1])
    #         if traj[-1] > config.filter:
    #             filtered_data_train.append(data)
    #             avg_total_reward_filt += traj[-1]
    #             total_reward_filt.append(traj[-1])
    #     for data in test_dataset:
    #         with open(data, 'rb') as f:
    #             traj = pickle.load(f)
    #         if traj[-1] > config.filter:
    #             filtered_data_test.append(data)
    #     total_reward_not_filt_std = np.std(np.asarray(total_reward_not_filt))
    #     total_reward_filt_std = np.std(np.asarray(total_reward_filt))
    #     print('Average of total reward (not filtered):', avg_total_reward_not_filt / len(train_dataset_2))
    #     print('Std of total reward (not filtered):', total_reward_not_filt_std)
    #     print('Average of total reward (filtered):', avg_total_reward_filt / len(filtered_data_train))
    #     print('Std of total reward (filtered):', total_reward_filt_std)
    #     train_dataset = train_dataset_1 + filtered_data_train
    #     test_dataset = filtered_data_test
    # print('#trajectories of train_dataset:', len(train_dataset))
    # print('#trajectories of test_dataset:', len(test_dataset))

    # Generate dataloaders
    train_loader = get_loader(config, train_dataset)
    test_loader = get_loader(config, test_dataset)

    # Model
    device = th.device(config.device)
    if config.model == 'GPT':
        model = GPT2(config).to(device)
    elif config.model == 'RNN':
        model = RNN(config).to(device)
    elif config.model == 'LSTM':
        model = LSTM(config).to(device)
    elif config.model == 'CVAE' or config.model == 'PolicyValueNet':
        model = CVAE(config).to(device)
    elif config.model == 'ValueNet':
        model = ValueNet(config).to(device)
    else:
        raise Exception(
            f'"{config.model}" is not supported! You should select "GPT", "RNN", "LSTM", "CVAE", "ValueNet", or "PolicyValueNet".'
        )

    # Optimizer
    optimizer = th.optim.AdamW(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)

    # Learning-rate scheduler
    if config.optimizer == 'AdamW':
        scheduler = th.optim.lr_scheduler.LambdaLR(
            optimizer, lambda step: min((step + 1) / config.warmup_step, 1))
    elif config.optimizer == 'AdamWR':
        scheduler = CosineAnnealingWarmUpRestarts(optimizer=optimizer,
                                                  T_0=config.T_0,
                                                  T_mult=config.T_mult,
                                                  eta_max=config.lr_max,
                                                  T_up=config.warmup_step,
                                                  gamma=config.lr_mult)
    else:
        raise Exception(
            f'"{config.optimizer}" is not supported! You should select "AdamW" or "AdamWR".'
        )

    # Metric
    # |TODO| implement Chamfer distance
    if config.model == 'CVAE':
        loss_fn = ELBOLoss(config)
        eval_fn = ELBOLoss(config)
    elif config.model == 'ValueNet':
        loss_fn = RegressionLossValue(config)
        eval_fn = RegressionLossValue(config)
    elif config.model == 'PolicyValueNet':
        loss_fn = None
        eval_fn = None
    else:
        loss_fn = RegressionLossPolicy(config)
        eval_fn = RegressionLossPolicy(config)

    # Trainer & Evaluator
    trainer = Trainer(config=config,
                      loader=train_loader,
                      model=model,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      loss_fn=loss_fn,
                      eval_fn=eval_fn)
    evaluator = Evaluator(config=config,
                          loader=test_loader,
                          model=model,
                          eval_fn=eval_fn)

    # Save the configuration
    config.save(model_dir + '/config.yaml')

    # Log the model graph
    dummy = next(iter(test_loader))
    for k in dummy:
        dummy[k] = dummy[k].to(device).detach()  # assign back; .to() is not in-place
    logger.add_graph(ModelAsTuple(config, model), dummy)

    start_epoch = 1
    best_error = 10000.

    # Load checkpoint for resuming
    if config.resume is not None:
        filename = os.path.join(model_dir, config.resume)
        if os.path.isfile(filename):
            start_epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, filename, model, optimizer, scheduler)
            start_epoch += 1
            print("Loaded checkpoint '{}' (epoch {})".format(config.resume, start_epoch))
        else:
            raise Exception("No checkpoint found at '{}'".format(config.resume))

    # Load a pre-trained checkpoint
    if config.pre_trained is not None:
        pre_trained_path = os.path.join(config.exp_dir, config.pre_trained)
        if os.path.isfile(pre_trained_path):
            start_epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, pre_trained_path, model, optimizer, scheduler)
            start_epoch = 1
            print("Loaded checkpoint '{}'".format(config.pre_trained))
        else:
            raise Exception("No checkpoint found at '{}'".format(config.pre_trained))

    for epoch in range(start_epoch, config.epochs + 1):
        print(f'===== Start {epoch} epoch =====')

        # Train for one epoch
        print("Training...")
        train_loss, train_val = trainer.train(epoch)

        # Logging
        if config.model == 'CVAE':
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(Reconstruction)/train', train_loss['Recon'], epoch)
            logger.add_scalar('Loss(KL_divergence)/train', train_loss['KL_div'], epoch)
        elif config.model == 'ValueNet':
            logger.add_scalar('Loss/train', train_loss['total'], epoch)
        elif config.model == 'PolicyValueNet':
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(action)/train', train_loss['action'], epoch)
            logger.add_scalar('Loss(accumulated reward)/train',
                              train_loss['accumulated_reward'], epoch)
            # logger.add_scalar('Eval(action)/train', train_val['action'], epoch)
        else:
            logger.add_scalar('Loss(total)/train', train_loss['total'], epoch)
            logger.add_scalar('Loss(action)/train', train_loss['action'], epoch)
            # if config.use_reward:
            #     logger.add_scalar('Loss(reward)/train', train_loss['reward'], epoch)
            # logger.add_scalar('Eval(action)/train', train_val['action'], epoch)
            # if config.use_reward:
            #     logger.add_scalar('Eval(reward)/train', train_val['reward'], epoch)

        # |FIXME| debug for eff_grad: "RuntimeError: Boolean value of Tensor with more than one value is ambiguous"
        log_gradients(model,
                      logger,
                      epoch,
                      log_grad=config.log_grad,
                      log_param=config.log_para,
                      eff_grad=config.eff_grad,
                      print_num_para=config.print_num_para)

        # Evaluation
        if epoch % config.test_eval_freq == 0:
            print("Validating...")
            test_val = evaluator.eval(epoch)

            # Save the best model
            # |TODO| change 'action' to 'total' @ trainer.py & evaluator.py -> merge 'CVAE' & others
            if config.model in ('CVAE', 'ValueNet', 'PolicyValueNet'):
                if test_val['total'] < best_error:
                    best_error = test_val['total']
                    save_checkpoint('Saving the best model!',
                                    os.path.join(model_dir, 'best.pth'),
                                    epoch, best_error, model, optimizer, scheduler)
            else:
                if test_val['action'] < best_error:
                    best_error = test_val['action']
                    save_checkpoint('Saving the best model!',
                                    os.path.join(model_dir, 'best.pth'),
                                    epoch, best_error, model, optimizer, scheduler)

            # Logging
            if config.model == 'CVAE':
                logger.add_scalar('Eval(total)/test', test_val['total'], epoch)
                logger.add_scalar('Eval(Reconstruction)/test', test_val['Recon'], epoch)
                logger.add_scalar('Eval(KL_divergence)/test', test_val['KL_div'], epoch)
            elif config.model == 'ValueNet':
                logger.add_scalar('Eval/test', test_val['total'], epoch)
            elif config.model == 'PolicyValueNet':
                logger.add_scalar('Eval(total)/test', test_val['total'], epoch)
                logger.add_scalar('Eval(action)/test', test_val['action'], epoch)
                logger.add_scalar('Eval(accumulated reward)/test',
                                  test_val['accumulated_reward'], epoch)
            else:
                logger.add_scalar('Eval(action)/test', test_val['action'], epoch)
                # if config.use_reward:
                #     logger.add_scalar('Eval(reward)/test', test_val['reward'], epoch)

        # Save the model periodically
        if epoch % config.save_freq == 0:
            save_checkpoint('Saving...',
                            os.path.join(model_dir, f'ckpt_epoch_{epoch}.pth'),
                            epoch, best_error, model, optimizer, scheduler)

        print(f'===== End {epoch} epoch =====')
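# save_checkpoint / load_checkpoint are assumed above but not shown. A minimal
# sketch of the pair (an assumption matching the call sites, not the original
# code): persist and restore a plain dict of state_dicts with torch.
def save_checkpoint(message, filename, epoch, best_error, model, optimizer, scheduler):
    print(message)
    th.save({'epoch': epoch,
             'best_error': best_error,
             'model': model.state_dict(),
             'optimizer': optimizer.state_dict(),
             'scheduler': scheduler.state_dict()}, filename)

def load_checkpoint(config, filename, model, optimizer, scheduler):
    checkpoint = th.load(filename, map_location=config.device)
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    scheduler.load_state_dict(checkpoint['scheduler'])
    return checkpoint['epoch'], checkpoint['best_error'], model, optimizer, scheduler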
def main():
    config = Settings()
    dataset_filename = config.dataset_file
    dataset_path = os.path.join(os.getcwd(), config.path)

    if not os.path.exists(config.exp_dir):
        os.mkdir(config.exp_dir)
    model_dir = os.path.join(config.exp_dir, config.model_name)
    logger = SummaryWriter(model_dir)

    with open(os.path.join(dataset_path, dataset_filename), 'rb') as f:
        dataset = pickle.load(f)
    print('#trajectories of dataset:', len(dataset['observation']))

    # Generate the dataloader
    data_loader = get_loader_multi_target(config, dataset)

    # Model
    device = th.device(config.device)
    if config.model == 'GPT':
        model = GPT2(config).to(device)
    elif config.model == 'RNN':
        model = RNN(config).to(device)
    elif config.model == 'LSTM':
        model = LSTM(config).to(device)
    elif config.model == 'CVAE':
        model = CVAE(config).to(device)
    else:
        raise Exception(
            f'"{config.model}" is not supported! You should select "GPT", "RNN", "LSTM", or "CVAE".'
        )

    # Optimizer
    optimizer = th.optim.AdamW(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)

    # Learning-rate scheduler
    if config.optimizer == 'AdamW':
        scheduler = th.optim.lr_scheduler.LambdaLR(
            optimizer, lambda step: min((step + 1) / config.warmup_step, 1))
    elif config.optimizer == 'AdamWR':
        scheduler = CosineAnnealingWarmUpRestarts(optimizer=optimizer,
                                                  T_0=config.T_0,
                                                  T_mult=config.T_mult,
                                                  eta_max=config.lr_max,
                                                  T_up=config.warmup_step,
                                                  gamma=config.lr_mult)
    else:
        raise Exception(
            f'"{config.optimizer}" is not supported! You should select "AdamW" or "AdamWR".'
        )

    # Metric
    if config.model == 'CVAE':
        eval_fn = NNMSE
    else:
        eval_fn = NMSE

    # Evaluator
    evaluator = MultiTargetEvaluator(config=config,
                                     loader=data_loader,
                                     model=model,
                                     eval_fn=eval_fn)

    # Load each checkpoint and evaluate it
    for ckpt in config.resume:
        filename = os.path.join(model_dir, ckpt)
        if os.path.isfile(filename):
            epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, filename, model, optimizer, scheduler)
            print("Loaded checkpoint '{}' (epoch {})".format(ckpt, epoch))
        else:
            raise Exception("No checkpoint found at '{}'".format(ckpt))

        print(f'===== Evaluate {epoch} epoch =====')
        test_val = evaluator.eval(epoch)

        # Logging
        logger.add_scalar('Eval/Near-Nearest MSE', test_val, epoch)
        print(f'===== End {epoch} epoch =====')
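# NMSE / NNMSE are assumed above but not shown. A plausible sketch of NMSE
# (an assumption, not the original metric): mean squared error normalized by
# the targets' mean squared magnitude, with a small epsilon for stability.
def NMSE(pred, target):
    return ((pred - target) ** 2).mean() / ((target ** 2).mean() + 1e-12)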
def main():
    config = Settings(model='GPT',
                      model_name='9.23_dropout0.1_GPT',
                      resume='best.pth')
    dataset_path = os.path.join(os.getcwd(), config.path)
    dataset_filename = config.test_file
    device = config.device
    model_dir = os.path.join(config.exp_dir, config.model_name)

    with open(os.path.join(dataset_path, dataset_filename), 'rb') as f:
        dataset = pickle.load(f)
    dataset = LightDarkDataset(config, dataset)
    data, targets = collect_data(config, dataset)
    # with open(os.path.join(dataset_path, 'light_dark_sample_len15.pickle'), 'rb') as f:
    #     sample = pickle.load(f)
    # data, targets = sample['data'], sample['targets']

    if config.model == 'GPT':
        model = GPT2(config).to(device)
    elif config.model == 'RNN':
        model = RNN(config).to(device)
    elif config.model == 'CVAE':
        model = CVAE(config).to(device)

    optimizer = th.optim.AdamW(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)

    if config.optimizer == 'AdamW':
        scheduler = th.optim.lr_scheduler.LambdaLR(
            optimizer, lambda step: min((step + 1) / config.warmup_step, 1))
    elif config.optimizer == 'AdamWR':
        scheduler = CosineAnnealingWarmUpRestarts(optimizer=optimizer,
                                                  T_0=config.T_0,
                                                  T_mult=config.T_mult,
                                                  eta_max=config.lr_max,
                                                  T_up=config.warmup_step,
                                                  gamma=config.lr_mult)
    else:
        # |FIXME| raise an error/exception or use logging instead of print
        print(f'"{config.optimizer}" is not supported! You should select "AdamW" or "AdamWR".')
        return

    # Load checkpoint for resuming
    if config.resume is not None:
        filename = os.path.join(model_dir, config.resume)
        if os.path.isfile(filename):
            start_epoch, best_error, model, optimizer, scheduler = load_checkpoint(
                config, filename, model, optimizer, scheduler)
            start_epoch += 1
            print("Loaded checkpoint '{}' (epoch {})".format(config.resume, start_epoch))
        else:
            # |FIXME| raise an error/exception or use logging instead of print
            print("No checkpoint found at '{}'".format(config.resume))
            return

    pred = []
    total_time = 0.
    for d in data:
        for _ in range(config.num_output):
            tmp_pred, elapsed = predict_action(config, model, d)
            pred.append(tmp_pred)
            total_time += elapsed

    targets = np.asarray(targets).reshape(-1, 2)
    pred = np.asarray(pred).reshape(-1, 2)

    print(f'Inference time: {total_time / (config.num_input * config.num_output)}')

    plt.xlim(-7, 7)
    plt.ylim(-7, 7)
    plt.scatter(targets[:, 0], targets[:, 1], c='red')
    plt.scatter(pred[:, 0], pred[:, 1], c='blue')
    plt.show()
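# predict_action is assumed by the inference scripts here but not shown. A
# minimal sketch (an assumption, not the original): time a single forward
# pass of the model on one trajectory and return the predicted action.
from time import perf_counter

def predict_action(config, model, data):
    model.eval()
    start = perf_counter()
    with th.no_grad():
        action = model(data)  # assumes the model maps one trajectory to a 2-D action
    return action.squeeze().cpu().numpy(), perf_counter() - start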
def main():
    config = Settings()
    dataset_path = os.path.join(os.getcwd(), config.path)
    dataset_filename = config.test_file
    device = config.device
    # model_dir_RNN = os.path.join(config.exp_dir, config.model_name_RNN)
    model_dir_GPT = os.path.join(config.exp_dir, config.model_name_GPT)

    with open(os.path.join(dataset_path, dataset_filename), 'rb') as f:
        dataset = pickle.load(f)
    dataset = LightDarkDataset(config, dataset)
    data = collect_data(config, dataset)

    # model_RNN = RNN(config).to(device)
    model_GPT = GPT2(config).to(device)

    # optimizer_RNN = th.optim.AdamW(model_RNN.parameters(),
    #                                lr=config.learning_rate,
    #                                weight_decay=config.weight_decay)
    optimizer_GPT = th.optim.AdamW(model_GPT.parameters(),
                                   lr=config.learning_rate,
                                   weight_decay=config.weight_decay)

    if config.optimizer == 'AdamW':
        # scheduler_RNN = th.optim.lr_scheduler.LambdaLR(
        #     optimizer_RNN, lambda step: min((step + 1) / config.warmup_step, 1))
        scheduler_GPT = th.optim.lr_scheduler.LambdaLR(
            optimizer_GPT, lambda step: min((step + 1) / config.warmup_step, 1))
    elif config.optimizer == 'AdamWR':
        # scheduler_RNN = CosineAnnealingWarmUpRestarts(optimizer=optimizer_RNN,
        #                                               T_0=config.T_0,
        #                                               T_mult=config.T_mult,
        #                                               eta_max=config.lr_max,
        #                                               T_up=config.warmup_step,
        #                                               gamma=config.lr_mult)
        scheduler_GPT = CosineAnnealingWarmUpRestarts(optimizer=optimizer_GPT,
                                                      T_0=config.T_0,
                                                      T_mult=config.T_mult,
                                                      eta_max=config.lr_max,
                                                      T_up=config.warmup_step,
                                                      gamma=config.lr_mult)
    else:
        # |FIXME| raise an error/exception or use logging instead of print
        print(f'"{config.optimizer}" is not supported! You should select "AdamW" or "AdamWR".')
        return

    # Load checkpoint for resuming
    if config.resume is not None:
        # filename_RNN = os.path.join(model_dir_RNN, config.resume)
        filename_GPT = os.path.join(model_dir_GPT, config.resume)
        # if os.path.isfile(filename_RNN):
        #     start_epoch_RNN, best_error_RNN, model_RNN, optimizer_RNN, scheduler_RNN = load_checkpoint(
        #         config, filename_RNN, model_RNN, optimizer_RNN, scheduler_RNN)
        #     start_epoch_RNN += 1
        #     print("[RNN]Loaded checkpoint '{}' (epoch {})".format(config.resume, start_epoch_RNN))
        # else:
        #     print("No checkpoint found at '{}'".format(config.resume))
        #     return
        if os.path.isfile(filename_GPT):
            start_epoch_GPT, best_error_GPT, model_GPT, optimizer_GPT, scheduler_GPT = load_checkpoint(
                config, filename_GPT, model_GPT, optimizer_GPT, scheduler_GPT)
            start_epoch_GPT += 1
            print("[GPT]Loaded checkpoint '{}' (epoch {})".format(
                config.resume, start_epoch_GPT))
        else:
            # |FIXME| raise an error/exception or use logging instead of print
            print("No checkpoint found at '{}'".format(config.resume))
            return

    out = []
    for d in data:
        # pred_RNN, time_RNN = predict_action(config, model_RNN, d)
        pred_GPT, time_GPT = predict_action(config, model_GPT, d)
        out.append(pred_GPT)
        print(pred_GPT)
    out = np.array(out)

    # print(f'Inference time for RNN: {time_RNN}')
    # print(f'Inference time for GPT: {time_GPT}')

    plt.xlim(-7, 7)
    plt.ylim(-7, 7)
    # plt.scatter(targets[:, 0], targets[:, 1], c='red')
    # plt.scatter(pred_RNN[0], pred_RNN[1], c='green')
    plt.scatter(out[:, 0], out[:, 1], c='blue')
    plt.show()
def main():
    # config_RNN = Settings(model='RNN', model_name='9.23_dropout0.1_RNN', resume='best.pth')
    # config_GPT = Settings(model='GPT', model_name='10.3_GPT_dim8_layer6', resume='best.pth')
    config_CVAE = Settings(model='CVAE',
                           model_name='10.10_CVAE_dim16',
                           resume='best.pth')
    dataset_path = os.path.join(os.getcwd(), config_CVAE.path)
    dataset_filename = config_CVAE.test_file
    device = config_CVAE.device
    # model_dir_RNN = os.path.join(config_RNN.exp_dir, config_RNN.model_name)
    model_dir_GPT = os.path.join(config_CVAE.exp_dir, config_CVAE.model_name)
    model_dir_CVAE = os.path.join(config_CVAE.exp_dir, config_CVAE.model_name)

    with open(os.path.join(dataset_path, dataset_filename), 'rb') as f:
        dataset = pickle.load(f)
    dataset = LightDarkDataset(config_CVAE, dataset)
    data, targets = collect_data(config_CVAE, dataset)
    # with open(os.path.join(dataset_path, 'light_dark_sample_len15.pickle'), 'rb') as f:
    #     sample = pickle.load(f)
    # data, targets = sample['data'], sample['targets']

    # model_RNN = RNN(config_RNN).to(device)
    model_GPT = GPT2(config_CVAE).to(device)
    model_CVAE = CVAE(config_CVAE).to(device)

    # optimizer_RNN = th.optim.AdamW(model_RNN.parameters(),
    #                                lr=config_RNN.learning_rate,
    #                                weight_decay=config_RNN.weight_decay)
    # optimizer_GPT = th.optim.AdamW(model_GPT.parameters(),
    #                                lr=config_GPT.learning_rate,
    #                                weight_decay=config_GPT.weight_decay)
    optimizer_CVAE = th.optim.AdamW(model_CVAE.parameters(),
                                    lr=config_CVAE.learning_rate,
                                    weight_decay=config_CVAE.weight_decay)

    if config_CVAE.optimizer == 'AdamW':
        # scheduler_RNN = th.optim.lr_scheduler.LambdaLR(
        #     optimizer_RNN, lambda step: min((step + 1) / config_RNN.warmup_step, 1))
        # scheduler_GPT = th.optim.lr_scheduler.LambdaLR(
        #     optimizer_GPT, lambda step: min((step + 1) / config_GPT.warmup_step, 1))
        scheduler_CVAE = th.optim.lr_scheduler.LambdaLR(
            optimizer_CVAE,
            lambda step: min((step + 1) / config_CVAE.warmup_step, 1))
    elif config_CVAE.optimizer == 'AdamWR':
        # scheduler_RNN = CosineAnnealingWarmUpRestarts(optimizer=optimizer_RNN,
        #                                               T_0=config_RNN.T_0,
        #                                               T_mult=config_RNN.T_mult,
        #                                               eta_max=config_RNN.lr_max,
        #                                               T_up=config_RNN.warmup_step,
        #                                               gamma=config_RNN.lr_mult)
        # scheduler_GPT = CosineAnnealingWarmUpRestarts(optimizer=optimizer_GPT,
        #                                               T_0=config_GPT.T_0,
        #                                               T_mult=config_GPT.T_mult,
        #                                               eta_max=config_GPT.lr_max,
        #                                               T_up=config_GPT.warmup_step,
        #                                               gamma=config_GPT.lr_mult)
        scheduler_CVAE = CosineAnnealingWarmUpRestarts(optimizer=optimizer_CVAE,
                                                       T_0=config_CVAE.T_0,
                                                       T_mult=config_CVAE.T_mult,
                                                       eta_max=config_CVAE.lr_max,
                                                       T_up=config_CVAE.warmup_step,
                                                       gamma=config_CVAE.lr_mult)
    else:
        # |FIXME| raise an error/exception or use logging instead of print
        print(f'"{config_CVAE.optimizer}" is not supported! You should select "AdamW" or "AdamWR".')
        return

    # Load checkpoint for resuming
    if config_CVAE.resume is not None:
        # filename_RNN = os.path.join(model_dir_RNN, config_RNN.resume)
        # filename_GPT = os.path.join(model_dir_GPT, config_GPT.resume)
        filename_CVAE = os.path.join(model_dir_CVAE, config_CVAE.resume)
        # if os.path.isfile(filename_RNN):
        #     start_epoch_RNN, best_error_RNN, model_RNN, optimizer_RNN, scheduler_RNN = load_checkpoint(
        #         config_RNN, filename_RNN, model_RNN, optimizer_RNN, scheduler_RNN)
        #     start_epoch_RNN += 1
        #     print("[RNN]Loaded checkpoint '{}' (epoch {})".format(config_RNN.resume, start_epoch_RNN))
        # else:
        #     print("No checkpoint found at '{}'".format(config_RNN.resume))
        #     return
        # if os.path.isfile(filename_GPT):
        #     start_epoch_GPT, best_error_GPT, model_GPT, optimizer_GPT, scheduler_GPT = load_checkpoint(
        #         config_GPT, filename_GPT, model_GPT, optimizer_GPT, scheduler_GPT)
        #     start_epoch_GPT += 1
        #     print("[GPT]Loaded checkpoint '{}' (epoch {})".format(config_GPT.resume, start_epoch_GPT))
        # else:
        #     print("No checkpoint found at '{}'".format(config_GPT.resume))
        #     return
        if os.path.isfile(filename_CVAE):
            start_epoch_CVAE, best_error_CVAE, model_CVAE, optimizer_CVAE, scheduler_CVAE = load_checkpoint(
                config_CVAE, filename_CVAE, model_CVAE, optimizer_CVAE, scheduler_CVAE)
            start_epoch_CVAE += 1
            print("[CVAE]Loaded checkpoint '{}' (epoch {})".format(
                config_CVAE.resume, start_epoch_CVAE))
        else:
            # |FIXME| raise an error/exception or use logging instead of print
            print("No checkpoint found at '{}'".format(config_CVAE.resume))
            return

    # pred_RNN = []
    pred_GPT = []
    pred_CVAE = []
    # total_time_RNN = 0.
    total_time_GPT = 0.
    total_time_CVAE = 0.
    for d in data:
        for _ in range(config_CVAE.num_output):
            # tmp_pred_RNN, time_RNN = predict_action(config_RNN, model_RNN, d)
            # tmp_pred_GPT, time_GPT = predict_action(config_GPT, model_GPT, d)
            tmp_pred_CVAE, time_CVAE = predict_action(config_CVAE, model_CVAE, d)
            # pred_RNN.append(tmp_pred_RNN)
            # pred_GPT.append(tmp_pred_GPT)
            pred_CVAE.append(tmp_pred_CVAE)
            # total_time_RNN += time_RNN
            # total_time_GPT += time_GPT
            total_time_CVAE += time_CVAE

    targets = np.asarray(targets).reshape(-1, 2)
    # pred_RNN = np.asarray(pred_RNN).reshape(-1, 2)
    pred_GPT = np.asarray(pred_GPT).reshape(-1, 2)
    pred_CVAE = np.asarray(pred_CVAE).reshape(-1, 2)

    # print(f'Inference time for RNN: {total_time_RNN / (config_RNN.num_input * config_RNN.num_output)}')
    # print(f'Inference time for GPT: {total_time_GPT / (config_GPT.num_input * config_GPT.num_output)}')
    print(f'Inference time for CVAE: {total_time_CVAE / (config_CVAE.num_input * config_CVAE.num_output)}')

    plt.xlim(-7, 7)
    plt.ylim(-7, 7)
    plt.scatter(targets[:, 0], targets[:, 1], c='red')
    # plt.scatter(pred_RNN[:, 0], pred_RNN[:, 1], c='green')
    # plt.scatter(pred_GPT[:, 0], pred_GPT[:, 1], c='blue')
    plt.scatter(pred_CVAE[:, 0], pred_CVAE[:, 1], c='black')
    plt.show()