def build_dynamics_model(config):
    """Factory that builds the dynamics network specified in the config."""
    dyna_net = None
    dyna_net_type = config['dynamics_net']["model_type"]

    if dyna_net_type == "mlp":
        dyna_net = DynaNetMLP(config)
    elif dyna_net_type == "mlp_weighted":
        dyna_net = DynaNetMLPWeighted(config)
    elif dyna_net_type == "mlp_weight_matrix":
        dyna_net = DynaNetMLPWeightMatrix(config)
    else:
        raise ValueError("unsupported dynamics net type")

    return dyna_net
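
# Illustrative usage sketch for build_dynamics_model (not part of the original
# code). The 'dynamics_net' block mirrors the keys read by the factory above;
# DynaNetMLP will typically expect additional config keys that are assumed here.
def _example_build_dynamics_model():
    # minimal, hypothetical config fragment for illustration only
    config = {'dynamics_net': {'model_type': 'mlp'}}
    model = build_dynamics_model(config)
    return model
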

def build_visual_dynamics_model(config):
    """Factory that builds the vision net and dynamics net and wraps them in a
    VisualDynamicsNet."""
    vision_net = None
    dyna_net = None

    vision_net_type = config['vision_net']['model_type']
    if vision_net_type == "PrecomputedDescriptorNet":
        vision_net = PrecomputedDescriptorNet(config)
        vision_net.initialize_weights()
    else:
        raise ValueError("unsupported vision net type")

    dyna_net_type = config['dynamics_net']["model_type"]
    if dyna_net_type == "mlp":
        dyna_net = DynaNetMLP(config)
    else:
        raise ValueError("unsupported dynamics net type")

    visual_dynamics_net = VisualDynamicsNet(config, vision_net, dyna_net)
    return visual_dynamics_net

def train_dynamics(config,
                   train_dir,  # str: directory to save output
                   ):
    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])

    st_epoch = config['train']['resume_epoch'] if config['train']['resume_epoch'] > 0 else 0
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    tensorboard_dir = os.path.join(train_dir, "tensorboard")
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    writer = SummaryWriter(log_dir=tensorboard_dir)

    # save the config
    save_yaml(config, os.path.join(train_dir, "config.yaml"))
    print(config)

    # load the data
    episodes = load_episodes_from_config(config)

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=episodes,
            phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=(phase == 'train'),
            num_workers=config['train']['num_workers'])

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    # normalization parameters are only computed when not starting from a
    # pre-trained network (see the else branch below)

    '''
    define model for dynamics prediction
    '''
    model_dy = None

    if config['train']['resume_epoch'] >= 0:
        # resume from a pretrained checkpoint: construct the network first,
        # then load the saved parameters into it
        model_dy = DynaNetMLP(config)

        state_dict_path = os.path.join(
            train_dir, 'net_dy_epoch_%d_iter_%d_state_dict.pth' %
            (config['train']['resume_epoch'], config['train']['resume_iter']))
        print("Loading saved ckp from %s" % state_dict_path)

        # torch.load returns the saved state_dict; load_state_dict copies it
        # into the model's parameters
        model_dy.load_state_dict(torch.load(state_dict_path))
        # NOTE: the optimizer state is not restored when resuming from a checkpoint
    else:
        # not starting from pre-trained: create the network and compute the
        # normalization parameters
        model_dy = DynaNetMLP(config)

        # compute normalization params
        stats = datasets["train"].compute_dataset_statistics()

        obs_mean = stats['observations']['mean']
        obs_std = stats['observations']['std']
        observations_normalizer = DataNormalizer(obs_mean, obs_std)

        action_mean = stats['actions']['mean']
        action_std = stats['actions']['std']
        actions_normalizer = DataNormalizer(action_mean, action_std)

        model_dy.action_normalizer = actions_normalizer
        model_dy.state_normalizer = observations_normalizer

    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_dy.parameters()
    optimizer = optim.Adam(params,
                           lr=config['train']['lr'],
                           betas=(config['train']['adam_beta1'], 0.999))
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.9,
                                  patience=10,
                                  verbose=True)

    if use_gpu:
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    global_iteration = 0
    epoch_counter_external = 0

    try:
        for epoch in range(st_epoch, config['train']['n_epoch']):
            phases = ['train', 'valid']
            epoch_counter_external = epoch

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)

            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):
                    global_iteration += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config['train']['n_rollout']
                        n_samples = n_his + n_roll

                        if config['env']['type'] in ['PusherSlider']:
                            states = data['observations']
                            actions = data['actions']

                            if use_gpu:
                                states = states.cuda()
                                actions = actions.cuda()

                            assert states.size(1) == n_samples

                            # normalize states and actions once for the entire rollout
                            states = model_dy.state_normalizer.normalize(states)
                            actions = model_dy.action_normalizer.normalize(actions)

                            B = states.size(0)

                            loss_mse = 0.
                            # state_cur: B x n_his x state_dim
                            state_cur = states[:, :n_his]

                            for j in range(n_roll):
                                # state_des: B x state_dim
                                state_des = states[:, n_his + j]

                                # action_cur: B x n_his x action_dim
                                action_cur = actions[:, j:j + n_his] if actions is not None else None

                                # state_cur: B x n_his x state_dim
                                # state_pred: B x state_dim
                                state_pred = model_dy(state_cur, action_cur)

                                loss_mse_cur = criterionMSE(state_pred, state_des)
                                loss_mse += loss_mse_cur / n_roll

                                # update state_cur: drop the oldest frame, append the prediction
                                # state_pred.unsqueeze(1): B x 1 x state_dim
                                state_cur = torch.cat(
                                    [state_cur[:, 1:], state_pred.unsqueeze(1)], 1)

                            meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                            phase, epoch, config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (
                            np.sqrt(loss_mse.item()), meter_loss_rmse.avg)
                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 500 iterations
                        if global_iteration > 500:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer), global_iteration)
                            writer.add_scalar("Loss/train",
                                              loss_mse.item(), global_iteration)
                            writer.add_scalar("RMSE average loss/train",
                                              meter_loss_rmse.avg, global_iteration)

                    if phase == 'train' and i % config['train']['ckp_per_iter'] == 0:
                        save_model(model_dy,
                                   '%s/net_dy_epoch_%d_iter_%d' % (train_dir, epoch, i))

                log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == 'valid':
                    scheduler.step(meter_loss_rmse.avg)
                    writer.add_scalar("RMSE average loss/valid",
                                      meter_loss_rmse.avg, global_iteration)

                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

            writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(model_dy,
                   '%s/net_dy_epoch_%d_keyboard_interrupt' % (train_dir, epoch_counter_external))
        writer.flush()  # flush SummaryWriter events to disk
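
# Minimal sketch of the sliding-window rollout used in the training loop above,
# shown with dummy tensors and a stand-in dynamics function so the shape
# bookkeeping is easy to follow. All dimensions here are made up for
# illustration; model_dy is not used.
def _example_sliding_window_rollout():
    import torch

    B, n_his, n_roll, state_dim, action_dim = 4, 2, 3, 5, 2
    states = torch.randn(B, n_his + n_roll, state_dim)
    actions = torch.randn(B, n_his + n_roll, action_dim)

    def dummy_dynamics(state_cur, action_cur):
        # stand-in for model_dy(state_cur, action_cur): returns a [B, state_dim] prediction
        return state_cur[:, -1]

    state_cur = states[:, :n_his]  # [B, n_his, state_dim]
    for j in range(n_roll):
        action_cur = actions[:, j:j + n_his]                 # [B, n_his, action_dim]
        state_pred = dummy_dynamics(state_cur, action_cur)   # [B, state_dim]
        # drop the oldest frame, append the prediction
        state_cur = torch.cat([state_cur[:, 1:], state_pred.unsqueeze(1)], 1)

    assert state_cur.shape == (B, n_his, state_dim)
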

def train_dynamics(config, data_path, train_dir):
    # access dict values as attributes
    config = edict(config)

    # set random seed for reproducibility
    set_seed(config.train.random_seed)

    st_epoch = config.train.resume_epoch if config.train.resume_epoch > 0 else 0
    tee = Tee(os.path.join(train_dir, 'train_st_epoch_%d.log' % st_epoch), 'w')

    print(config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(config, data_path, phase=phase)

        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config.train.batch_size,
            shuffle=(phase == 'train'),
            num_workers=config.train.num_workers)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    '''
    define model for dynamics prediction
    '''
    model_dy = DynaNetMLP(config)
    print("model_dy #params: %d" % count_trainable_parameters(model_dy))

    if config.train.resume_epoch >= 0:
        # resume from a pretrained checkpoint
        model_dy_path = os.path.join(
            train_dir, 'net_dy_epoch_%d_iter_%d.pth' %
            (config.train.resume_epoch, config.train.resume_iter))
        print("Loading saved ckp from %s" % model_dy_path)
        model_dy.load_state_dict(torch.load(model_dy_path))

    # criterion
    criterionMSE = nn.MSELoss()

    # optimizer
    params = model_dy.parameters()
    optimizer = optim.Adam(params,
                           lr=config.train.lr,
                           betas=(config.train.adam_beta1, 0.999))
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  factor=0.9,
                                  patience=10,
                                  verbose=True)

    if use_gpu:
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf

    for epoch in range(st_epoch, config.train.n_epoch):
        phases = ['train', 'valid']

        for phase in phases:
            model_dy.train(phase == 'train')

            meter_loss_rmse = AverageMeter()

            bar = ProgressBar(max_value=data_n_batches[phase])
            loader = dataloaders[phase]

            for i, data in bar(enumerate(loader)):
                if use_gpu:
                    if isinstance(data, list):
                        data = [d.cuda() for d in data]
                    else:
                        data = data.cuda()

                with torch.set_grad_enabled(phase == 'train'):
                    n_his, n_roll = config.train.n_history, config.train.n_rollout
                    n_samples = n_his + n_roll

                    if config.env.type in ['PusherSlider']:
                        states, actions = data
                        assert states.size(1) == n_samples

                        B = states.size(0)

                        loss_mse = 0.
                        # state_cur: B x n_his x state_dim
                        state_cur = states[:, :n_his]

                        for j in range(n_roll):
                            # state_des: B x state_dim
                            state_des = states[:, n_his + j]

                            # action_cur: B x n_his x action_dim
                            action_cur = actions[:, j:j + n_his] if actions is not None else None

                            # state_pred: B x state_dim
                            state_pred = model_dy(state_cur, action_cur)
                            loss_mse_cur = criterionMSE(state_pred, state_des)
                            loss_mse += loss_mse_cur / config.train.n_rollout

                            # update state_cur: drop the oldest frame, append the prediction
                            state_cur = torch.cat(
                                [state_cur[:, 1:], state_pred.unsqueeze(1)], 1)

                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                if phase == 'train':
                    optimizer.zero_grad()
                    loss_mse.backward()
                    optimizer.step()

                if i % config.train.log_per_iter == 0:
                    log = '%s [%d/%d][%d/%d] LR: %.6f' % (
                        phase, epoch, config.train.n_epoch, i,
                        data_n_batches[phase], get_lr(optimizer))
                    log += ', rmse: %.6f (%.6f)' % (
                        np.sqrt(loss_mse.item()), meter_loss_rmse.avg)
                    print(log)

                if phase == 'train' and i % config.train.ckp_per_iter == 0:
                    torch.save(model_dy.state_dict(),
                               '%s/net_dy_epoch_%d_iter_%d.pth' % (train_dir, epoch, i))

            log = '%s [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                phase, epoch, config.train.n_epoch,
                meter_loss_rmse.avg, best_valid_loss)
            print(log)

            if phase == 'valid':
                scheduler.step(meter_loss_rmse.avg)
                if meter_loss_rmse.avg < best_valid_loss:
                    best_valid_loss = meter_loss_rmse.avg
                    torch.save(model_dy.state_dict(),
                               '%s/net_best_dy.pth' % (train_dir))

def train_dynamics(config,
                   train_dir,
                   data_dir,
                   model_dy,
                   global_iteration,
                   writer):
    # load the data
    multi_episode_dict = DrakeSimEpisodeReader.load_dataset(
        data_dir, load_image_data=False)

    '''
    for episode_name in list(multi_episode_dict.keys()):
        print("episode name", episode_name)
        episode = multi_episode_dict[episode_name]
        obs = episode.get_observation(34)
        print(obs)
    '''

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(config)

    datasets = {}
    dataloaders = {}
    data_n_batches = {}
    for phase in ['train', 'valid']:
        print("Loading data for %s" % phase)
        datasets[phase] = MultiEpisodeDataset(
            config,
            action_function=action_function,
            observation_function=observation_function,
            episodes=multi_episode_dict,
            phase=phase)

        # print(config['train'])
        dataloaders[phase] = DataLoader(
            datasets[phase],
            batch_size=config['train']['batch_size'],
            shuffle=(phase == 'train'),
            num_workers=config['train']['num_workers'],
            drop_last=True)

        data_n_batches[phase] = len(dataloaders[phase])

    use_gpu = torch.cuda.is_available()

    '''
    define model for dynamics prediction
    '''
    if model_dy is None:
        model_dy = DynaNetMLP(config)

    # criterion
    MSELoss = nn.MSELoss()
    L1Loss = nn.L1Loss()

    # optimizer
    params = model_dy.parameters()
    lr = float(config['train']['lr'])
    optimizer = optim.Adam(params,
                           lr=lr,
                           betas=(config['train']['adam_beta1'], 0.999))

    # setup scheduler
    sc = config['train']['lr_scheduler']
    scheduler = None

    if config['train']['lr_scheduler']['enabled']:
        if config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau":
            scheduler = ReduceLROnPlateau(optimizer,
                                          mode='min',
                                          factor=sc['factor'],
                                          patience=sc['patience'],
                                          threshold_mode=sc['threshold_mode'],
                                          cooldown=sc['cooldown'],
                                          verbose=True)
        elif config['train']['lr_scheduler']['type'] == "StepLR":
            step_size = config['train']['lr_scheduler']['step_size']
            gamma = config['train']['lr_scheduler']['gamma']
            scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma)
        else:
            raise ValueError("unknown scheduler type: %s" %
                             (config['train']['lr_scheduler']['type']))

    if use_gpu:
        print("using gpu")
        model_dy = model_dy.cuda()

    best_valid_loss = np.inf
    counters = {'train': 0, 'valid': 0}

    try:
        for epoch in range(config['train']['n_epoch']):
            phases = ['train', 'valid']

            writer.add_scalar("Training Params/epoch", epoch, global_iteration)

            for phase in phases:
                model_dy.train(phase == 'train')

                meter_loss_rmse = AverageMeter()
                step_duration_meter = AverageMeter()

                # bar = ProgressBar(max_value=data_n_batches[phase])
                loader = dataloaders[phase]

                for i, data in enumerate(loader):
                    loss_container = dict()  # store the losses for this step

                    step_start_time = time.time()
                    global_iteration += 1
                    counters[phase] += 1

                    with torch.set_grad_enabled(phase == 'train'):
                        n_his, n_roll = config['train']['n_history'], config['train']['n_rollout']
                        n_samples = n_his + n_roll

                        if DEBUG:
                            print("global iteration: %d" % global_iteration)
                            print("n_samples", n_samples)

                        # [B, n_samples, obs_dim]
                        observations = data['observations']

                        # [B, n_samples, action_dim]
                        actions = data['actions']
                        B = actions.shape[0]

                        if use_gpu:
                            observations = observations.cuda()
                            actions = actions.cuda()

                        assert actions.shape[1] == n_samples

                        loss_mse = 0.
                        # we don't have any visual observations, so states are observations
                        states = observations

                        # [B, n_his, state_dim]
                        state_init = states[:, :n_his]

                        # We want to rollout n_roll steps.
                        # actions.shape = [B, n_his + n_roll, -1], and the rollout
                        # consumes the first n_his + n_roll - 1 of them, so
                        # action_seq.shape = [B, n_his + n_roll - 1, -1]
                        action_start_idx = 0
                        action_end_idx = n_his + n_roll - 1
                        action_seq = actions[:, action_start_idx:action_end_idx, :]

                        if DEBUG:
                            print("states.shape", states.shape)
                            print("state_init.shape", state_init.shape)
                            print("actions.shape", actions.shape)
                            print("action_seq.shape", action_seq.shape)

                        # use models_dy.rollout_model instead of unrolling manually
                        rollout_data = rollout_model(state_init=state_init,
                                                     action_seq=action_seq,
                                                     dynamics_net=model_dy,
                                                     compute_debug_data=False)

                        # [B, n_roll, state_dim]
                        state_rollout_pred = rollout_data['state_pred']

                        # [B, n_roll, state_dim]
                        state_rollout_gt = states[:, n_his:]

                        if DEBUG:
                            print("state_rollout_gt.shape", state_rollout_gt.shape)
                            print("state_rollout_pred.shape", state_rollout_pred.shape)

                        # the losses are computed between [B, n_roll, state_dim] tensors
                        state_pred_err = state_rollout_pred - state_rollout_gt

                        # everything is in 3D space now so no scaling is needed;
                        # all the losses are in meters
                        loss_mse = MSELoss(state_rollout_pred, state_rollout_gt)
                        loss_l1 = L1Loss(state_rollout_pred, state_rollout_gt)
                        meter_loss_rmse.update(np.sqrt(loss_mse.item()), B)

                        # compute losses at the final step of the rollout
                        mse_final_step = MSELoss(state_rollout_pred[:, -1, :],
                                                 state_rollout_gt[:, -1, :])
                        l2_final_step = torch.norm(state_pred_err[:, -1], dim=-1).mean()
                        l1_final_step = L1Loss(state_rollout_pred[:, -1, :],
                                               state_rollout_gt[:, -1, :])

                        loss_container['mse'] = loss_mse
                        loss_container['l1'] = loss_l1
                        loss_container['mse_final_step'] = mse_final_step
                        loss_container['l1_final_step'] = l1_final_step
                        loss_container['l2_final_step'] = l2_final_step

                    step_duration_meter.update(time.time() - step_start_time)

                    if phase == 'train':
                        optimizer.zero_grad()
                        loss_mse.backward()
                        optimizer.step()

                    if i % config['train']['log_per_iter'] == 0:
                        log = '%s %d [%d/%d][%d/%d] LR: %.6f' % (
                            phase, global_iteration, epoch,
                            config['train']['n_epoch'], i,
                            data_n_batches[phase], get_lr(optimizer))
                        log += ', rmse: %.6f (%.6f)' % (
                            np.sqrt(loss_mse.item()), meter_loss_rmse.avg)
                        log += ', step time %.6f' % (step_duration_meter.avg)
                        step_duration_meter.reset()
                        print(log)

                        # log data to tensorboard
                        # only do it once we have reached 100 iterations
                        if global_iteration > 100:
                            writer.add_scalar("Params/learning rate",
                                              get_lr(optimizer), global_iteration)
                            writer.add_scalar("Loss_MSE/%s" % (phase),
                                              loss_mse.item(), global_iteration)
                            writer.add_scalar("L1/%s" % (phase),
                                              loss_l1.item(), global_iteration)
                            writer.add_scalar("RMSE average loss/%s" % (phase),
                                              meter_loss_rmse.avg, global_iteration)
                            writer.add_scalar("n_traj",
                                              len(multi_episode_dict), global_iteration)

                            for loss_type, loss_obj in loss_container.items():
                                plot_name = "Loss/%s/%s" % (loss_type, phase)
                                writer.add_scalar(plot_name, loss_obj.item(), global_iteration)

                    if phase == 'train' and global_iteration % config['train']['ckp_per_iter'] == 0:
                        save_model(model_dy,
                                   '%s/net_dy_iter_%d' % (train_dir, global_iteration))

                log = '%s %d [%d/%d] Loss: %.6f, Best valid: %.6f' % (
                    phase, global_iteration, epoch, config['train']['n_epoch'],
                    meter_loss_rmse.avg, best_valid_loss)
                print(log)

                if phase == "train":
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type'] == "StepLR"):
                        scheduler.step()

                if phase == 'valid':
                    if (scheduler is not None) and (
                            config['train']['lr_scheduler']['type'] == "ReduceLROnPlateau"):
                        scheduler.step(meter_loss_rmse.avg)

                    if meter_loss_rmse.avg < best_valid_loss:
                        best_valid_loss = meter_loss_rmse.avg
                        save_model(model_dy, '%s/net_best_dy' % (train_dir))

            writer.flush()  # flush SummaryWriter events to disk

    except KeyboardInterrupt:
        # save network if we have a keyboard interrupt
        save_model(model_dy,
                   '%s/net_dy_iter_%d_keyboard_interrupt' % (train_dir, global_iteration))
        writer.flush()  # flush SummaryWriter events to disk

    return model_dy, global_iteration
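
# Small sketch of the action-sequence indexing used above when calling
# rollout_model: with n_his history frames and n_roll rollout steps, the
# rollout consumes actions at indices [0, n_his + n_roll - 1), i.e.
# n_his + n_roll - 1 of the n_his + n_roll actions in each sample.
# Dummy shapes only; rollout_model itself is not called here.
def _example_action_seq_slicing():
    import torch

    B, n_his, n_roll, action_dim = 4, 2, 3, 2
    actions = torch.randn(B, n_his + n_roll, action_dim)

    action_seq = actions[:, 0:n_his + n_roll - 1, :]
    assert action_seq.shape == (B, n_his + n_roll - 1, action_dim)
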

def test_dynanet_mlp():
    # just try doing a single forward pass
    dataset, config = create_pusher_slider_dataset()
    stats = dataset.compute_dataset_statistics()

    n_history = config["train"]["n_history"]

    # obs_mean_repeat = stats['observations']['mean'].repeat(n_history, 1)
    # obs_std_repeat = stats['observations']['std'].repeat(n_history, 1)
    obs_mean_repeat = stats['observations']['mean']
    obs_std_repeat = stats['observations']['std']
    observations_normalizer = DataNormalizer(obs_mean_repeat, obs_std_repeat)

    # action_mean_repeat = stats['actions']['mean'].repeat(n_history, 1)
    # action_std_repeat = stats['actions']['std'].repeat(n_history, 1)
    action_mean_repeat = stats['actions']['mean']
    action_std_repeat = stats['actions']['std']
    actions_normalizer = DataNormalizer(action_mean_repeat, action_std_repeat)

    config["dataset"]["state_dim"] = 5
    config["dataset"]["action_dim"] = 2
    model = DynaNetMLP(config)

    # print summary of model before adding new modules
    print("\n\n -----summary of model BEFORE adding normalization modules")
    print("num trainable parameters", count_trainable_parameters(model))
    print("num non-trainable parameters", count_non_trainable_parameters(model))
    print("\n\n")

    # summary of model after adding new params
    model.set_action_normalizer(actions_normalizer)
    model.set_state_normalizer(observations_normalizer)
    print("\n\n -----summary of model AFTER adding normalization modules")
    print("num trainable parameters", count_trainable_parameters(model))
    print("num non-trainable parameters", count_non_trainable_parameters(model))
    print("\n\n")

    # unsqueeze to mimic dataloader with batch size of 1
    data = dataset[0]  # test the getitem
    observations = data['observations'].unsqueeze(0)
    actions = data['actions'].unsqueeze(0)

    obs_slice = observations[:, :n_history, :]
    action_slice = actions[:, :n_history, :]

    print("action_slice.shape", action_slice.shape)
    print("obs_slice.shape", obs_slice.shape)

    # run the model forwards one timestep
    output = model.forward(obs_slice, action_slice)
    print("output.shape", output.shape)

    # save the model with torch.save and torch.load
    save_dir = os.path.join(get_project_root(), 'sandbox')
    model_save_file = os.path.join(save_dir, "model.pth")
    torch.save(model, model_save_file)

    # load the model
    model_load = torch.load(model_save_file)
    print("\n\n -----summary of model LOADED from disk")
    print("num trainable parameters", count_trainable_parameters(model_load))
    print("num non-trainable parameters", count_non_trainable_parameters(model_load))
    print("\n\n")

    # now try doing the same but with the state dict
    # my hunch is that this won't work . . .
    params_save_file = os.path.join(save_dir, "model_params.pth")
    torch.save(model.state_dict(), params_save_file)

    # load the model
    model_load = DynaNetMLP(config)
    state_dict = torch.load(params_save_file)

    for param_tensor in state_dict:
        print(param_tensor, "\t", state_dict[param_tensor].size())

    # try creating some dummy DataNormalizer objects
    # model_load.set_state_normalizer(DataNormalizer(0.0, 1.0))
    # model_load.set_action_normalizer(DataNormalizer(0.0, 1.0))

    model_load.load_state_dict(state_dict)
    print("\n\n -----summary of model LOADED from disk with state_dict method")
    print("num trainable parameters", count_trainable_parameters(model_load))
    print("num non-trainable parameters", count_non_trainable_parameters(model_load))
    print("\n\n")

    print("model_load._action_normalizer._mean", model_load.action_normalizer._mean)
    print("model._action_normalizer._mean", model.action_normalizer._mean)
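
# Hedged sketch related to the state_dict question probed by the test above.
# This is NOT necessarily how DataNormalizer is implemented in this repo: it
# only illustrates that registering normalization constants as buffers on an
# nn.Module makes them part of state_dict(), so they round-trip through
# torch.save / load_state_dict of the state dict alone.
import torch
import torch.nn as nn


class _ExampleNormalizer(nn.Module):
    def __init__(self, mean, std):
        super().__init__()
        # buffers are saved in state_dict() but are not trainable parameters
        self.register_buffer('_mean', torch.as_tensor(mean, dtype=torch.float32))
        self.register_buffer('_std', torch.as_tensor(std, dtype=torch.float32))

    def normalize(self, x):
        return (x - self._mean) / self._std

    def denormalize(self, x):
        return x * self._std + self._mean

# e.g. list(_ExampleNormalizer(0.0, 1.0).state_dict().keys()) == ['_mean', '_std']
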

def mpc_w_learned_dynamics(config,
                           train_dir,
                           mpc_dir,
                           state_dict_path=None,
                           keypoint_observation=False):
    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])
    tee = Tee(os.path.join(mpc_dir, 'mpc.log'), 'w')
    print(config)

    use_gpu = torch.cuda.is_available()

    '''
    model
    '''
    if config['dynamics']['model_type'] == 'mlp':
        model_dy = DynaNetMLP(config)
    else:
        raise AssertionError("Unknown model type %s" % config['dynamics']['model_type'])

    # print model #params
    print("model #params: %d" % count_trainable_parameters(model_dy))

    if state_dict_path is None:
        if config['mpc']['mpc_dy_epoch'] == -1:
            state_dict_path = os.path.join(train_dir, 'net_best_dy.pth')
        else:
            state_dict_path = os.path.join(
                train_dir, 'net_dy_epoch_%d_iter_%d.pth' %
                (config['mpc']['mpc_dy_epoch'], config['mpc']['mpc_dy_iter']))

    print("Loading saved ckp from %s" % state_dict_path)
    model_dy.load_state_dict(torch.load(state_dict_path))
    model_dy.eval()

    if use_gpu:
        model_dy.cuda()

    criterionMSE = nn.MSELoss()

    # generate action/observation functions
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(config)

    # planner
    planner = planner_from_config(config)

    '''
    env
    '''
    # set up goal
    obs_goals = np.array([
        [262.9843, 267.3102, 318.9369, 351.1229, 360.2048, 323.5128,
         305.6385, 240.4460, 515.4230, 347.8708],
        [381.8694, 273.6327, 299.6685, 331.0925, 328.7724, 372.0096,
         411.0972, 314.7053, 517.7299, 268.4953],
        [284.8728, 275.7985, 374.0677, 320.4990, 395.4019, 275.4633,
         306.2896, 231.4310, 507.0849, 312.4057],
        [313.1638, 271.4258, 405.0255, 312.2325, 424.7874, 266.3525,
         333.6973, 225.7708, 510.1232, 305.3802],
        [308.6859, 270.9629, 394.2789, 323.2781, 419.7905, 280.1602,
         333.8901, 228.1624, 519.1964, 321.5318],
        [386.8067, 284.8947, 294.2467, 323.2223, 313.3221, 368.9970,
         405.9415, 330.9298, 495.9970, 268.9920],
        [432.0219, 299.6021, 340.8581, 339.4676, 360.2354, 384.5515,
         451.4394, 345.2190, 514.6357, 291.2043],
        [351.3389, 264.5325, 267.5279, 318.2321, 293.7460, 360.0423,
         378.4428, 306.9586, 516.4390, 259.7810],
        [521.1902, 254.0693, 492.7884, 349.7861, 539.6320, 364.5190,
         569.2258, 268.8824, 506.9431, 286.9752],
        [264.8554, 275.9547, 338.1317, 345.3435, 372.7012, 308.4648,
         299.3454, 239.9245, 506.2117, 373.8413]])

    for mpc_idx in range(config['mpc']['num_episodes']):
        if keypoint_observation:
            mpc_episode_keypoint_observation(config,
                                             mpc_idx,
                                             model_dy,
                                             mpc_dir,
                                             planner,
                                             obs_goals[mpc_idx],
                                             action_function,
                                             observation_function,
                                             use_gpu=use_gpu)
        else:
            # not supported for now
            raise AssertionError("currently only support keypoint observation")

def eval_dynamics(config,
                  train_dir,
                  eval_dir,
                  state_dict_path=None,
                  keypoint_observation=False,
                  debug=False,
                  render_human=False):
    # set random seed for reproducibility
    set_seed(config['train']['random_seed'])
    tee = Tee(os.path.join(eval_dir, 'eval.log'), 'w')
    print(config)

    use_gpu = torch.cuda.is_available()

    '''
    model
    '''
    model_dy = DynaNetMLP(config)

    # print model #params
    print("model #params: %d" % count_trainable_parameters(model_dy))

    if state_dict_path is None:
        if config['eval']['eval_dy_epoch'] == -1:
            state_dict_path = os.path.join(train_dir, 'net_best_dy.pth')
        else:
            state_dict_path = os.path.join(
                train_dir, 'net_dy_epoch_%d_iter_%d.pth' %
                (config['eval']['eval_dy_epoch'], config['eval']['eval_dy_iter']))

    print("Loading saved ckp from %s" % state_dict_path)
    model_dy.load_state_dict(torch.load(state_dict_path))
    model_dy.eval()

    if use_gpu:
        model_dy.cuda()

    criterionMSE = nn.MSELoss()
    bar = ProgressBar()

    st_idx = config['eval']['eval_st_idx']
    ed_idx = config['eval']['eval_ed_idx']

    # load the data
    episodes = load_episodes_from_config(config)

    # generate action/observation functions
    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.function_from_config(config)

    dataset = MultiEpisodeDataset(config,
                                  action_function=action_function,
                                  observation_function=observation_function,
                                  episodes=episodes,
                                  phase="valid")

    episode_names = dataset.get_episode_names()
    episode_names.sort()

    # for backwards compatibility
    if "num_episodes" in config["eval"]:
        num_episodes = config["eval"]["num_episodes"]
    else:
        num_episodes = 10

    episode_list = []
    if debug:
        episode_list = [episode_names[0]]
    else:
        episode_list = episode_names[:num_episodes]

    for roll_idx, episode_name in enumerate(episode_list):
        print("episode_name", episode_name)
        if keypoint_observation:
            eval_episode_keypoint_observations(config,
                                               dataset,
                                               episode_name,
                                               roll_idx,
                                               model_dy,
                                               eval_dir,
                                               start_idx=9,
                                               n_prediction=30,
                                               render_human=render_human)
        else:
            eval_episode(config,
                         dataset,
                         episode_name,
                         roll_idx,
                         model_dy,
                         eval_dir,
                         start_idx=9,
                         n_prediction=30,
                         render_human=render_human)
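
# Illustrative call sketch for the evaluation entry point above. The directory
# paths are placeholders and the config is whatever dict the rest of this
# module already expects; nothing here is prescribed by the original code.
def _example_eval_dynamics_call(config):
    train_dir = '/path/to/train_dir'  # hypothetical
    eval_dir = '/path/to/eval_dir'    # hypothetical
    eval_dynamics(config,
                  train_dir,
                  eval_dir,
                  keypoint_observation=True,
                  debug=True,  # with debug=True only the first episode is evaluated
                  render_human=False)
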