def rollout_action_sequence(
    env,  # gym env
    action_seq,  # [N, action_dim]
):
    """
    Rollout the action sequence using the simulator.
    Record the observations.
    """

    episode_reader = OnlineEpisodeReader()

    obs_list = []
    N = action_seq.shape[0]
    for i in range(N):
        action = action_seq[i]
        obs, reward, done, info = env.step(action)
        episode_reader.add_observation_action(obs, action)
        obs_list.append(obs)

    return {'observations': obs_list,
            'episode_reader': episode_reader,
            }
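# Usage sketch (illustrative only): roll out a short zero-action sequence in a
# hypothetical Drake environment and grab the final observation. The names
# `env_config` and the 10-step action sequence below are assumptions, not part
# of this module.
#
#   env = DrakeMugsEnv(env_config)
#   env.reset()
#   action_seq = np.zeros([10, 2])  # [N, action_dim]
#   rollout = rollout_action_sequence(env, action_seq)
#   final_obs = rollout['observations'][-1]
#   episode_reader = rollout['episode_reader']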
def main():
    # load dynamics model
    model_dict = load_model_state_dict()
    model = model_dict['model_dy']
    model_dd = model_dict['model_dd']
    config = model.config

    env_config = load_yaml(os.path.join(get_project_root(), 'experiments/exp_20_mugs/config.yaml'))
    env_config['env']['observation']['depth_int16'] = True
    n_history = config['train']['n_history']

    initial_cond = generate_initial_condition(env_config, push_length=PUSH_LENGTH)
    env_config = initial_cond['config']

    # enable the right observations
    camera_name = model_dict['metadata']['camera_name']
    spatial_descriptor_data = model_dict['spatial_descriptor_data']
    ref_descriptors = spatial_descriptor_data['spatial_descriptors']
    K = ref_descriptors.shape[0]
    ref_descriptors = torch.Tensor(ref_descriptors).cuda()  # put them on the GPU

    print("ref_descriptors\n", ref_descriptors)
    print("ref_descriptors.shape", ref_descriptors.shape)

    # create the environment
    env = DrakeMugsEnv(env_config)
    env.reset()

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)

    # create another environment for doing rollouts
    env2 = DrakeMugsEnv(env_config, visualize=False)
    env2.reset()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(config)
    visual_observation_function = \
        VisualObservationFunctionFactory.descriptor_keypoints_3D(config=config,
                                                                 camera_name=camera_name,
                                                                 model_dd=model_dd,
                                                                 ref_descriptors=ref_descriptors,
                                                                 K_matrix=camera_K_matrix,
                                                                 T_world_camera=T_world_camera,
                                                                 )

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  visual_observation_function=visual_observation_function,
                                                  action_function=action_function,
                                                  episode=episode)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    obs_init = env.get_observation()

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])

    # seed the episode with n_history (observation, zero-action) pairs
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    def goal_func(obs_tmp):
        state_tmp = mpc_input_builder.get_state_input_single_timestep({'observation': obs_tmp})['state']
        return model.compute_z_state(state_tmp.unsqueeze(0))['z_object'].flatten()

    # compute the object keypoints in the initial configuration
    idx = episode.get_latest_idx()
    obs_raw = episode.get_observation(idx)
    z_object_goal = goal_func(obs_raw)
    z_keypoints_init_W = keypoints_3D_from_dynamics_model_output(z_object_goal, K)
    z_keypoints_init_W = torch_utils.cast_to_numpy(z_keypoints_init_W)

    z_keypoints_obj = keypoints_world_frame_to_object_frame(z_keypoints_init_W,
                                                            T_W_obj=slider_pose_from_observation(obs_init))

    color = [1, 0, 0]
    meshcat_utils.visualize_points(vis=vis,
                                   name="keypoints_W",
                                   pts=z_keypoints_init_W,
                                   color=color,
                                   size=0.02,
                                   )

    # input("press Enter to continue")

    # rollout single action sequence using the simulator
    action_sequence_np = torch_utils.cast_to_numpy(initial_cond['action_sequence'])
    N = action_sequence_np.shape[0]
    obs_rollout_gt = env_utils.rollout_action_sequence(env, action_sequence_np)['observations']

    # using the vision model to get "goal" keypoints
    z_object_goal = goal_func(obs_rollout_gt[-1])
    z_object_goal_np = torch_utils.cast_to_numpy(z_object_goal)
    z_keypoints_goal = keypoints_3D_from_dynamics_model_output(z_object_goal, K)
    z_keypoints_goal = torch_utils.cast_to_numpy(z_keypoints_goal)
    # visualize goal keypoints
    color = [0, 1, 0]
    meshcat_utils.visualize_points(vis=vis,
                                   name="goal_keypoints",
                                   pts=z_keypoints_goal,
                                   color=color,
                                   size=0.02,
                                   )

    # input("press Enter to continue")

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])

    # seed the episode with n_history (observation, zero-action) pairs
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # [n_history, state_dim]
    idx = episode.get_latest_idx()
    dyna_net_input = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
    state_init = dyna_net_input['states'].cuda()  # [n_history, state_dim]
    action_init = dyna_net_input['actions']  # [n_history, action_dim]

    print("state_init.shape", state_init.shape)
    print("action_init.shape", action_init.shape)

    action_seq_gt_torch = torch_utils.cast_to_torch(initial_cond['action_sequence'])
    action_input = torch.cat((action_init[:(n_history - 1)], action_seq_gt_torch), dim=0).cuda()
    print("action_input.shape", action_input.shape)

    # rollout using the ground truth actions and learned model
    # need to add the batch dim to do that
    z_init = model.compute_z_state(state_init)['z']
    rollout_pred = rollout_model(state_init=z_init.unsqueeze(0),
                                 action_seq=action_input.unsqueeze(0),
                                 dynamics_net=model,
                                 compute_debug_data=True)

    state_pred_rollout = rollout_pred['state_pred']
    print("state_pred_rollout.shape", state_pred_rollout.shape)

    for i in range(N):
        # vis GT for now
        name = "GT_3D/%d" % (i)
        T_W_obj = slider_pose_from_observation(obs_rollout_gt[i])
        # print("T_W_obj", T_W_obj)

        # green: ground truth
        color = np.array([0, 1, 0]) * get_color_intensity(i, N)
        meshcat_utils.visualize_points(vis=vis,
                                       name=name,
                                       pts=z_keypoints_obj,
                                       color=color,
                                       size=0.01,
                                       T=T_W_obj)

        # blue: learned-model prediction
        color = np.array([0, 0, 1]) * get_color_intensity(i, N)
        state_pred = state_pred_rollout[:, i, :]
        pts_pred = keypoints_3D_from_dynamics_model_output(state_pred, K).squeeze()
        pts_pred = pts_pred.detach().cpu().numpy()
        name = "pred_3D/%d" % (i)
        meshcat_utils.visualize_points(vis=vis,
                                       name=name,
                                       pts=pts_pred,
                                       color=color,
                                       size=0.01,
                                       )

    # input("finished visualizing GT rollout\npress Enter to continue")

    index_dict = get_object_and_robot_state_indices(config)
    object_indices = index_dict['object_indices']

    # reset the environment and set up the MPC controller to stabilize this trajectory
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])

    # seed the episode with n_history (observation, zero-action) pairs
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # input("press Enter to continue")

    # make a planner config
    planner_config = copy.copy(config)
    config_tmp = load_yaml(os.path.join(get_project_root(), 'experiments/drake_pusher_slider/eval_config.yaml'))
    planner_config['mpc'] = config_tmp['mpc']

    planner = None
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    mpc_out = None
    action_seq_mpc = None
    state_pred_mpc = None
    counter = -1
    while True:
        counter += 1
        print("\n\n-----Running MPC Optimization: Counter (%d)-------" % (counter))

        obs_cur = env.get_observation()
        episode.add_observation_only(obs_cur)

        if counter == 0 or REPLAN:
            print("replanning")
            ####### Run the MPC ##########

            # [1, state_dim]
            n_look_ahead = N - counter
            if USE_FIXED_MPC_HORIZON:
                n_look_ahead = MPC_HORIZON

            if n_look_ahead == 0:
                break

            # start_time = time.time()
            # idx of current observation
            idx = episode.get_latest_idx()

            mpc_start_time = time.time()
            mpc_input_data = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
            state_cur = mpc_input_data['states']
            action_his = mpc_input_data['actions']

            if mpc_out is not None:
                action_seq_rollout_init = mpc_out['action_seq'][1:]
            else:
                action_seq_rollout_init = None

            # run MPPI
            z_cur = None
            with torch.no_grad():
                z_cur = model.compute_z_state(state_cur.unsqueeze(0).cuda())['z'].squeeze(0)

            mpc_out = planner.trajectory_optimization(state_cur=z_cur,
                                                      action_his=action_his,
                                                      obs_goal=z_object_goal_np,
                                                      model_dy=model,
                                                      action_seq_rollout_init=action_seq_rollout_init,
                                                      n_look_ahead=n_look_ahead,
                                                      eval_indices=object_indices,
                                                      rollout_best_action_sequence=True,
                                                      verbose=True,
                                                      )

            print("MPC step took %.4f seconds" % (time.time() - mpc_start_time))
            action_seq_mpc = mpc_out['action_seq'].cpu().numpy()

            # Rollout with ground truth simulator dynamics
            action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
            env2.set_simulator_state_from_observation_dict(env2.get_mutable_context(), obs_cur)
            obs_mpc_gt = env_utils.rollout_action_sequence(env2, action_seq_mpc)['observations']
            state_pred_mpc = torch_utils.cast_to_numpy(mpc_out['state_pred'])

            vis['mpc_3D'].delete()
            vis['mpc_GT_3D'].delete()

            L = len(obs_mpc_gt)
            print("L", L)
            if L == 0:
                break

            for i in range(L):
                # red: learned-model prediction of the MPC action_seq
                color = np.array([1, 0, 0]) * get_color_intensity(i, L)
                state_pred = state_pred_mpc[i, :]
                state_pred = np.expand_dims(state_pred, 0)  # may need to expand dims here
                pts_pred = keypoints_3D_from_dynamics_model_output(state_pred, K).squeeze()
                name = "mpc_3D/%d" % (i)
                meshcat_utils.visualize_points(vis=vis,
                                               name=name,
                                               pts=pts_pred,
                                               color=color,
                                               size=0.01,
                                               )

                # yellow: ground truth rollout of the MPC action_seq
                name = "mpc_GT_3D/%d" % (i)
                T_W_obj = slider_pose_from_observation(obs_mpc_gt[i])
                color = np.array([1, 1, 0]) * get_color_intensity(i, L)
                meshcat_utils.visualize_points(vis=vis,
                                               name=name,
                                               pts=z_keypoints_obj,
                                               color=color,
                                               size=0.01,
                                               T=T_W_obj)

        action_cur = action_seq_mpc[0]

        print("action_cur", action_cur)
        # print("action_GT", initial_cond['action'])
        input("press Enter to continue")

        # add observation actions to the episode
        obs_cur = env.get_observation()
        episode.replace_observation_action(obs_cur, action_cur)

        # step the simulator
        env.step(action_cur)
        # visualize current keypoint positions
        obs_cur = env.get_observation()
        T_W_obj = slider_pose_from_observation(obs_cur)

        # yellow
        color = np.array([1, 1, 0])
        meshcat_utils.visualize_points(vis=vis,
                                       name="keypoint_cur",
                                       pts=z_keypoints_obj,
                                       color=color,
                                       size=0.02,
                                       T=T_W_obj)

        action_seq_mpc = action_seq_mpc[1:]
        state_pred_mpc = state_pred_mpc[1:]

    obs_final = env.get_observation()

    pose_error = compute_pose_error(obs_rollout_gt[-1], obs_final)
    print("position_error: %.3f" % (pose_error['position_error']))
    print("angle error degrees: %.3f" % (pose_error['angle_error_degrees']))
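# The receding-horizon loop above follows the usual MPC pattern; a condensed
# sketch (illustrative pseudocode only, reusing the variable names from the
# loop above):
#
#   while True:
#       obs_cur = env.get_observation()
#       if counter == 0 or REPLAN:
#           mpc_out = planner.trajectory_optimization(...)  # replan from obs_cur
#           action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
#       env.step(action_seq_mpc[0])          # execute only the first action
#       action_seq_mpc = action_seq_mpc[1:]  # shift the plan if not replanning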
def evaluate_mpc_z_state(model_dir,
                         config_planner_mpc=None,
                         save_dir=None,
                         planner_type=None,
                         env_config=None,
                         strict=True,
                         generate_initial_condition_func=None,
                         env_type="DrakePusherSliderEnv",
                         ):
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    model_dict = load_model(model_dir, strict=strict)
    model_dy = model_dict['model_dy']['model_dy']
    config = model_dict['model_dy']['config']
    model_config = config

    model_kp = model_dict['model_kp']['model']
    config_kp = model_kp.config
    camera_name = config_kp['perception']['camera_name']

    # create the environment
    env = None
    if env_type == "DrakePusherSliderEnv":
        env = DrakePusherSliderEnv(env_config, visualize=False)
    elif env_type == "DrakeMugsEnv":
        env = DrakeMugsEnv(env_config, visualize=False)
    else:
        raise ValueError("unknown env type: %s" % (env_type))

    env.reset()

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)
    mask_labels = env.get_labels_to_mask_list()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(config)
    visual_observation_function = \
        VisualObservationFunctionFactory.function_from_config(config,
                                                              camera_name=camera_name,
                                                              model_kp=model_kp,
                                                              K_matrix=camera_K_matrix,
                                                              T_world_camera=T_world_camera,
                                                              mask_labels=mask_labels)

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  visual_observation_function=visual_observation_function,
                                                  action_function=action_function,
                                                  episode=episode)

    def goal_func(obs_tmp):
        state_tmp = mpc_input_builder.get_state_input_single_timestep({'observation': obs_tmp})['state']
        # z_dict = model_dy.compute_z_state(state_tmp.unsqueeze(0))
        # print("z_dict['z_object'].shape", z_dict['z_object'].shape)
        return model_dy.compute_z_state(state_tmp.unsqueeze(0))['z_object_flat']

    index_dict = get_object_and_robot_state_indices(model_config)
    object_indices = index_dict['object_indices']

    # make a planner config, same as model config but with mpc and eval sections replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    planner = None
    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(model_dy=model_dy,
                                              env=env,
                                              episode=episode,
                                              mpc_input_builder=mpc_input_builder,
                                              planner=planner,
                                              eval_indices=object_indices,
                                              goal_func=goal_func,
                                              config=planner_config,
                                              wait_for_user_input=False,
                                              save_dir=save_dir,
                                              model_name="test",
                                              experiment_name="test",
                                              generate_initial_condition_func=generate_initial_condition_func)

    return {'save_dir': save_dir}
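# Usage sketch (illustrative; the paths, config files, and initial-condition
# helper below are placeholders, not defined in this file):
#
#   out = evaluate_mpc_z_state(model_dir="/path/to/trained/model",
#                              config_planner_mpc=load_yaml("eval_config.yaml"),
#                              save_dir="/tmp/mpc_eval",
#                              planner_type="mppi",  # or "random_shooting"
#                              env_config=load_yaml("env_config.yaml"),
#                              generate_initial_condition_func=generate_initial_condition,
#                              env_type="DrakeMugsEnv")
#   print(out['save_dir'])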
def _on_plan_msg(self, msg):
    """
    Received a plan.

    - apply the 'visual_observation_function' to each image in the plan
    - store it in the same data struct
    - (maybe) make a 'Plan' object? this is probably wise
    - setup a new controller? maybe just re-use the old one? maybe wrap in
      try/finally block so we don't leave the python side hanging?

    :param msg:
    :type msg:
    :return:
    :rtype:
    """
    assert self._state == ControllerState.STOPPED
    print("\n\n----- RECEIVED PLAN ------")

    plan_data = msg['data']
    msg_data = msg['data']
    print("len(plan_data)", len(plan_data))

    self._state_dict = dict()
    sd = self._state_dict

    episode = OnlineEpisodeReader(no_copy=True)
    sd['episode'] = episode

    input_builder = DynamicsModelInputBuilder(observation_function=self._observation_function,
                                              visual_observation_function=self._visual_observation_function,
                                              action_function=self._action_function,
                                              episode=episode)

    plan_container = PlanContainer(data=msg_data,
                                   dynamics_model_input_builder=input_builder,
                                   model_dy=self._model_dy,
                                   debug=False)

    self._state_dict = {'episode': episode,
                        'input_builder': input_builder,
                        'plan_msg': msg,
                        'plan': plan_container,
                        'action_counter': 0,
                        'timestamp_system': [],  # list of when we took the actions
                        'mpc_out': None,
                        }

    self._state = ControllerState.PLAN_READY

    resp = {'type': 'PLAN',
            'data': 'READY',
            }

    print("----FINISHED PROCESSING PLAN-----")
    return resp
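# Sketch of the message exchange this handler expects (field names taken from
# the code above and from the plan_msg construction later in this file; the
# episode/image payload inside 'data' and the `controller` instance are
# schematic):
#
#   plan_msg = {
#       'type': 'PLAN',
#       'data': [goal_data],   # list of observation dicts with images
#   }
#   resp = controller._on_plan_msg(plan_msg)
#   # resp == {'type': 'PLAN', 'data': 'READY'} and the controller state is
#   # now ControllerState.PLAN_READY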
def evaluate_mpc(model_dir,
                 config_planner_mpc=None,
                 save_dir=None,
                 planner_type=None,
                 env_config=None,
                 strict=True,
                 generate_initial_condition_func=None,
                 ):
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    model_data = model_builder.load_dynamics_model_from_folder(model_dir, strict=strict)
    model_dy = model_data['model_dy']
    model_dy = model_dy.eval()
    model_dy = model_dy.cuda()
    model_config = model_dy.config

    # create the environment
    env = DrakePusherSliderEnv(env_config, visualize=False)
    env.reset()

    observation_function = ObservationFunctionFactory.function_from_config(model_config)
    action_function = ActionFunctionFactory.function_from_config(model_config)

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  action_function=action_function,
                                                  visual_observation_function=None,
                                                  episode=episode)

    pts_GT = np.array(model_config['dataset']['observation_function']['GT_3D_object_points'])
    K = pts_GT.shape[0]  # num keypoints
    eval_indices = np.arange(3 * K)  # extract the keypoints

    def goal_func(obs_local):
        """
        Helper function for getting the goal state from an observation
        """
        return observation_function(obs_local)[eval_indices]

    # make a planner config, same as model config but with mpc and eval sections replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    planner = None
    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(model_dy=model_dy,
                                              env=env,
                                              episode=episode,
                                              mpc_input_builder=mpc_input_builder,
                                              planner=planner,
                                              eval_indices=eval_indices,
                                              goal_func=goal_func,
                                              config=planner_config,
                                              wait_for_user_input=False,
                                              save_dir=save_dir,
                                              model_name="test",
                                              experiment_name="test",
                                              generate_initial_condition_func=generate_initial_condition_func,
                                              )

    return {'save_dir': save_dir}
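# Note: this variant builds goals from ground-truth 3D object points in the
# dataset config, whereas the variant below builds them from dense-descriptor
# keypoints. A quick illustration of the eval_indices layout (assuming, for
# example, K = 5 keypoints, each contributing x, y, z at the front of the
# state vector):
#
#   K = 5
#   eval_indices = np.arange(3 * K)   # array([0, 1, ..., 14])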
def evaluate_mpc(model_dir,
                 config_planner_mpc=None,
                 save_dir=None,
                 planner_type=None,
                 env_config=None,
                 strict=True,
                 generate_initial_condition_func=None,
                 ):
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    model_data = load_model(model_dir, strict=strict)
    model_dy = model_data['model_dy']
    model_config = model_dy.config
    config = model_config
    model_dd = model_data['model_dd']

    # create the environment
    env = DrakePusherSliderEnv(env_config, visualize=False)
    env.reset()

    camera_name = model_data['metadata']['camera_name']

    # sanity check
    camera_name_in_training = model_config['dataset']['visual_observation_function']['camera_name']
    assert camera_name == camera_name_in_training, \
        "camera_names don't match: camera_name = %s, camera_name_in_training = %s" % (camera_name, camera_name_in_training)

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)

    spatial_descriptor_data = model_data['spatial_descriptor_data']
    ref_descriptors = torch.Tensor(spatial_descriptor_data['spatial_descriptors']).cuda()
    K = ref_descriptors.shape[0]

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(config)
    visual_observation_function = \
        VisualObservationFunctionFactory.function_from_config(config,
                                                              camera_name=camera_name,
                                                              model_dd=model_dd,
                                                              ref_descriptors=ref_descriptors,
                                                              K_matrix=camera_K_matrix,
                                                              T_world_camera=T_world_camera,
                                                              )

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  visual_observation_function=visual_observation_function,
                                                  action_function=action_function,
                                                  episode=episode)

    def goal_func(obs_local):
        keypoints_dict = visual_observation_function(obs_local)
        return keypoints_dict['tensor']

    eval_indices = np.arange(3 * K)  # extract the keypoints

    # make a planner config, same as model config but with mpc and eval sections replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    planner = None
    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(model_dy=model_dy,
                                              env=env,
                                              episode=episode,
                                              mpc_input_builder=mpc_input_builder,
                                              planner=planner,
                                              eval_indices=eval_indices,
                                              goal_func=goal_func,
                                              config=planner_config,
                                              wait_for_user_input=False,
                                              save_dir=save_dir,
                                              model_name="test",
                                              experiment_name="test",
                                              generate_initial_condition_func=generate_initial_condition_func)

    return {'save_dir': save_dir}
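# Illustration of how a goal vector is produced in this descriptor-keypoint
# variant (schematic; `obs_goal_image` is a placeholder observation dict
# containing the goal image):
#
#   z_goal = goal_func(obs_goal_image)          # flattened descriptor keypoints
#   obs_goal_np = torch_utils.cast_to_numpy(z_goal)
#   # obs_goal_np has length 3 * K and lines up with eval_indices above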
def main():
    # load dynamics model
    model_dict = load_autoencoder_model()
    model = model_dict['model_dy']
    model_ae = model_dict['model_ae']
    visual_observation_function = model_dict['visual_observation_function']

    config = model.config
    env_config = load_yaml(os.path.join(get_project_root(), 'experiments/exp_18_box_on_side/config.yaml'))
    env_config['env']['observation']['depth_int16'] = True
    n_history = config['train']['n_history']

    # create the environment
    env = DrakePusherSliderEnv(env_config)
    env.reset()

    # create another environment for doing rollouts
    env2 = DrakePusherSliderEnv(env_config, visualize=False)
    env2.reset()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(config)

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  visual_observation_function=visual_observation_function,
                                                  action_function=action_function,
                                                  episode=episode)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    initial_cond = get_initial_state()
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    obs_init = env.get_observation()

    # visualize starting position of the object
    print("obs_init.keys()", obs_init.keys())
    print("obs_init['slider']['position']", obs_init['slider']['position'])
    T = DrakePusherSliderEnv.object_position_from_observation(obs_init)
    vis['start_pose'].set_object(triad(scale=0.1))
    vis['start_pose'].set_transform(T)

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])

    # seed the episode with n_history (observation, zero-action) pairs
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    #### ROLLOUT THE ACTION SEQUENCE USING THE SIMULATOR ##########
    # rollout single action sequence using the simulator
    gt_rollout_data = env_utils.rollout_action_sequence(env, initial_cond['action_sequence'].cpu().numpy())
    env_obs_rollout_gt = gt_rollout_data['observations']
    gt_rollout_episode = gt_rollout_data['episode_reader']
    N = initial_cond['action_sequence'].shape[0]  # number of steps in the nominal action sequence

    for i, env_obs in enumerate(gt_rollout_data['observations']):
        T = DrakePusherSliderEnv.object_position_from_observation(env_obs)
        vis_name = "GT_trajectory/%d" % (i)
        vis[vis_name].set_object(triad(scale=0.1))
        vis[vis_name].set_transform(T)

    action_state_gt = mpc_input_builder.get_action_state_tensors(start_idx=0,
                                                                 num_timesteps=N,
                                                                 episode=gt_rollout_episode)

    state_rollout_gt = action_state_gt['states']
    action_rollout_gt = action_state_gt['actions']
    z_object_rollout_gt = model.compute_z_state(state_rollout_gt)['z_object_flat']
    print('state_rollout_gt.shape', state_rollout_gt.shape)
    print("z_object_rollout_gt.shape", z_object_rollout_gt.shape)

    def goal_func(obs_tmp):
        state_tmp = mpc_input_builder.get_state_input_single_timestep({'observation': obs_tmp})['state']
        return model.compute_z_state(state_tmp.unsqueeze(0))['z_object'].flatten()

    # using the vision model to get "goal" keypoints
    z_object_goal = goal_func(env_obs_rollout_gt[-1])
    z_object_goal_np = torch_utils.cast_to_numpy(z_object_goal)

    # input("press Enter to continue")

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])

    # seed the episode with n_history (observation, zero-action) pairs
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # [n_history, state_dim]
    idx = episode.get_latest_idx()
    dyna_net_input = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
    state_init = dyna_net_input['states'].cuda()  # [n_history, state_dim]
    action_init = dyna_net_input['actions']  # [n_history, action_dim]

    print("state_init.shape", state_init.shape)
    print("action_init.shape", action_init.shape)
    print("n_history", n_history)

    action_seq_gt_torch = initial_cond['action_sequence']
    action_input = torch.cat((action_init[:(n_history - 1)], action_seq_gt_torch), dim=0).cuda()
    print("action_input.shape", action_input.shape)

    # rollout using the ground truth actions and learned model
    # need to add the batch dim to do that
    z_init = model.compute_z_state(state_init)['z']
    rollout_pred = rollout_model(state_init=z_init.unsqueeze(0),
                                 action_seq=action_input.unsqueeze(0),
                                 dynamics_net=model,
                                 compute_debug_data=True)

    state_pred_rollout = rollout_pred['state_pred'].squeeze(0)
    print("state_pred_rollout.shape", state_pred_rollout.shape)

    # input("press Enter to continue")

    # check L2 distance between predicted and actual,
    # basically comparing state_pred_rollout and state_rollout_gt
    print("state_rollout_gt[-1]\n", state_rollout_gt[-1])
    print("state_pred_rollout[-1]\n", state_pred_rollout[-1])

    index_dict = get_object_and_robot_state_indices(config)
    object_indices = index_dict['object_indices']

    # reset the environment and set up the MPC controller to stabilize this trajectory
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])

    # seed the episode with n_history (observation, zero-action) pairs
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # input("press Enter to continue")

    # make a planner config
    planner_config = copy.copy(config)
    config_tmp = load_yaml(os.path.join(get_project_root(), 'experiments/drake_pusher_slider/eval_config.yaml'))
    planner_config['mpc'] = config_tmp['mpc']
    planner_config['mpc']['mppi']['terminal_cost_only'] = TERMINAL_COST_ONLY
    planner_config['mpc']['random_shooting']['terminal_cost_only'] = TERMINAL_COST_ONLY

    planner = None
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    mpc_out = None
    action_seq_mpc = None
    state_pred_mpc = None
    counter = -1
    while True:
        counter += 1
        print("\n\n-----Running MPC Optimization: Counter (%d)-------" % (counter))

        obs_cur = env.get_observation()
        episode.add_observation_only(obs_cur)

        if counter == 0 or REPLAN:
            print("replanning")
            ####### Run the MPC ##########

            # [1, state_dim]
            n_look_ahead = N - counter
            if USE_FIXED_MPC_HORIZON:
                n_look_ahead = MPC_HORIZON
            elif USE_SHORT_HORIZON_MPC:
                n_look_ahead = min(MPC_HORIZON, N - counter)

            if n_look_ahead == 0:
                break

            start_idx = counter
            end_idx = counter + n_look_ahead

            print("start_idx:", start_idx)
            print("end_idx:", end_idx)
            print("n_look_ahead", n_look_ahead)

            # start_time = time.time()
            # idx of current observation
            idx = episode.get_latest_idx()

            mpc_start_time = time.time()
            mpc_input_data = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
            state_cur = mpc_input_data['states']
            action_his = mpc_input_data['actions']

            if SEED_WITH_NOMINAL_ACTIONS:
                action_seq_rollout_init = action_seq_gt_torch[start_idx:end_idx]
            else:
                if mpc_out is not None:
                    action_seq_rollout_init = mpc_out['action_seq'][1:]
                    print("action_seq_rollout_init.shape", action_seq_rollout_init.shape)

                    if action_seq_rollout_init.shape[0] < n_look_ahead:
                        num_steps = n_look_ahead - action_seq_rollout_init.shape[0]
                        action_seq_zero = torch.zeros([num_steps, 2])
                        action_seq_rollout_init = torch.cat((action_seq_rollout_init, action_seq_zero), dim=0)
                        print("action_seq_rollout_init.shape", action_seq_rollout_init.shape)
                else:
                    action_seq_rollout_init = None

            # run MPPI
            z_cur = None
            with torch.no_grad():
                z_cur = model.compute_z_state(state_cur.unsqueeze(0).cuda())['z'].squeeze(0)

            if action_seq_rollout_init is not None:
                n_look_ahead = action_seq_rollout_init.shape[0]

            obs_goal = None
            print("z_object_rollout_gt.shape", z_object_rollout_gt.shape)
            if TRAJECTORY_GOAL:
                obs_goal = z_object_rollout_gt[start_idx:end_idx]

                print("n_look_ahead", n_look_ahead)
                print("obs_goal.shape", obs_goal.shape)

                # add the final goal state on as needed
                if obs_goal.shape[0] < n_look_ahead:
                    obs_goal_final = z_object_rollout_gt[-1].unsqueeze(0)
                    num_repeat = n_look_ahead - obs_goal.shape[0]
                    obs_goal_final_expand = obs_goal_final.expand([num_repeat, -1])
                    obs_goal = torch.cat((obs_goal, obs_goal_final_expand), dim=0)
            else:
                obs_goal = z_object_rollout_gt[-1]

            obs_goal = torch_utils.cast_to_numpy(obs_goal)
            print("obs_goal.shape", obs_goal.shape)

            seed = 1
            set_seed(seed)
            mpc_out = planner.trajectory_optimization(state_cur=z_cur,
                                                      action_his=action_his,
                                                      obs_goal=obs_goal,
                                                      model_dy=model,
                                                      action_seq_rollout_init=action_seq_rollout_init,
                                                      n_look_ahead=n_look_ahead,
                                                      eval_indices=object_indices,
                                                      rollout_best_action_sequence=True,
                                                      verbose=True,
                                                      add_grid_action_samples=True,
                                                      )

            print("MPC step took %.4f seconds" % (time.time() - mpc_start_time))

            action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
            state_pred_mpc = torch_utils.cast_to_numpy(mpc_out['state_pred'])

            # Rollout with ground truth simulator dynamics
            env2.set_simulator_state_from_observation_dict(env2.get_mutable_context(), obs_cur)
            obs_mpc_gt = env_utils.rollout_action_sequence(env2, action_seq_mpc)['observations']

            vis['mpc_3D'].delete()
            vis['mpc_GT_3D'].delete()

            L = len(obs_mpc_gt)
            print("L", L)
            if L == 0:
                break

            for i in range(L):
                # ground truth rollout of the MPC action_seq
                name = "mpc_GT_3D/%d" % (i)
                T_W_obj = DrakePusherSliderEnv.object_position_from_observation(obs_mpc_gt[i])
                vis[name].set_object(triad(scale=0.1))
                vis[name].set_transform(T_W_obj)

        action_cur = action_seq_mpc[0]

        print("action_cur", action_cur)
        print("action_GT", initial_cond['action'])
        input("press Enter to continue")

        # add observation actions to the episode
        obs_cur = env.get_observation()
        episode.replace_observation_action(obs_cur, action_cur)

        # step the simulator
        env.step(action_cur)

        # update the trajectories, in case we aren't replanning
        action_seq_mpc = action_seq_mpc[1:]
        state_pred_mpc = state_pred_mpc[1:]

        pose_error = compute_pose_error(env_obs_rollout_gt[-1], obs_cur)
        print("position_error: %.3f" % (pose_error['position_error']))
        print("angle error degrees: %.3f" % (pose_error['angle_error_degrees']))

    obs_final = env.get_observation()

    pose_error = compute_pose_error(env_obs_rollout_gt[-1], obs_final)
    print("position_error: %.3f" % (pose_error['position_error']))
    print("angle error degrees: %.3f" % (pose_error['angle_error_degrees']))
def main():
    d = load_model_and_data()
    model_dy = d['model_dy']
    dataset = d['dataset']
    config = d['config']
    multi_episode_dict = d['multi_episode_dict']
    planner = d['planner']
    planner_config = planner.config

    idx_dict = get_object_and_robot_state_indices(config)
    object_indices = idx_dict['object_indices']
    robot_indices = idx_dict['robot_indices']

    n_his = config['train']['n_history']

    # save_dir = os.path.join(get_project_root(), 'sandbox/mpc/', get_current_YYYY_MM_DD_hh_mm_ss_ms())
    save_dir = os.path.join(get_project_root(), 'sandbox/mpc/push_right_box_horizontal')
    print("save_dir", save_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # rotate
    # episode_names = dataset.get_episode_names()
    # print("len(episode_names)", len(episode_names))
    # episode_name = episode_names[0]
    # start_idx = 1
    # n_roll = 15

    # # straight + rotate
    # episode_name = "2020-06-29-21-04-16"
    # print('episode_name', episode_name)
    # start_idx = 1
    # n_roll = 15
    # this is a nice straight push . . .

    # push with box in horizontal position
    episode_name = "2020-06-29-22-03-45"
    start_idx = 2
    n_roll = 10

    # # validation set episodes
    # episode_names = dataset.get_episode_names()
    # print("len(episode_names)", len(episode_names))
    # episode_name = episode_names[1]
    # start_idx = 2
    # n_roll = 15

    camera_name = "d415_01"
    episode = multi_episode_dict[episode_name]
    print("episode_name", episode_name)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    idx_list = list(range(start_idx, start_idx + n_roll + 1))
    idx_list_GT = idx_list
    goal_idx = idx_list[-1]
    print("idx_list", idx_list)

    # visualize ground truth rollout
    if True:
        for display_idx, episode_idx in enumerate(idx_list):
            visualize_episode_data_single_timestep(vis=vis,
                                                   dataset=dataset,
                                                   episode=episode,
                                                   camera_name=camera_name,
                                                   episode_idx=episode_idx,
                                                   display_idx=episode_idx,
                                                   )

    data_goal = dataset._getitem(episode, goal_idx, rollout_length=1, n_history=1)
    states_goal = data_goal['observations_combined'][0]
    z_states_goal = model_dy.compute_z_state(states_goal)['z']
    print("states_goal.shape", states_goal.shape)
    print("z_states_goal.shape", z_states_goal.shape)

    ##### VISUALIZE PREDICTED ROLLOUT ##########
    data = dataset._getitem(episode, start_idx, rollout_length=n_roll)
    states = data['observations_combined'].unsqueeze(0)
    z = model_dy.compute_z_state(states)['z']
    actions = data['actions'].unsqueeze(0)
    idx_range_model_dy_input = data['idx_range']

    print("data.keys()", data.keys())
    print("data['idx_range']", data['idx_range'])

    # z_init
    z_init = z[:, :n_his]

    # actions_init
    action_start_idx = 0
    action_end_idx = n_his + n_roll - 1
    action_seq = actions[:, action_start_idx:action_end_idx]
    print("action_seq GT\n", action_seq)

    with torch.no_grad():
        rollout_data = rollout_model(state_init=z_init.cuda(),
                                     action_seq=action_seq.cuda(),
                                     dynamics_net=model_dy,
                                     compute_debug_data=False)

    # [B, n_roll, state_dim]
    # state_rollout_pred = rollout_data['state_pred']
    z_rollout_pred = rollout_data['state_pred'].squeeze()
    print("z_rollout_pred.shape", z_rollout_pred.shape)

    if True:
        for idx in range(len(z_rollout_pred)):
            display_idx = data['idx_range'][idx + n_his]
            visualize_model_prediction_single_timestep(vis,
                                                       config,
                                                       z_pred=z_rollout_pred[idx],
                                                       display_idx=display_idx)

    print("z_rollout_pred.shape", z_rollout_pred.shape)

    # compute loss when rolled out using GT action sequence
    eval_indices = object_indices
    obs_goal = z_states_goal[object_indices].cuda()
    reward_data = planner_utils.evaluate_model_rollout(state_pred=rollout_data['state_pred'],
                                                       obs_goal=obs_goal,
                                                       eval_indices=eval_indices,
                                                       terminal_cost_only=planner_config['mpc']['mppi']['terminal_cost_only'],
                                                       p=planner_config['mpc']['mppi']['cost_norm'])

    print("reward_data using action_seq_GT\n", reward_data['reward'])

    ##### MPC ##########
    data = dataset._getitem(episode, start_idx, rollout_length=0, n_history=config['train']['n_history'])
    state_cur = data['observations_combined'].cuda()
    z_state_cur = model_dy.compute_z_state(state_cur)['z']
    action_his = data['actions'][:(n_his - 1)].cuda()

    print("state_cur.shape", state_cur.shape)
    print("action_his.shape", action_his.shape)

    # don't seed with nominal actions just yet
    action_seq_rollout_init = None

    set_seed(SEED)
    mpc_out = planner.trajectory_optimization(state_cur=z_state_cur,
                                              action_his=action_his,
                                              obs_goal=obs_goal,
                                              model_dy=model_dy,
                                              action_seq_rollout_init=action_seq_rollout_init,
                                              n_look_ahead=n_roll,
                                              eval_indices=object_indices,
                                              rollout_best_action_sequence=True,
                                              verbose=True,
                                              add_grid_action_samples=True,
                                              )

    print("\n\n------MPC output-------\n\n")
    print("action_seq:\n", mpc_out['action_seq'])

    mpc_state_pred = mpc_out['state_pred']

    # current shape is [n_roll + 1, state_dim] but really should be
    # [n_roll, state_dim] . . . something is up
    print("mpc_state_pred.shape", mpc_state_pred.shape)
    print("mpc_out['action_seq'].shape", mpc_out['action_seq'].shape)
    print("n_roll", n_roll)

    # visualize
    for idx in range(n_roll):
        episode_idx = start_idx + idx + 1
        visualize_model_prediction_single_timestep(vis,
                                                   config,
                                                   z_pred=mpc_state_pred[idx],
                                                   display_idx=episode_idx,
                                                   name_prefix="mpc",
                                                   color=[255, 0, 0])

    ######## MPC w/ dynamics model input builder #############
    print("\n\n-----DynamicsModelInputBuilder-----")

    # dynamics model input builder
    online_episode = OnlineEpisodeReader(no_copy=True)

    ref_descriptors = d['spatial_descriptor_data']['spatial_descriptors']
    ref_descriptors = torch_utils.cast_to_torch(ref_descriptors).cuda()
    K_matrix = episode.image_episode.camera_K_matrix(camera_name)
    T_world_camera = episode.image_episode.camera_pose(camera_name, 0)
    visual_observation_function = \
        VisualObservationFunctionFactory.descriptor_keypoints_3D(config=config,
                                                                 camera_name=camera_name,
                                                                 model_dd=d['model_dd'],
                                                                 ref_descriptors=ref_descriptors,
                                                                 K_matrix=K_matrix,
                                                                 T_world_camera=T_world_camera,
                                                                 )

    input_builder = DynamicsModelInputBuilder(observation_function=d['observation_function'],
                                              visual_observation_function=visual_observation_function,
                                              action_function=d['action_function'],
                                              episode=online_episode)

    compute_control_action_msg = dict()
    compute_control_action_msg['type'] = "COMPUTE_CONTROL_ACTION"

    for i in range(n_his):
        episode_idx = idx_range_model_dy_input[i]
        print("episode_idx", episode_idx)

        # add image information to the episode data
        data = add_images_to_episode_data(episode, episode_idx, camera_name)
        online_episode.add_data(copy.deepcopy(data))
        compute_control_action_msg['data'] = data

        # hack for seeing how much the history matters . . .
        # online_episode.add_data(copy.deepcopy(data))

    # save information for running the zmq controller
    save_pickle(compute_control_action_msg, os.path.join(save_dir, 'compute_control_action_msg.p'))

    goal_idx = idx_list_GT[-1]
    goal_data = add_images_to_episode_data(episode, goal_idx, camera_name)
    goal_data['observations']['timestamp_system'] = time.time()

    plan_msg = {'type': "PLAN",
                'data': [goal_data],
                'n_roll': n_roll,
                'K_matrix': K_matrix,
                'T_world_camera': T_world_camera,
                }
    save_pickle(plan_msg, os.path.join(save_dir, "plan_msg.p"))

    print("len(online_episode)", len(online_episode))

    # use the input builder to construct the dynamics model input and
    # verify it's the same as what we got from using the dataset directly
    idx = online_episode.get_latest_idx()
    mpc_input_data = input_builder.get_dynamics_model_input(idx, n_history=n_his)
    # print("mpc_input_data\n", mpc_input_data)

    state_cur_ib = mpc_input_data['states'].cuda()
    action_his_ib = mpc_input_data['actions'].cuda()
    z_state_cur_ib = model_dy.compute_z_state(state_cur_ib)['z']

    set_seed(SEED)
    mpc_out = planner.trajectory_optimization(state_cur=z_state_cur_ib,
                                              action_his=action_his_ib,
                                              obs_goal=obs_goal,
                                              model_dy=model_dy,
                                              action_seq_rollout_init=None,
                                              n_look_ahead=n_roll,
                                              eval_indices=object_indices,
                                              rollout_best_action_sequence=True,
                                              verbose=True,
                                              add_grid_action_samples=True,
                                              )

    # visualize
    for idx in range(n_roll):
        episode_idx = start_idx + idx + 1
        visualize_model_prediction_single_timestep(vis,
                                                   config,
                                                   z_pred=mpc_out['state_pred'][idx],
                                                   display_idx=episode_idx,
                                                   name_prefix="mpc_input_builder",
                                                   color=[255, 255, 0])