示例#1
0
def rollout_action_sequence(
        env,  # gym env
        action_seq,  # [N, action_dim]
):
    """
    Rollout the action sequence using the simulator
    Record the observations
    """

    episode_reader = OnlineEpisodeReader()

    obs_list = []
    N = action_seq.shape[0]
    for i in range(N):
        action = action_seq[i]
        obs, reward, done, info = env.step(action)
        episode_reader.add_observation_action(obs, action)
        obs_list.append(obs)

    return {
        'observations': obs_list,
        'episode_reader': episode_reader,
    }
示例#2
0
def main():
    # load dynamics model
    model_dict = load_model_state_dict()
    model = model_dict['model_dy']
    model_dd = model_dict['model_dd']
    config = model.config

    env_config = load_yaml(os.path.join(get_project_root(), 'experiments/exp_20_mugs/config.yaml'))
    env_config['env']['observation']['depth_int16'] = True
    n_history = config['train']['n_history']

    initial_cond = generate_initial_condition(env_config, push_length=PUSH_LENGTH)
    env_config = initial_cond['config']

    # enable the right observations

    camera_name = model_dict['metadata']['camera_name']
    spatial_descriptor_data = model_dict['spatial_descriptor_data']
    ref_descriptors = spatial_descriptor_data['spatial_descriptors']
    K = ref_descriptors.shape[0]

    ref_descriptors = torch.Tensor(ref_descriptors).cuda()  # put them on the GPU

    print("ref_descriptors\n", ref_descriptors)
    print("ref_descriptors.shape", ref_descriptors.shape)

    # create the environment
    # create the environment
    env = DrakeMugsEnv(env_config)
    env.reset()

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)

    # create another environment for doing rollouts
    env2 = DrakeMugsEnv(env_config, visualize=False)
    env2.reset()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(config)
    visual_observation_function = \
        VisualObservationFunctionFactory.descriptor_keypoints_3D(config=config,
                                                                 camera_name=camera_name,
                                                                 model_dd=model_dd,
                                                                 ref_descriptors=ref_descriptors,
                                                                 K_matrix=camera_K_matrix,
                                                                 T_world_camera=T_world_camera,
                                                                 )

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  visual_observation_function=visual_observation_function,
                                                  action_function=action_function,
                                                  episode=episode)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    obs_init = env.get_observation()

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    def goal_func(obs_tmp):
        state_tmp = mpc_input_builder.get_state_input_single_timestep({'observation': obs_tmp})['state']
        return model.compute_z_state(state_tmp.unsqueeze(0))['z_object'].flatten()


    #
    idx = episode.get_latest_idx()
    obs_raw = episode.get_observation(idx)
    z_object_goal = goal_func(obs_raw)
    z_keypoints_init_W = keypoints_3D_from_dynamics_model_output(z_object_goal, K)
    z_keypoints_init_W = torch_utils.cast_to_numpy(z_keypoints_init_W)

    z_keypoints_obj = keypoints_world_frame_to_object_frame(z_keypoints_init_W,
                                                          T_W_obj=slider_pose_from_observation(obs_init))

    color = [1, 0, 0]
    meshcat_utils.visualize_points(vis=vis,
                                   name="keypoints_W",
                                   pts=z_keypoints_init_W,
                                   color=color,
                                   size=0.02,
                                   )

    # input("press Enter to continue")

    # rollout single action sequence using the simulator
    action_sequence_np = torch_utils.cast_to_numpy(initial_cond['action_sequence'])
    N = action_sequence_np.shape[0]
    obs_rollout_gt = env_utils.rollout_action_sequence(env, action_sequence_np)[
        'observations']

    # using the vision model to get "goal" keypoints
    z_object_goal = goal_func(obs_rollout_gt[-1])
    z_object_goal_np = torch_utils.cast_to_numpy(z_object_goal)
    z_keypoints_goal = keypoints_3D_from_dynamics_model_output(z_object_goal, K)
    z_keypoints_goal = torch_utils.cast_to_numpy(z_keypoints_goal)

    # visualize goal keypoints
    color = [0, 1, 0]
    meshcat_utils.visualize_points(vis=vis,
                                   name="goal_keypoints",
                                   pts=z_keypoints_goal,
                                   color=color,
                                   size=0.02,
                                   )

    # input("press Enter to continue")

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # [n_history, state_dim]
    idx = episode.get_latest_idx()

    dyna_net_input = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
    state_init = dyna_net_input['states'].cuda() # [n_history, state_dim]
    action_init = dyna_net_input['actions'] # [n_history, action_dim]


    print("state_init.shape", state_init.shape)
    print("action_init.shape", action_init.shape)


    action_seq_gt_torch = torch_utils.cast_to_torch(initial_cond['action_sequence'])
    action_input = torch.cat((action_init[:(n_history-1)], action_seq_gt_torch), dim=0).cuda()
    print("action_input.shape", action_input.shape)


    # rollout using the ground truth actions and learned model
    # need to add the batch dim to do that
    z_init = model.compute_z_state(state_init)['z']
    rollout_pred = rollout_model(state_init=z_init.unsqueeze(0),
                                 action_seq=action_input.unsqueeze(0),
                                 dynamics_net=model,
                                 compute_debug_data=True)

    state_pred_rollout = rollout_pred['state_pred']

    print("state_pred_rollout.shape", state_pred_rollout.shape)

    for i in range(N):
        # vis GT for now
        name = "GT_3D/%d" % (i)
        T_W_obj = slider_pose_from_observation(obs_rollout_gt[i])
        # print("T_W_obj", T_W_obj)

        # green
        color = np.array([0, 1, 0]) * get_color_intensity(i, N)
        meshcat_utils.visualize_points(vis=vis,
                                       name=name,
                                       pts=z_keypoints_obj,
                                       color=color,
                                       size=0.01,
                                       T=T_W_obj)

        # red
        color = np.array([0, 0, 1]) * get_color_intensity(i, N)
        state_pred = state_pred_rollout[:, i, :]
        pts_pred = keypoints_3D_from_dynamics_model_output(state_pred, K).squeeze()
        pts_pred = pts_pred.detach().cpu().numpy()
        name = "pred_3D/%d" % (i)
        meshcat_utils.visualize_points(vis=vis,
                                       name=name,
                                       pts=pts_pred,
                                       color=color,
                                       size=0.01,
                                       )

    # input("finished visualizing GT rollout\npress Enter to continue")
    index_dict = get_object_and_robot_state_indices(config)
    object_indices = index_dict['object_indices']

    # reset the environment and use the MPC controller to stabilize this
    # now setup the MPC to try to stabilize this . . . .
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    episode.clear()

    # add just some large number of these
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # input("press Enter to continue")

    # make a planner config
    planner_config = copy.copy(config)
    config_tmp = load_yaml(os.path.join(get_project_root(), 'experiments/drake_pusher_slider/eval_config.yaml'))
    planner_config['mpc'] = config_tmp['mpc']
    planner = None
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    mpc_out = None
    action_seq_mpc = None
    state_pred_mpc = None
    counter = -1
    while True:
        counter += 1
        print("\n\n-----Running MPC Optimization: Counter (%d)-------" % (counter))

        obs_cur = env.get_observation()
        episode.add_observation_only(obs_cur)

        if counter == 0 or REPLAN:
            print("replanning")
            ####### Run the MPC ##########

            # [1, state_dim]

            n_look_ahead = N - counter
            if USE_FIXED_MPC_HORIZON:
                n_look_ahead = MPC_HORIZON
            if n_look_ahead == 0:
                break

            # start_time = time.time()
            # idx of current observation
            idx = episode.get_latest_idx()
            mpc_start_time = time.time()
            mpc_input_data = mpc_input_builder.get_dynamics_model_input(idx, n_history=n_history)
            state_cur = mpc_input_data['states']
            action_his = mpc_input_data['actions']

            if mpc_out is not None:
                action_seq_rollout_init = mpc_out['action_seq'][1:]
            else:
                action_seq_rollout_init = None

            # run MPPI
            z_cur = None
            with torch.no_grad():
                z_cur = model.compute_z_state(state_cur.unsqueeze(0).cuda())['z'].squeeze(0)



            mpc_out = planner.trajectory_optimization(state_cur=z_cur,
                                                      action_his=action_his,
                                                      obs_goal=z_object_goal_np,
                                                      model_dy=model,
                                                      action_seq_rollout_init=action_seq_rollout_init,
                                                      n_look_ahead=n_look_ahead,
                                                      eval_indices=object_indices,
                                                      rollout_best_action_sequence=True,
                                                      verbose=True,
                                                      )

            print("MPC step took %.4f seconds" %(time.time() - mpc_start_time))
            action_seq_mpc = mpc_out['action_seq'].cpu().numpy()


        # Rollout with ground truth simulator dynamics
        action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
        env2.set_simulator_state_from_observation_dict(env2.get_mutable_context(), obs_cur)
        obs_mpc_gt = env_utils.rollout_action_sequence(env2, action_seq_mpc)['observations']
        state_pred_mpc = torch_utils.cast_to_numpy(mpc_out['state_pred'])

        vis['mpc_3D'].delete()
        vis['mpc_GT_3D'].delete()

        L = len(obs_mpc_gt)
        print("L", L)
        if L == 0:
            break
        for i in range(L):
            # red
            color = np.array([1, 0, 0]) * get_color_intensity(i, L)
            state_pred = state_pred_mpc[i, :]
            state_pred = np.expand_dims(state_pred, 0)  # may need to expand dims here
            pts_pred = keypoints_3D_from_dynamics_model_output(state_pred, K).squeeze()
            name = "mpc_3D/%d" % (i)
            meshcat_utils.visualize_points(vis=vis,
                                           name=name,
                                           pts=pts_pred,
                                           color=color,
                                           size=0.01,
                                           )

            # ground truth rollout of the MPC action_seq
            name = "mpc_GT_3D/%d" % (i)
            T_W_obj = slider_pose_from_observation(obs_mpc_gt[i])

            # green
            color = np.array([1, 1, 0]) * get_color_intensity(i, L)
            meshcat_utils.visualize_points(vis=vis,
                                           name=name,
                                           pts=z_keypoints_obj,
                                           color=color,
                                           size=0.01,
                                           T=T_W_obj)

        action_cur = action_seq_mpc[0]

        print("action_cur", action_cur)
        # print("action_GT", initial_cond['action'])
        input("press Enter to continue")

        # add observation actions to the episode
        obs_cur = env.get_observation()
        episode.replace_observation_action(obs_cur, action_cur)

        # step the simulator
        env.step(action_cur)

        # visualize current keypoint positions
        obs_cur = env.get_observation()
        T_W_obj = slider_pose_from_observation(obs_cur)

        # yellow
        color = np.array([1, 1, 0])
        meshcat_utils.visualize_points(vis=vis,
                                       name="keypoint_cur",
                                       pts=z_keypoints_obj,
                                       color=color,
                                       size=0.02,
                                       T=T_W_obj)

        action_seq_mpc = action_seq_mpc[1:]
        state_pred_mpc = state_pred_mpc[1:]

    obs_final = env.get_observation()

    pose_error = compute_pose_error(obs_rollout_gt[-1],
                                    obs_final)

    print("position_error: %.3f"  %(pose_error['position_error']))
    print("angle error degrees: %.3f" %(pose_error['angle_error_degrees']))
def evaluate_mpc_z_state(
    model_dir,
    config_planner_mpc=None,
    save_dir=None,
    planner_type=None,
    env_config=None,
    strict=True,
    generate_initial_condition_func=None,
    env_type="DrakePusherSliderEnv",
):
    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    model_dict = load_model(model_dir, strict=strict)
    model_dy = model_dict['model_dy']['model_dy']
    config = model_dict['model_dy']['config']
    model_config = config

    model_kp = model_dict['model_kp']['model']
    config_kp = model_kp.config
    camera_name = config_kp['perception']['camera_name']

    # create the environment
    # create the environment
    env = None
    if env_type == "DrakePusherSliderEnv":
        env = DrakePusherSliderEnv(env_config, visualize=False)
    elif env_type == "DrakeMugsEnv":
        env = DrakeMugsEnv(env_config, visualize=False)
    else:
        raise ValueError("unknown env type: %s" % (env_type))
    env.reset()

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)
    mask_labels = env.get_labels_to_mask_list()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)
    visual_observation_function = \
        VisualObservationFunctionFactory.function_from_config(config,
                                                              camera_name=camera_name,
                                                              model_kp=model_kp,
                                                              K_matrix=camera_K_matrix,
                                                              T_world_camera=T_world_camera,
                                                              mask_labels=mask_labels)

    episode = OnlineEpisodeReader()

    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    def goal_func(obs_tmp):
        state_tmp = mpc_input_builder.get_state_input_single_timestep(
            {'observation': obs_tmp})['state']
        # z_dict= model_dy.compute_z_state(state_tmp.unsqueeze(0))
        # print("z_dict['z_object'].shape", z_dict['z_object'].shape)
        return model_dy.compute_z_state(
            state_tmp.unsqueeze(0))['z_object_flat']

    index_dict = get_object_and_robot_state_indices(model_config)
    object_indices = index_dict['object_indices']

    # make a planner config, same as model config but with mpc and eval sections
    # replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    planner = None
    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(
        model_dy=model_dy,
        env=env,
        episode=episode,
        mpc_input_builder=mpc_input_builder,
        planner=planner,
        eval_indices=object_indices,
        goal_func=goal_func,
        config=planner_config,
        wait_for_user_input=False,
        save_dir=save_dir,
        model_name="test",
        experiment_name="test",
        generate_initial_condition_func=generate_initial_condition_func)

    return {'save_dir': save_dir}
示例#4
0
    def _on_plan_msg(self, msg):
        """
        Received a plan

        - apply the 'visual_observation_function' to each image in the plan
        - store it in the same data struct
        - (maybe) make a 'Plan' object? this is probably wise
        - setup a new controller? maybe just re-use the old one?


        maybe wrap in try/finally block so we don't leave the python side hanging?
        :param msg:
        :type msg:
        :return:
        :rtype:
        """

        assert self._state == ControllerState.STOPPED

        print("\n\n----- RECEIVED PLAN ------")

        plan_data = msg['data']
        msg_data = msg['data']

        print("len(plan_data)", len(plan_data))

        self._state_dict = dict()
        sd = self._state_dict

        episode = OnlineEpisodeReader(no_copy=True)
        sd['episode'] = episode

        input_builder = DynamicsModelInputBuilder(
            observation_function=self._observation_function,
            visual_observation_function=self._visual_observation_function,
            action_function=self._action_function,
            episode=episode)

        plan_container = PlanContainer(
            data=msg_data,
            dynamics_model_input_builder=input_builder,
            model_dy=self._model_dy,
            debug=False)

        self._state_dict = {
            'episode': episode,
            'input_builder': input_builder,
            'plan_msg': msg,
            'plan': plan_container,
            'action_counter': 0,
            'timestamp_system': [],  # list of when we took the actions
            'mpc_out': None,
        }

        self._state = ControllerState.PLAN_READY
        resp = {
            'type': 'PLAN',
            'data': 'READY',
        }

        print("----FINISHED PROCESSING PLAN-----")
        return resp
示例#5
0
def evaluate_mpc(model_dir,
                 config_planner_mpc=None,
                 save_dir=None,
                 planner_type=None,
                 env_config=None,
                 strict=True,
                 generate_initial_condition_func=None,
                 ):

    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    model_data = model_builder.load_dynamics_model_from_folder(model_dir, strict=strict)
    model_dy = model_data['model_dy']
    model_dy = model_dy.eval()
    model_dy = model_dy.cuda()
    model_config = model_dy.config




    # create the environment
    env = DrakePusherSliderEnv(env_config, visualize=False)
    env.reset()

    observation_function = ObservationFunctionFactory.function_from_config(model_config)
    action_function = ActionFunctionFactory.function_from_config(model_config)
    episode = OnlineEpisodeReader()

    mpc_input_builder = DynamicsModelInputBuilder(observation_function=observation_function,
                                                  action_function=action_function,
                                                  visual_observation_function=None,
                                                  episode=episode)

    pts_GT = np.array(model_config['dataset']['observation_function']['GT_3D_object_points'])
    K = pts_GT.shape[0]  # num keypoints
    eval_indices = np.arange(3 * K)  # extract the keypoints

    def goal_func(obs_local):
        """
        Helper function for getting the goal state from an observation
        """
        return observation_function(obs_local)[eval_indices]


    # make a planner config, same as model config but with mpc and eval sections
    # replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    planner = None
    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" %(planner_type))

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(model_dy=model_dy,
                                              env=env,
                                              episode=episode,
                                              mpc_input_builder=mpc_input_builder,
                                              planner=planner,
                                              eval_indices=eval_indices,
                                              goal_func=goal_func,
                                              config=planner_config,
                                              wait_for_user_input=False,
                                              save_dir=save_dir,
                                              model_name="test",
                                              experiment_name="test",
                                              generate_initial_condition_func=generate_initial_condition_func,
                                              )

    return {'save_dir': save_dir}
示例#6
0
def evaluate_mpc(
    model_dir,
    config_planner_mpc=None,
    save_dir=None,
    planner_type=None,
    env_config=None,
    strict=True,
    generate_initial_condition_func=None,
):

    assert save_dir is not None
    assert planner_type is not None
    assert env_config is not None
    assert generate_initial_condition_func is not None

    model_data = load_model(model_dir, strict=strict)
    model_dy = model_data['model_dy']
    model_config = model_dy.config
    config = model_config

    model_dd = model_data['model_dd']

    # create the environment
    env = DrakePusherSliderEnv(env_config, visualize=False)
    env.reset()

    camera_name = model_data['metadata']['camera_name']

    # sanity check
    camera_name_in_training = model_config['dataset'][
        'visual_observation_function']['camera_name']
    assert camera_name == camera_name_in_training, "camera_names don't match: camera_name = %s, camera_name_in_trainig = %s" % (
        camera_name, camera_name_in_training)

    T_world_camera = env.camera_pose(camera_name)
    camera_K_matrix = env.camera_K_matrix(camera_name)
    spatial_descriptor_data = model_data['spatial_descriptor_data']
    ref_descriptors = torch.Tensor(
        spatial_descriptor_data['spatial_descriptors']).cuda()
    K = ref_descriptors.shape[0]

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)
    visual_observation_function = \
        VisualObservationFunctionFactory.function_from_config(config,
                                                              camera_name=camera_name,
                                                              model_dd=model_dd,
                                                              ref_descriptors=ref_descriptors,
                                                              K_matrix=camera_K_matrix,
                                                              T_world_camera=T_world_camera,
                                                              )

    episode = OnlineEpisodeReader()

    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    def goal_func(obs_local):
        keypoints_dict = visual_observation_function(obs_local)
        return keypoints_dict['tensor']

    eval_indices = np.arange(3 * K)  # extract the keypoints

    # make a planner config, same as model config but with mpc and eval sections
    # replaced
    planner_config = copy.copy(model_config)
    if config_planner_mpc is not None:
        planner_config['mpc'] = config_planner_mpc['mpc']
        planner_config['eval'] = config_planner_mpc['eval']

    planner = None
    if planner_type == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif planner_type == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (planner_type))

    # run a single iteration
    mpc_eval_drake_pusher_slider.evaluate_mpc(
        model_dy=model_dy,
        env=env,
        episode=episode,
        mpc_input_builder=mpc_input_builder,
        planner=planner,
        eval_indices=eval_indices,
        goal_func=goal_func,
        config=planner_config,
        wait_for_user_input=False,
        save_dir=save_dir,
        model_name="test",
        experiment_name="test",
        generate_initial_condition_func=generate_initial_condition_func)

    return {'save_dir': save_dir}
示例#7
0
def main():
    # load dynamics model
    model_dict = load_autoencoder_model()
    model = model_dict['model_dy']
    model_ae = model_dict['model_ae']
    visual_observation_function = model_dict['visual_observation_function']

    config = model.config

    env_config = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/exp_18_box_on_side/config.yaml'))
    env_config['env']['observation']['depth_int16'] = True
    n_history = config['train']['n_history']

    # create the environment
    # create the environment
    env = DrakePusherSliderEnv(env_config)
    env.reset()

    # create another environment for doing rollouts
    env2 = DrakePusherSliderEnv(env_config, visualize=False)
    env2.reset()

    action_function = ActionFunctionFactory.function_from_config(config)
    observation_function = ObservationFunctionFactory.drake_pusher_position_3D(
        config)

    episode = OnlineEpisodeReader()
    mpc_input_builder = DynamicsModelInputBuilder(
        observation_function=observation_function,
        visual_observation_function=visual_observation_function,
        action_function=action_function,
        episode=episode)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    initial_cond = get_initial_state()
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    obs_init = env.get_observation()

    # visualize starting position of the object
    print("obs_init.keys()", obs_init.keys())
    print("obs_init['slider']['position']", obs_init['slider']['position'])
    T = DrakePusherSliderEnv.object_position_from_observation(obs_init)
    vis['start_pose'].set_object(triad(scale=0.1))
    vis['state_pose'].set_transform(T)

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    #### ROLLOUT THE ACTION SEQUENCE USING THE SIMULATOR ##########
    # rollout single action sequence using the simulator
    gt_rollout_data = env_utils.rollout_action_sequence(
        env, initial_cond['action_sequence'].cpu().numpy())
    env_obs_rollout_gt = gt_rollout_data['observations']
    gt_rollout_episode = gt_rollout_data['episode_reader']

    for i, env_obs in enumerate(gt_rollout_data['observations']):
        T = DrakePusherSliderEnv.object_position_from_observation(env_obs)
        vis_name = "GT_trajectory/%d" % (i)
        vis[vis_name].set_object(triad(scale=0.1))
        vis[vis_name].set_transform(T)

    action_state_gt = mpc_input_builder.get_action_state_tensors(
        start_idx=0, num_timesteps=N, episode=gt_rollout_episode)

    state_rollout_gt = action_state_gt['states']
    action_rollout_gt = action_state_gt['actions']
    z_object_rollout_gt = model.compute_z_state(
        state_rollout_gt)['z_object_flat']
    print('state_rollout_gt.shape', state_rollout_gt.shape)
    print("z_object_rollout_gt.shape", z_object_rollout_gt.shape)

    def goal_func(obs_tmp):
        state_tmp = mpc_input_builder.get_state_input_single_timestep(
            {'observation': obs_tmp})['state']
        return model.compute_z_state(
            state_tmp.unsqueeze(0))['z_object'].flatten()

    # using the vision model to get "goal" keypoints
    z_object_goal = goal_func(env_obs_rollout_gt[-1])
    z_object_goal_np = torch_utils.cast_to_numpy(z_object_goal)

    # input("press Enter to continue")

    #### ROLLOUT USING LEARNED MODEL + GROUND TRUTH ACTIONS ############
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    # add just some large number of these
    episode.clear()
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # [n_history, state_dim]
    idx = episode.get_latest_idx()

    dyna_net_input = mpc_input_builder.get_dynamics_model_input(
        idx, n_history=n_history)
    state_init = dyna_net_input['states'].cuda()  # [n_history, state_dim]
    action_init = dyna_net_input['actions']  # [n_history, action_dim]

    print("state_init.shape", state_init.shape)
    print("action_init.shape", action_init.shape)
    print("n_history", n_history)

    action_seq_gt_torch = initial_cond['action_sequence']
    action_input = torch.cat(
        (action_init[:(n_history - 1)], action_seq_gt_torch), dim=0).cuda()
    print("action_input.shape", action_input.shape)

    # rollout using the ground truth actions and learned model
    # need to add the batch dim to do that
    z_init = model.compute_z_state(state_init)['z']
    rollout_pred = rollout_model(state_init=z_init.unsqueeze(0),
                                 action_seq=action_input.unsqueeze(0),
                                 dynamics_net=model,
                                 compute_debug_data=True)

    state_pred_rollout = rollout_pred['state_pred'].squeeze(0)

    print("state_pred_rollout.shape", state_pred_rollout.shape)
    # input("press Enter to continue")

    # check L2 distance between predicted and actual
    # basically comparing state_pred_rollout and state_rollout_gt
    print("state_rollout_gt[-1]\n", state_rollout_gt[-1])
    print("state_pred_rollout[-1]\n", state_pred_rollout[-1])

    index_dict = get_object_and_robot_state_indices(config)
    object_indices = index_dict['object_indices']

    # reset the environment and use the MPC controller to stabilize this
    # now setup the MPC to try to stabilize this . . . .
    reset_environment(env, initial_cond['q_pusher'], initial_cond['q_slider'])
    episode.clear()

    # add just some large number of these
    for i in range(n_history):
        action_zero = np.zeros(2)
        obs_tmp = env.get_observation()
        episode.add_observation_action(obs_tmp, action_zero)

    # input("press Enter to continue")

    # make a planner config
    planner_config = copy.copy(config)
    config_tmp = load_yaml(
        os.path.join(get_project_root(),
                     'experiments/drake_pusher_slider/eval_config.yaml'))
    planner_config['mpc'] = config_tmp['mpc']

    planner_config['mpc']['mppi']['terminal_cost_only'] = TERMINAL_COST_ONLY
    planner_config['mpc']['random_shooting'][
        'terminal_cost_only'] = TERMINAL_COST_ONLY

    planner = None
    if PLANNER_TYPE == "random_shooting":
        planner = RandomShootingPlanner(planner_config)
    elif PLANNER_TYPE == "mppi":
        planner = PlannerMPPI(planner_config)
    else:
        raise ValueError("unknown planner type: %s" % (PLANNER_TYPE))

    mpc_out = None
    action_seq_mpc = None
    state_pred_mpc = None
    counter = -1
    while True:
        counter += 1
        print("\n\n-----Running MPC Optimization: Counter (%d)-------" %
              (counter))

        obs_cur = env.get_observation()
        episode.add_observation_only(obs_cur)

        if counter == 0 or REPLAN:
            print("replanning")
            ####### Run the MPC ##########

            # [1, state_dim]

            n_look_ahead = N - counter
            if USE_FIXED_MPC_HORIZON:
                n_look_ahead = MPC_HORIZON
            elif USE_SHORT_HORIZON_MPC:
                n_look_ahead = min(MPC_HORIZON, N - counter)
            if n_look_ahead == 0:
                break

            start_idx = counter
            end_idx = counter + n_look_ahead

            print("start_idx:", start_idx)
            print("end_idx:", end_idx)
            print("n_look_ahead", n_look_ahead)

            # start_time = time.time()
            # idx of current observation
            idx = episode.get_latest_idx()
            mpc_start_time = time.time()
            mpc_input_data = mpc_input_builder.get_dynamics_model_input(
                idx, n_history=n_history)
            state_cur = mpc_input_data['states']
            action_his = mpc_input_data['actions']

            if SEED_WITH_NOMINAL_ACTIONS:
                action_seq_rollout_init = action_seq_gt_torch[
                    start_idx:end_idx]
            else:
                if mpc_out is not None:
                    action_seq_rollout_init = mpc_out['action_seq'][1:]
                    print("action_seq_rollout_init.shape",
                          action_seq_rollout_init.shape)

                    if action_seq_rollout_init.shape[0] < n_look_ahead:
                        num_steps = n_look_ahead - action_seq_rollout_init.shape[
                            0]
                        action_seq_zero = torch.zeros([num_steps, 2])

                        action_seq_rollout_init = torch.cat(
                            (action_seq_rollout_init, action_seq_zero), dim=0)
                        print("action_seq_rollout_init.shape",
                              action_seq_rollout_init.shape)
                else:
                    action_seq_rollout_init = None

            # run MPPI
            z_cur = None
            with torch.no_grad():
                z_cur = model.compute_z_state(
                    state_cur.unsqueeze(0).cuda())['z'].squeeze(0)

            if action_seq_rollout_init is not None:
                n_look_ahead = action_seq_rollout_init.shape[0]

            obs_goal = None
            print("z_object_rollout_gt.shape", z_object_rollout_gt.shape)
            if TRAJECTORY_GOAL:
                obs_goal = z_object_rollout_gt[start_idx:end_idx]

                print("n_look_ahead", n_look_ahead)
                print("obs_goal.shape", obs_goal.shape)

                # add the final goal state on as needed
                if obs_goal.shape[0] < n_look_ahead:
                    obs_goal_final = z_object_rollout_gt[-1].unsqueeze(0)
                    num_repeat = n_look_ahead - obs_goal.shape[0]
                    obs_goal_final_expand = obs_goal_final.expand(
                        [num_repeat, -1])
                    obs_goal = torch.cat((obs_goal, obs_goal_final_expand),
                                         dim=0)
            else:
                obs_goal = z_object_rollout_gt[-1]

            obs_goal = torch_utils.cast_to_numpy(obs_goal)
            print("obs_goal.shape", obs_goal.shape)

            seed = 1

            set_seed(seed)
            mpc_out = planner.trajectory_optimization(
                state_cur=z_cur,
                action_his=action_his,
                obs_goal=obs_goal,
                model_dy=model,
                action_seq_rollout_init=action_seq_rollout_init,
                n_look_ahead=n_look_ahead,
                eval_indices=object_indices,
                rollout_best_action_sequence=True,
                verbose=True,
                add_grid_action_samples=True,
            )

            print("MPC step took %.4f seconds" %
                  (time.time() - mpc_start_time))
            action_seq_mpc = torch_utils.cast_to_numpy(mpc_out['action_seq'])
            state_pred_mpc = torch_utils.cast_to_numpy(mpc_out['state_pred'])

        # Rollout with ground truth simulator dynamics
        env2.set_simulator_state_from_observation_dict(
            env2.get_mutable_context(), obs_cur)
        obs_mpc_gt = env_utils.rollout_action_sequence(
            env2, action_seq_mpc)['observations']

        vis['mpc_3D'].delete()
        vis['mpc_GT_3D'].delete()

        L = len(obs_mpc_gt)
        print("L", L)
        if L == 0:
            break
        for i in range(L):

            # ground truth rollout of the MPC action_seq
            name = "mpc_GT_3D/%d" % (i)
            T_W_obj = DrakePusherSliderEnv.object_position_from_observation(
                obs_mpc_gt[i])
            vis[name].set_object(triad(scale=0.1))
            vis[name].set_transform(T_W_obj)

        action_cur = action_seq_mpc[0]

        print("action_cur", action_cur)
        print("action_GT", initial_cond['action'])
        input("press Enter to continue")

        # add observation actions to the episode
        obs_cur = env.get_observation()
        episode.replace_observation_action(obs_cur, action_cur)

        # step the simulator
        env.step(action_cur)

        # update the trajectories, in case we aren't replanning
        action_seq_mpc = action_seq_mpc[1:]
        state_pred_mpc = state_pred_mpc[1:]

        pose_error = compute_pose_error(env_obs_rollout_gt[-1], obs_cur)

        print("position_error: %.3f" % (pose_error['position_error']))
        print("angle error degrees: %.3f" %
              (pose_error['angle_error_degrees']))

    obs_final = env.get_observation()

    pose_error = compute_pose_error(env_obs_rollout_gt[-1], obs_final)

    print("position_error: %.3f" % (pose_error['position_error']))
    print("angle error degrees: %.3f" % (pose_error['angle_error_degrees']))
示例#8
0
def main():
    d = load_model_and_data()
    model_dy = d['model_dy']
    dataset = d['dataset']
    config = d['config']
    multi_episode_dict = d['multi_episode_dict']
    planner = d['planner']
    planner_config = planner.config

    idx_dict = get_object_and_robot_state_indices(config)
    object_indices = idx_dict['object_indices']
    robot_indices = idx_dict['robot_indices']

    n_his = config['train']['n_history']

    # save_dir = os.path.join(get_project_root(),  'sandbox/mpc/', get_current_YYYY_MM_DD_hh_mm_ss_ms())
    save_dir = os.path.join(get_project_root(),
                            'sandbox/mpc/push_right_box_horizontal')
    print("save_dir", save_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # rotate
    # episode_names = dataset.get_episode_names()
    # print("len(episode_names)", len(episode_names))
    # episode_name = episode_names[0]
    # start_idx = 1
    # n_roll = 15

    # # straight + rotate
    # episode_name = "2020-06-29-21-04-16"
    # print('episode_name', episode_name)
    # start_idx = 1
    # n_roll = 15

    # this is a nice straight push . . .
    # push with box in horizontal position
    episode_name = "2020-06-29-22-03-45"
    start_idx = 2
    n_roll = 10

    # # validation set episodes
    # episode_names = dataset.get_episode_names()
    # print("len(episode_names)", len(episode_names))
    # episode_name = episode_names[1]
    # start_idx = 2
    # n_roll = 15

    camera_name = "d415_01"
    episode = multi_episode_dict[episode_name]
    print("episode_name", episode_name)

    vis = meshcat_utils.make_default_visualizer_object()
    vis.delete()

    idx_list = list(range(start_idx, start_idx + n_roll + 1))
    idx_list_GT = idx_list
    goal_idx = idx_list[-1]
    print("idx_list", idx_list)

    # visualize ground truth rollout
    if True:
        for display_idx, episode_idx in enumerate(idx_list):
            visualize_episode_data_single_timestep(
                vis=vis,
                dataset=dataset,
                episode=episode,
                camera_name=camera_name,
                episode_idx=episode_idx,
                display_idx=episode_idx,
            )

    data_goal = dataset._getitem(episode,
                                 goal_idx,
                                 rollout_length=1,
                                 n_history=1)
    states_goal = data_goal['observations_combined'][0]
    z_states_goal = model_dy.compute_z_state(states_goal)['z']

    print("states_goal.shape", states_goal.shape)
    print("z_states_goal.shape", z_states_goal.shape)

    ##### VISUALIZE PREDICTED ROLLOUT ##########
    data = dataset._getitem(episode, start_idx, rollout_length=n_roll)

    states = data['observations_combined'].unsqueeze(0)
    z = model_dy.compute_z_state(states)['z']
    actions = data['actions'].unsqueeze(0)
    idx_range_model_dy_input = data['idx_range']

    print("data.keys()", data.keys())
    print("data['idx_range']", data['idx_range'])

    # z_init
    z_init = z[:, :n_his]

    # actions_init
    action_start_idx = 0
    action_end_idx = n_his + n_roll - 1
    action_seq = actions[:, action_start_idx:action_end_idx]

    print("action_seq GT\n", action_seq)

    with torch.no_grad():
        rollout_data = rollout_model(state_init=z_init.cuda(),
                                     action_seq=action_seq.cuda(),
                                     dynamics_net=model_dy,
                                     compute_debug_data=False)

    # [B, n_roll, state_dim]
    # state_rollout_pred = rollout_data['state_pred']
    z_rollout_pred = rollout_data['state_pred'].squeeze()
    print("z_rollout_pred.shape", z_rollout_pred.shape)

    if True:
        for idx in range(len(z_rollout_pred)):
            display_idx = data['idx_range'][idx + n_his]
            visualize_model_prediction_single_timestep(
                vis,
                config,
                z_pred=z_rollout_pred[idx],
                display_idx=display_idx)

        print("z_rollout_pred.shape", z_rollout_pred.shape)

    # compute loss when rolled out using GT action sequence
    eval_indices = object_indices
    obs_goal = z_states_goal[object_indices].cuda()
    reward_data = planner_utils.evaluate_model_rollout(
        state_pred=rollout_data['state_pred'],
        obs_goal=obs_goal,
        eval_indices=eval_indices,
        terminal_cost_only=planner_config['mpc']['mppi']['terminal_cost_only'],
        p=planner_config['mpc']['mppi']['cost_norm'])

    print("reward_data using action_seq_GT\n", reward_data['reward'])

    ##### MPC ##########
    data = dataset._getitem(episode,
                            start_idx,
                            rollout_length=0,
                            n_history=config['train']['n_history'])

    state_cur = data['observations_combined'].cuda()
    z_state_cur = model_dy.compute_z_state(state_cur)['z']
    action_his = data['actions'][:(n_his - 1)].cuda()

    print("z_state_cur.shape", state_cur.shape)
    print("action_his.shape", action_his.shape)

    # don't seed with nominal actions just yet
    action_seq_rollout_init = None

    set_seed(SEED)
    mpc_out = planner.trajectory_optimization(
        state_cur=z_state_cur,
        action_his=action_his,
        obs_goal=obs_goal,
        model_dy=model_dy,
        action_seq_rollout_init=action_seq_rollout_init,
        n_look_ahead=n_roll,
        eval_indices=object_indices,
        rollout_best_action_sequence=True,
        verbose=True,
        add_grid_action_samples=True,
    )

    print("\n\n------MPC output-------\n\n")
    print("action_seq:\n", mpc_out['action_seq'])
    mpc_state_pred = mpc_out['state_pred']

    # current shape is [n_roll + 1, state_dim] but really should be
    # [n_roll, state_dim] . . . something is  up
    print("mpc_state_pred.shape", mpc_state_pred.shape)
    print("mpc_out['action_seq'].shape", mpc_out['action_seq'].shape)
    print("n_roll", n_roll)

    # visualize
    for idx in range(n_roll):
        episode_idx = start_idx + idx + 1
        visualize_model_prediction_single_timestep(vis,
                                                   config,
                                                   z_pred=mpc_state_pred[idx],
                                                   display_idx=episode_idx,
                                                   name_prefix="mpc",
                                                   color=[255, 0, 0])

    ######## MPC w/ dynamics model input builder #############
    print("\n\n-----DynamicsModelInputBuilder-----")

    # dynamics model input builder
    online_episode = OnlineEpisodeReader(no_copy=True)

    ref_descriptors = d['spatial_descriptor_data']['spatial_descriptors']
    ref_descriptors = torch_utils.cast_to_torch(ref_descriptors).cuda()
    K_matrix = episode.image_episode.camera_K_matrix(camera_name)
    T_world_camera = episode.image_episode.camera_pose(camera_name, 0)
    visual_observation_function = \
        VisualObservationFunctionFactory.descriptor_keypoints_3D(config=config,
                                                                 camera_name=camera_name,
                                                                 model_dd=d['model_dd'],
                                                                 ref_descriptors=ref_descriptors,
                                                                 K_matrix=K_matrix,
                                                                 T_world_camera=T_world_camera,
                                                                 )

    input_builder = DynamicsModelInputBuilder(
        observation_function=d['observation_function'],
        visual_observation_function=visual_observation_function,
        action_function=d['action_function'],
        episode=online_episode)

    compute_control_action_msg = dict()
    compute_control_action_msg['type'] = "COMPUTE_CONTROL_ACTION"

    for i in range(n_his):
        episode_idx = idx_range_model_dy_input[i]
        print("episode_idx", episode_idx)

        # add image information to
        data = add_images_to_episode_data(episode, episode_idx, camera_name)

        online_episode.add_data(copy.deepcopy(data))
        compute_control_action_msg['data'] = data

        # hack for seeing how much the history matters .. .
        # online_episode.add_data(copy.deepcopy(data))

    # save informatin for running zmq controller
    save_pickle(compute_control_action_msg,
                os.path.join(save_dir, 'compute_control_action_msg.p'))
    goal_idx = idx_list_GT[-1]
    goal_data = add_images_to_episode_data(episode, goal_idx, camera_name)
    goal_data['observations']['timestamp_system'] = time.time()
    plan_msg = {
        'type': "PLAN",
        'data': [goal_data],
        'n_roll': n_roll,
        'K_matrix': K_matrix,
        'T_world_camera': T_world_camera,
    }
    save_pickle(plan_msg, os.path.join(save_dir, "plan_msg.p"))

    print("len(online_episode)", len(online_episode))

    # use this to construct input
    # verify it's the same as what we got from using the dataset directly
    idx = online_episode.get_latest_idx()
    mpc_input_data = input_builder.get_dynamics_model_input(idx,
                                                            n_history=n_his)

    # print("mpc_input_data\n", mpc_input_data)
    state_cur_ib = mpc_input_data['states'].cuda()
    action_his_ib = mpc_input_data['actions'].cuda()

    z_state_cur_ib = model_dy.compute_z_state(state_cur_ib)['z']

    set_seed(SEED)
    mpc_out = planner.trajectory_optimization(
        state_cur=z_state_cur_ib,
        action_his=action_his_ib,
        obs_goal=obs_goal,
        model_dy=model_dy,
        action_seq_rollout_init=None,
        n_look_ahead=n_roll,
        eval_indices=object_indices,
        rollout_best_action_sequence=True,
        verbose=True,
        add_grid_action_samples=True,
    )

    # visualize
    for idx in range(n_roll):
        episode_idx = start_idx + idx + 1
        visualize_model_prediction_single_timestep(
            vis,
            config,
            z_pred=mpc_out['state_pred'][idx],
            display_idx=episode_idx,
            name_prefix="mpc_input_builder",
            color=[255, 255, 0])