Exemplo n.º 1
0
def test_gym_API():
    """
    Collects two episodes and saves them to disk
    Currently no images are being saved
    :return:
    :rtype:
    """

    config_file = os.path.join(get_project_root(),
                               'experiments/01/config.yaml')
    config = load_yaml(config_file)

    env = PusherSlider(config=config)
    env.setup_environment()

    episode_container = None  # for namespacing purposes
    multi_episode_container = MultiEpisodeContainer()

    print("fps", env._fps)
    num_episodes = 1
    # num_timesteps_per_episode = 40
    for episode_idx in range(num_episodes):
        env.reset()
        obs_prev = env.get_observation()

        episode_container = EpisodeContainer()
        episode_container.set_config(env.config)
        for i in range(env.config['dataset']['num_timesteps']):
            env.render(mode='human')
            if i == 50:

                # save this image out in local directory
                img = env.render(mode='rgb_array')

                if DEBUG_PRINTS:
                    print("saving figure to file")
                    print("type(img)", type(img))
                    print(img.shape)
                    print(img.dtype)

                pil_img = numpy_to_PIL(img)
                pil_img.save('test_PIL.png')

            action = np.array([100, 0])
            obs, reward, done, info = env.step(action)

            episode_container.add_obs_action(obs_prev, action)
            obs_prev = obs

            if DEBUG_PRINTS:
                print("\n\nsim_time = ", env._sim_time)
                print("obs sim_time = ", obs['sim_time'])

        multi_episode_container.add_episode(episode_container)

    if True:
        save_file = os.path.join(os.getcwd(), "single_episode_log.p")
        episode_container.save_to_file(save_file)

        multi_episode_save_file = os.path.join(os.getcwd(),
                                               "multi_episode_log.p")
        multi_episode_container.save_to_file(multi_episode_save_file)
Exemplo n.º 2
0
def mpc_episode_keypoint_observation(
        config,  # the global config
        mpc_idx,  # int: index of this trial for logging purposes
        model_dy,  # the dynamics model
        mpc_dir,  # str: directory to store results
        planner,
        obs_goal,
        action_function,
        observation_function,
        video=True,
        image=True,
        use_gpu=True):
    '''
    setup
    '''

    # optionally add noise to observation as specified in config
    add_noise_to_observation = False
    if "add_noise_to_observation" in config["mpc"]:
        # raise ValueError("testing")
        add_noise_to_observation = config["mpc"]["add_noise_to_observation"]

    # basic info
    n_his = config['train']['n_history']
    num_steps = config['mpc']['num_timesteps'] + n_his

    # create PusherSlider env
    env = PusherSlider(config)
    env.setup_environment()
    env.reset()
    env.step_space_to_initialize_render()

    # set up episode container
    action_zero = np.zeros(2)

    # states used to roll out the MPC model
    states_roll_mpc = np.zeros((num_steps, config['dataset']['state_dim']))

    # gt states that actually occurred
    states_roll_gt = np.zeros(states_roll_mpc.shape)
    actions_roll = np.zeros((num_steps, config['dataset']['action_dim']))

    gt_obs_list = []
    img_gt_PIL_list = []

    actions_roll = np.zeros((num_steps, config['dataset']['action_dim']))

    for i in range(n_his, num_steps):
        actions_roll[i] = sample_pusher_velocity()

    action_lower_lim = np.array(config['mpc']['action_lower_lim'])
    action_upper_lim = np.array(config['mpc']['action_upper_lim'])
    '''
    model predictive control
    '''
    for step in range(num_steps):

        print('roll_step %d/%d' % (step, num_steps))

        states_roll_mpc[step] = observation_function(
            env.get_observation(), data_augmentation=add_noise_to_observation)
        img_gt_PIL_list.append(numpy_to_PIL(env.render(mode='rgb_array')))

        # stand still for n_his timesteps
        if step < n_his:
            action = action_zero
        else:
            actions_roll[step:] = planner.trajectory_optimization(
                states_roll_mpc[step - n_his + 1:step + 1],
                obs_goal,
                model_dy,
                actions_roll[step - n_his + 1:],
                n_sample=config['mpc']['n_sample'],
                n_look_ahead=min(num_steps - step,
                                 config['mpc']['n_look_ahead']),
                n_update_iter=config['mpc']['n_update_iter_init']
                if step == n_his else config['mpc']['n_update_iter'],
                action_lower_lim=action_lower_lim,
                action_upper_lim=action_upper_lim,
                use_gpu=use_gpu)

            action = actions_roll[step]

        obs, reward, done, info = env.step(action)
        gt_obs_list.append(obs)
    '''
    render the prediction
    '''

    # reset PusherSlider env
    env.reset()

    # setup rendering data

    width = config['env']['display_size'][0]
    height = config['env']['display_size'][1]
    n_split = 3
    split = 4

    if image:
        eval_img_dir = os.path.join(mpc_dir, '%d_img' % mpc_idx)
        os.system('mkdir -p ' + eval_img_dir)
        print('Save images to %s' % eval_img_dir)

    if video:
        eval_vid_path = os.path.join(mpc_dir, '%d_vid.avi' % mpc_idx)
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        print('Save video as %s' % eval_vid_path)
        fps = int(env.fps)
        print("fps:", fps)
        out = cv2.VideoWriter(eval_vid_path, fourcc, fps,
                              (width * n_split + split *
                               (n_split - 1), height))

    # gt represents goal
    for i, start_idx in enumerate(range(num_steps)):

        # get ground truth positions at goal
        goal_gt_state_dict = pusher_pose_slider_keypoints_from_tensor(obs_goal)

        # now render them
        img_goal_gt_PIL = pusher_slider_keypoints_image(
            config, goal_gt_state_dict['pusher']['position'],
            goal_gt_state_dict['keypoint_positions'])

        img_goal_gt = np.array(img_goal_gt_PIL)

        #
        img_gt_PIL = img_gt_PIL_list[i]

        # # actual GT positions that happened
        state_roll = states_roll_mpc[i, :]
        state_roll_dict = pusher_pose_slider_keypoints_from_tensor(state_roll)
        img_gt_PIL = pusher_slider_keypoints_image(
            config,
            state_roll_dict['pusher']['position'],
            state_roll_dict['keypoint_positions'],
            img=img_gt_PIL)

        img_gt = np.array(img_gt_PIL)

        # blend the images
        img_merge_PIL = Image.blend(img_goal_gt_PIL, img_gt_PIL, 0.5)

        # get numpy image
        img_merge = np.array(img_merge_PIL)

        img_output = np.zeros(
            (img_goal_gt.shape[0],
             img_goal_gt.shape[1] * n_split + split * (n_split - 1),
             3)).astype(np.uint8)

        img_output[:, :img_goal_gt.
                   shape[1]] = img_goal_gt  # goal keypoints image
        img_output[:, img_goal_gt.shape[1] + split:img_goal_gt.shape[1] * 2 +
                   split] = img_gt
        img_output[:, (img_goal_gt.shape[1] + split) * 2:] = img_merge

        if image:
            # convert to PIL then save
            img_output_PIL = numpy_to_PIL(img_output)
            img_output_PIL.save(os.path.join(eval_img_dir, 'fig_%d.png' % i))

        if video:
            # convert from RGB to BGR since video writer wants BGR
            img_output_bgr = cv2.cvtColor(img_output, cv2.COLOR_RGB2BGR)
            out.write(img_output_bgr)

    if video:
        out.release()