Example #1
    def test_action_repeated(self):
        mock_env = self._get_mock_env_episode()
        env = wrappers.ActionRepeat(mock_env, 3)
        env.reset()

        env.step([2])
        mock_env.step.assert_has_calls([mock.call([2])] * 3)
Example #2
  def test_accumulates_reward(self):
    mock_env = self._get_mock_env_episode()
    env = wrappers.ActionRepeat(mock_env, 3)
    time_step = env.step(0)

    mock_env.step.assert_called_with(0)
    self.assertEqual(10, time_step.reward)
    self.assertEqual([2], time_step.observation)
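These tests rely on a `_get_mock_env_episode` helper that is not shown in the
excerpts. A minimal sketch consistent with Examples #1 and #2 (three MID
transitions whose rewards sum to 10, ending on observation [2]) might look like
the following; the exact mock differs between repositories, and Example #3
below evidently uses a variant whose first transition is an episode boundary so
the wrapper stops early.

  # Hypothetical helper; assumes `from unittest import mock` and
  # `from tf_agents.trajectories import time_step as ts`.
  def _get_mock_env_episode(self):
    # Three MID transitions: rewards 2 + 3 + 5 = 10 and the final
    # observation is [2], matching test_accumulates_reward above.
    mock_env = mock.MagicMock()
    mock_env.step.side_effect = [
        ts.TimeStep(ts.StepType.MID, reward=2.0, discount=1.0, observation=[0]),
        ts.TimeStep(ts.StepType.MID, reward=3.0, discount=1.0, observation=[1]),
        ts.TimeStep(ts.StepType.MID, reward=5.0, discount=1.0, observation=[2]),
    ]
    return mock_env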
Example #3
  def test_action_stops_on_first(self):
    mock_env = self._get_mock_env_episode()
    env = wrappers.ActionRepeat(mock_env, 3)
    env.reset()

    time_step = env.step([2])
    mock_env.step.assert_has_calls([mock.call([2])])

    self.assertEqual(1, time_step.reward)
    self.assertEqual([0], time_step.observation)
Example #4
  def test_checks_times_param(self):
    mock_env = mock.MagicMock()
    with self.assertRaises(ValueError):
      wrappers.ActionRepeat(mock_env, 1)
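All four tests exercise the same wrapper contract: repeat the action,
accumulate the reward, stop early at an episode boundary, and reject
`times <= 1`. A minimal sketch of that contract (an illustration under those
assumptions, not the exact TF-Agents source) could be:

# Sketch only; assumes `from tf_agents.environments import wrappers`.
class ActionRepeat(wrappers.PyEnvironmentBaseWrapper):
  """Repeats each action `times` steps, summing the rewards (sketch)."""

  def __init__(self, env, times):
    super(ActionRepeat, self).__init__(env)
    if times <= 1:
      raise ValueError('times should be greater than 1, got %d.' % times)
    self._times = times

  def _step(self, action):
    total_reward = 0.0
    for _ in range(self._times):
      time_step = self._env.step(action)
      total_reward += time_step.reward
      if time_step.is_first() or time_step.is_last():
        break  # stop repeating at an episode boundary
    # TimeStep is a namedtuple; _replace copies it with the summed reward.
    return time_step._replace(reward=total_reward)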
Example #5
def load_carla_env(env_name='carla-v0',
                   discount=1.0,
                   number_of_vehicles=100,
                   number_of_walkers=0,
                   display_size=256,
                   max_past_step=1,
                   dt=0.1,
                   discrete=False,
                   discrete_acc=[-3.0, 0.0, 3.0],
                   discrete_steer=[-0.2, 0.0, 0.2],
                   continuous_accel_range=[-3.0, 3.0],
                   continuous_steer_range=[-0.3, 0.3],
                   ego_vehicle_filter='vehicle.lincoln*',
                   port=2000,
                   town='Town03',
                   task_mode='random',
                   max_time_episode=500,
                   max_waypt=12,
                   obs_range=32,
                   lidar_bin=0.5,
                   d_behind=12,
                   out_lane_thres=2.0,
                   desired_speed=8,
                   max_ego_spawn_times=200,
                   display_route=True,
                   pixor_size=64,
                   pixor=False,
                   obs_channels=None,
                   action_repeat=1):
    """Loads train and eval environments."""
    env_params = {
        'number_of_vehicles': number_of_vehicles,
        'number_of_walkers': number_of_walkers,
        'display_size': display_size,  # screen size of bird-eye render
        'max_past_step': max_past_step,  # the number of past steps to draw
        'dt': dt,  # time interval between two frames
        'discrete': discrete,  # whether to use discrete control space
        'discrete_acc': discrete_acc,  # discrete value of accelerations
        'discrete_steer': discrete_steer,  # discrete value of steering angles
        'continuous_accel_range': continuous_accel_range,  # continuous acceleration range
        'continuous_steer_range': continuous_steer_range,  # continuous steering angle range
        'ego_vehicle_filter': ego_vehicle_filter,  # filter for defining ego vehicle
        'port': port,  # connection port
        'town': town,  # which town to simulate
        'task_mode': task_mode,  # mode of the task: random, or roundabout (Town03 only)
        'max_time_episode': max_time_episode,  # maximum timesteps per episode
        'max_waypt': max_waypt,  # maximum number of waypoints
        'obs_range': obs_range,  # observation range (meter)
        'lidar_bin': lidar_bin,  # bin size of lidar sensor (meter)
        'd_behind': d_behind,  # distance behind the ego vehicle (meter)
        'out_lane_thres': out_lane_thres,  # threshold for out of lane
        'desired_speed': desired_speed,  # desired speed (m/s)
        'max_ego_spawn_times': max_ego_spawn_times,  # maximum attempts to spawn the ego vehicle
        'display_route': display_route,  # whether to render the desired route
        'pixor_size': pixor_size,  # size of the pixor labels
        'pixor': pixor,  # whether to output PIXOR observation
    }

    gym_spec = gym.spec(env_name)
    gym_env = gym_spec.make(params=env_params)

    if obs_channels:
        gym_env = filter_observation_wrapper.FilterObservationWrapper(
            gym_env, obs_channels)

    py_env = gym_wrapper.GymWrapper(
        gym_env,
        discount=discount,
        auto_reset=True,
    )

    # Note: the eval env aliases the underlying train env and is returned
    # without the ActionRepeat wrapper.
    eval_py_env = py_env

    if action_repeat > 1:
        py_env = wrappers.ActionRepeat(py_env, action_repeat)

    return py_env, eval_py_env
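A hypothetical call site for the loader above; it assumes a CARLA server is
listening on the configured port and that the gym_carla package is installed so
`gym.spec('carla-v0')` resolves.

# Hypothetical usage: the train env gets action repeat, the eval env does not.
train_env, eval_env = load_carla_env(town='Town03', action_repeat=4)
time_step = train_env.reset()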
Example #6
def _load_dm_env(domain_name,
                 task_name,
                 pixels,
                 action_repeat,
                 max_episode_steps=None,
                 obs_type='pixels',
                 distractor=False):
    """Load a Deepmind control suite environment."""
    try:
        if not pixels:
            env = suite_dm_control.load(domain_name=domain_name,
                                        task_name=task_name)
            if action_repeat > 1:
                env = wrappers.ActionRepeat(env, action_repeat)

        else:

            def wrap_repeat(env):
                return ActionRepeatDMWrapper(env, action_repeat)

            camera_id = 2 if domain_name == 'quadruped' else 0

            pixels_only = obs_type == 'pixels'
            if distractor:
                render_kwargs = dict(width=84, height=84, camera_id=camera_id)

                env = distractor_suite.load(
                    domain_name,
                    task_name,
                    difficulty='hard',
                    dynamic=False,
                    background_dataset_path='DAVIS/JPEGImages/480p/',
                    task_kwargs={},
                    environment_kwargs={},
                    render_kwargs=render_kwargs,
                    visualize_reward=False,
                    env_state_wrappers=[wrap_repeat])

                # env = wrap_repeat(env)

                # env = suite.wrappers.pixels.Wrapper(
                #     env,
                #     pixels_only=pixels_only,
                #     render_kwargs=render_kwargs,
                #     observation_key=obs_type)

                env = dm_control_wrapper.DmControlWrapper(env, render_kwargs)

            else:
                env = suite_dm_control.load_pixels(
                    domain_name=domain_name,
                    task_name=task_name,
                    render_kwargs=dict(width=84,
                                       height=84,
                                       camera_id=camera_id),
                    env_state_wrappers=[wrap_repeat],
                    observation_key=obs_type,
                    pixels_only=pixels_only)

        if action_repeat > 1 and max_episode_steps is not None:
            # Shorten episode length.
            max_episode_steps = (max_episode_steps + action_repeat -
                                 1) // action_repeat
            env = wrappers.TimeLimit(env, max_episode_steps)

        return env

    except ValueError:
        logging.warning(
            'cannot instantiate dm env: domain_name=%s, task_name=%s',
            domain_name, task_name)
        logging.warning('Supported domains and tasks: %s',
                        str({
                            key: list(val.SUITE.keys())
                            for key, val in suite._DOMAINS.items()
                        }))  # pylint: disable=protected-access
        raise
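The episode-length adjustment above is a ceiling division, keeping the total
number of simulated frames roughly constant: for example, max_episode_steps=1000
with action_repeat=4 becomes (1000 + 3) // 4 = 250 wrapper steps. A hypothetical
call:

# Hypothetical usage: pixel observations from the cartpole domain, with each
# policy action repeated 4 times.
env = _load_dm_env('cartpole', 'swingup', pixels=True, action_repeat=4,
                   max_episode_steps=1000)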
Example #7
def load_env(env_name,
             seed,
             action_repeat=0,
             frame_stack=1,
             obs_type='pixels'):
    """Loads a learning environment.

  Args:
    env_name: Name of the environment.
    seed: Random seed.
    action_repeat: (optional) action repeat multiplier. Useful for DM control
      suite tasks.
    frame_stack: (optional) frame stack.
    obs_type: `pixels` or `state`
  Returns:
    Learning environment.
  """

    action_repeat_applied = False
    state_env = None

    if env_name.startswith('dm'):
        _, domain_name, task_name = env_name.split('-')
        if 'manipulation' in domain_name:
            env = manipulation.load(task_name)
            env = dm_control_wrapper.DmControlWrapper(env)
        else:
            env = _load_dm_env(domain_name,
                               task_name,
                               pixels=False,
                               action_repeat=action_repeat)
            action_repeat_applied = True
        env = wrappers.FlattenObservationsWrapper(env)

    elif env_name.startswith('pixels-dm'):
        if 'distractor' in env_name:
            _, _, domain_name, task_name, _ = env_name.split('-')
            distractor = True
        else:
            _, _, domain_name, task_name = env_name.split('-')
            distractor = False
        # TODO(tompson): Are there DMC environments that have other
        # max_episode_steps?
        env = _load_dm_env(domain_name,
                           task_name,
                           pixels=True,
                           action_repeat=action_repeat,
                           max_episode_steps=1000,
                           obs_type=obs_type,
                           distractor=distractor)
        action_repeat_applied = True
        if obs_type == 'pixels':
            env = FlattenImageObservationsWrapper(env)
            state_env = None
        else:
            env = JointImageObservationsWrapper(env)
            state_env = tf_py_environment.TFPyEnvironment(
                wrappers.FlattenObservationsWrapper(
                    _load_dm_env(domain_name,
                                 task_name,
                                 pixels=False,
                                 action_repeat=action_repeat)))

    else:
        env = suite_mujoco.load(env_name)
        env.seed(seed)

    if action_repeat > 1 and not action_repeat_applied:
        env = wrappers.ActionRepeat(env, action_repeat)
    if frame_stack > 1:
        env = FrameStackWrapperTfAgents(env, frame_stack)

    env = tf_py_environment.TFPyEnvironment(env)

    return env, state_env
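A hypothetical call, following the env_name grammar parsed above
(`dm-<domain>-<task>` for state observations, `pixels-dm-<domain>-<task>` for
pixels):

# Hypothetical usage: returns a TFPyEnvironment, plus a state-based env
# only when obs_type != 'pixels'.
env, state_env = load_env('pixels-dm-cartpole-swingup', seed=0,
                          action_repeat=4, frame_stack=3)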
Example #8
def load_environments(universe,
                      env_name=None,
                      domain_name=None,
                      task_name=None,
                      render_size=128,
                      observation_render_size=64,
                      observations_whitelist=None,
                      action_repeat=1):
    """Loads train and eval environments.

  The universe can either be gym, in which case domain_name and task_name are
  ignored, or dm_control, in which case env_name is ignored.
  """
    if universe == 'gym':
        tf.compat.v1.logging.info(
            'Using environment {} from {} universe.'.format(
                env_name, universe))
        gym_env_wrappers = [
            functools.partial(gym_wrappers.RenderGymWrapper,
                              render_kwargs={
                                  'height': render_size,
                                  'width': render_size,
                                  'device_id': 0
                              }),
            functools.partial(gym_wrappers.PixelObservationsGymWrapper,
                              observations_whitelist=observations_whitelist,
                              render_kwargs={
                                  'height': observation_render_size,
                                  'width': observation_render_size,
                                  'device_id': 0
                              })
        ]
        eval_gym_env_wrappers = [
            functools.partial(gym_wrappers.RenderGymWrapper,
                              render_kwargs={
                                  'height': render_size,
                                  'width': render_size,
                                  'device_id': 1
                              }),
            # segfaults if the device is the same as train env
            functools.partial(gym_wrappers.PixelObservationsGymWrapper,
                              observations_whitelist=observations_whitelist,
                              render_kwargs={
                                  'height': observation_render_size,
                                  'width': observation_render_size,
                                  'device_id': 1
                              })
        ]
        py_env = suite_mujoco.load(env_name, gym_env_wrappers=gym_env_wrappers)
        eval_py_env = suite_mujoco.load(env_name,
                                        gym_env_wrappers=eval_gym_env_wrappers)
    elif universe == 'dm_control':
        tf.compat.v1.logging.info(
            'Using domain {} and task {} from {} universe.'.format(
                domain_name, task_name, universe))
        render_kwargs = {
            'height': render_size,
            'width': render_size,
            'camera_id': 0,
        }
        dm_env_wrappers = [
            wrappers.FlattenObservationsWrapper,  # combine position and velocity
            functools.partial(
                dm_control_wrappers.PixelObservationsDmControlWrapper,
                observations_whitelist=observations_whitelist,
                render_kwargs={
                    'height': observation_render_size,
                    'width': observation_render_size,
                    'camera_id': 0
                })
        ]
        py_env = suite_dm_control.load(domain_name,
                                       task_name,
                                       render_kwargs=render_kwargs,
                                       env_wrappers=dm_env_wrappers)
        eval_py_env = suite_dm_control.load(domain_name,
                                            task_name,
                                            render_kwargs=render_kwargs,
                                            env_wrappers=dm_env_wrappers)
    else:
        raise ValueError('Invalid universe %s.' % universe)

    eval_py_env = video_wrapper.VideoWrapper(eval_py_env)

    if action_repeat > 1:
        py_env = wrappers.ActionRepeat(py_env, action_repeat)
        eval_py_env = wrappers.ActionRepeat(eval_py_env, action_repeat)

    return py_env, eval_py_env
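Hypothetical calls covering both branches of the universe switch above:

# Hypothetical usage; env_name is ignored for dm_control, and
# domain_name/task_name are ignored for gym.
train_env, eval_env = load_environments('gym', env_name='HalfCheetah-v2',
                                        action_repeat=2)
train_env, eval_env = load_environments('dm_control', domain_name='cheetah',
                                        task_name='run', action_repeat=2)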
Example #9
def load_environments(
    universe,
    action_mode,
    env_name=None,
    render_size=128,
    observation_render_size=64,
    observations_whitelist=None,
    action_repeat=1,
    num_train_tasks=30,
    num_eval_tasks=10,
    eval_on_holdout_tasks=True,
    return_multiple_tasks=False,
    model_input=None,
    auto_reset_task_each_episode=False,
):
    """
  Loads train and eval environments.
  """

    assert universe == 'gym'
    tf.compat.v1.logging.info('Using environment {} from {} universe.'.format(
        env_name, universe))

    is_shelf_env = env_name in ('SawyerShelfMT-v0', 'SawyerShelfMT-v2')
    if is_shelf_env:
        return load_multiple_mugs_env(
            universe,
            action_mode,
            env_name=env_name,
            observations_whitelist=['state', 'pixels', 'env_info'],
            action_repeat=action_repeat,
            num_train_tasks=num_train_tasks,
            num_eval_tasks=num_eval_tasks,
            eval_on_holdout_tasks=eval_on_holdout_tasks,
            return_multiple_tasks=True,
        )

    # select observation wrapper
    # puts either state or image into the 'pixels' location
    use_observation_wrapper = gym_wrappers.PixelObservationsGymWrapper
    if model_input == 'state':
        use_observation_wrapper = gym_wrappers.PixelObservationsGymWrapperState

    # wrappers for train env (put on GPU 0)
    gym_env_wrappers = [
        functools.partial(gym_wrappers.RenderGymWrapper,
                          render_kwargs={
                              'height': render_size,
                              'width': render_size,
                              'device_id': 0
                          }),
        functools.partial(use_observation_wrapper,
                          observations_whitelist=observations_whitelist,
                          render_kwargs={
                              'height': observation_render_size,
                              'width': observation_render_size,
                              'device_id': 0
                          })
    ]

    # wrappers for eval env (put on GPU 1)
    eval_gym_env_wrappers = [
        functools.partial(gym_wrappers.RenderGymWrapper,
                          render_kwargs={
                              'height': render_size,
                              'width': render_size,
                              'device_id': 1
                          }),
        # segfaults if the device is the same as train env
        functools.partial(use_observation_wrapper,
                          observations_whitelist=observations_whitelist,
                          render_kwargs={
                              'height': observation_render_size,
                              'width': observation_render_size,
                              'device_id': 1
                          })
    ]

    # create train/eval envs
    gym_kwargs = {"action_mode": action_mode}
    py_env = suite_gym.load(env_name,
                            gym_env_wrappers=gym_env_wrappers,
                            gym_kwargs=gym_kwargs)
    eval_py_env = suite_gym.load(env_name,
                                 gym_env_wrappers=eval_gym_env_wrappers,
                                 gym_kwargs=gym_kwargs)

    # set action mode
    py_env.wrapped_env().override_action_mode(action_mode)
    eval_py_env.wrapped_env().override_action_mode(action_mode)

    # video wrapper for eval saving
    eval_py_env = video_wrapper.VideoWrapper(eval_py_env)

    # action repeat
    if action_repeat > 1:
        py_env = wrappers.ActionRepeat(py_env, action_repeat)
        eval_py_env = wrappers.ActionRepeat(eval_py_env, action_repeat)

    ###############################
    # get possible tasks
    ###############################

    if return_multiple_tasks:
        # set env as being "train" or "eval"
        # used for defining the tasks used in the envs
        eval_env_is_true_eval = False
        if eval_on_holdout_tasks:
            eval_env_is_true_eval = True

        # train env
        train_tasks = py_env.init_tasks(num_tasks=num_train_tasks,
                                        is_eval_env=False)
        # eval env
        eval_tasks = eval_py_env.init_tasks(num_tasks=num_eval_tasks,
                                            is_eval_env=eval_env_is_true_eval)

        # set task list and reset variable to true
        if auto_reset_task_each_episode:
            py_env.wrapped_env().set_auto_reset_task(train_tasks)
            eval_py_env.wrapped_env().set_auto_reset_task(eval_tasks)

        return py_env, eval_py_env, train_tasks, eval_tasks
    else:
        return py_env, eval_py_env
Example #10
def load_multiple_mugs_env(
    universe,
    action_mode,
    env_name=None,
    render_size=128,
    observation_render_size=64,
    observations_whitelist=None,
    action_repeat=1,
    num_train_tasks=30,
    num_eval_tasks=10,
    eval_on_holdout_tasks=True,
    return_multiple_tasks=False,
    model_input=None,
    auto_reset_task_each_episode=False,
):

    ### HARDCODED
    # temporary sanity checks
    assert env_name == 'SawyerShelfMT-v0'
    assert return_multiple_tasks
    assert universe == 'gym'

    # get eval and train tasks by loading a sample env
    sample_env = suite_mujoco.load(env_name)
    # train env
    train_tasks = sample_env.init_tasks(num_tasks=num_train_tasks,
                                        is_eval_env=False)
    # eval env
    eval_tasks = sample_env.init_tasks(num_tasks=num_eval_tasks,
                                       is_eval_env=eval_on_holdout_tasks)
    del sample_env

    print("train weights", train_tasks)
    print("eval weights", eval_tasks)
    if env_name == 'SawyerShelfMT-v0':
        from meld.environments.envs.shelf.assets.generate_sawyer_shelf_xml import generate_and_save_xml_file
    else:
        raise NotImplementedError

    train_xml_path = generate_and_save_xml_file(train_tasks,
                                                action_mode,
                                                is_eval=False)
    eval_xml_path = generate_and_save_xml_file(eval_tasks,
                                               action_mode,
                                               is_eval=True)

    ### train env
    # get wrappers (named to avoid shadowing the `wrappers` module, which is
    # needed below for wrappers.ActionRepeat)
    train_env_wrappers = get_wrappers(
        device_id=0,
        model_input=model_input,
        render_size=render_size,
        observation_render_size=observation_render_size,
        observations_whitelist=observations_whitelist)
    # load env
    gym_kwargs = {"action_mode": action_mode, "xml_path": train_xml_path}
    py_env = suite_gym.load(env_name,
                            gym_env_wrappers=train_env_wrappers,
                            gym_kwargs=gym_kwargs)
    if action_repeat > 1:
        py_env = wrappers.ActionRepeat(py_env, action_repeat)

    ### eval env
    # get wrappers (again avoiding the `wrappers` module name)
    eval_env_wrappers = get_wrappers(
        device_id=1,
        model_input=model_input,
        render_size=render_size,
        observation_render_size=observation_render_size,
        observations_whitelist=observations_whitelist)
    # load env
    gym_kwargs = {"action_mode": action_mode, "xml_path": eval_xml_path}
    eval_py_env = suite_gym.load(env_name,
                                 gym_env_wrappers=eval_env_wrappers,
                                 gym_kwargs=gym_kwargs)
    eval_py_env = video_wrapper.VideoWrapper(eval_py_env)
    if action_repeat > 1:
        eval_py_env = wrappers.ActionRepeat(eval_py_env, action_repeat)

    py_env.assign_tasks(train_tasks)
    eval_py_env.assign_tasks(eval_tasks)

    # set task list and reset variable to true
    if auto_reset_task_each_episode:
        py_env.wrapped_env().set_auto_reset_task(train_tasks)
        eval_py_env.wrapped_env().set_auto_reset_task(eval_tasks)

    return py_env, eval_py_env, train_tasks, eval_tasks