def test_action_repeated(self):
  mock_env = self._get_mock_env_episode()
  env = wrappers.ActionRepeat(mock_env, 3)
  env.reset()
  env.step([2])

  mock_env.step.assert_has_calls([mock.call([2])] * 3)
def test_accumulates_reward(self):
  mock_env = self._get_mock_env_episode()
  env = wrappers.ActionRepeat(mock_env, 3)
  env.reset()
  time_step = env.step(0)

  mock_env.step.assert_called_with(0)
  # Rewards from all repeated steps are summed into one time step.
  self.assertEqual(10, time_step.reward)
  self.assertEqual([2], time_step.observation)
def test_action_stops_on_first(self):
  mock_env = self._get_mock_env_episode()
  env = wrappers.ActionRepeat(mock_env, 3)
  env.reset()
  time_step = env.step([2])

  # The underlying env hits an episode boundary on the first call, so the
  # action is not repeated further.
  mock_env.step.assert_has_calls([mock.call([2])])
  self.assertEqual(1, time_step.reward)
  self.assertEqual([0], time_step.observation)
def test_checks_times_param(self):
  mock_env = mock.MagicMock()
  with self.assertRaises(ValueError):
    wrappers.ActionRepeat(mock_env, 1)
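# The four tests above pin down the ActionRepeat contract: the wrapped env
# sees the same action `times` times, rewards are summed across the repeats,
# the loop stops early at an episode boundary, and `times` values below 2
# are rejected. A minimal sketch of a wrapper satisfying that contract,
# written against the tf_agents PyEnvironment API (an illustration, not the
# library's implementation):
class ActionRepeatSketch(wrappers.PyEnvironmentBaseWrapper):
  """Repeats each action `times` steps, summing the rewards."""

  def __init__(self, env, times):
    super(ActionRepeatSketch, self).__init__(env)
    if times <= 1:
      raise ValueError('times should be greater than 1, got %d.' % times)
    self._times = times

  def _step(self, action):
    total_reward = 0
    for _ in range(self._times):
      time_step = self._env.step(action)
      total_reward += time_step.reward
      if time_step.is_first() or time_step.is_last():
        break  # Stop repeating at an episode boundary.
    # TimeStep is a namedtuple; _replace returns a copy carrying the
    # accumulated reward.
    return time_step._replace(reward=total_reward)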
def load_carla_env(env_name='carla-v0',
                   discount=1.0,
                   number_of_vehicles=100,
                   number_of_walkers=0,
                   display_size=256,
                   max_past_step=1,
                   dt=0.1,
                   discrete=False,
                   discrete_acc=[-3.0, 0.0, 3.0],
                   discrete_steer=[-0.2, 0.0, 0.2],
                   continuous_accel_range=[-3.0, 3.0],
                   continuous_steer_range=[-0.3, 0.3],
                   ego_vehicle_filter='vehicle.lincoln*',
                   port=2000,
                   town='Town03',
                   task_mode='random',
                   max_time_episode=500,
                   max_waypt=12,
                   obs_range=32,
                   lidar_bin=0.5,
                   d_behind=12,
                   out_lane_thres=2.0,
                   desired_speed=8,
                   max_ego_spawn_times=200,
                   display_route=True,
                   pixor_size=64,
                   pixor=False,
                   obs_channels=None,
                   action_repeat=1):
  """Loads train and eval environments."""
  env_params = {
      'number_of_vehicles': number_of_vehicles,
      'number_of_walkers': number_of_walkers,
      'display_size': display_size,  # screen size of bird-eye render
      'max_past_step': max_past_step,  # the number of past steps to draw
      'dt': dt,  # time interval between two frames
      'discrete': discrete,  # whether to use discrete control space
      'discrete_acc': discrete_acc,  # discrete values of accelerations
      'discrete_steer': discrete_steer,  # discrete values of steering angles
      'continuous_accel_range': continuous_accel_range,  # continuous acceleration range
      'continuous_steer_range': continuous_steer_range,  # continuous steering angle range
      'ego_vehicle_filter': ego_vehicle_filter,  # filter for defining ego vehicle
      'port': port,  # connection port
      'town': town,  # which town to simulate
      'task_mode': task_mode,  # mode of the task, [random, roundabout (only for Town03)]
      'max_time_episode': max_time_episode,  # maximum timesteps per episode
      'max_waypt': max_waypt,  # maximum number of waypoints
      'obs_range': obs_range,  # observation range (meters)
      'lidar_bin': lidar_bin,  # bin size of lidar sensor (meters)
      'd_behind': d_behind,  # distance behind the ego vehicle (meters)
      'out_lane_thres': out_lane_thres,  # threshold for out-of-lane
      'desired_speed': desired_speed,  # desired speed (m/s)
      'max_ego_spawn_times': max_ego_spawn_times,  # maximum times to spawn ego vehicle
      'display_route': display_route,  # whether to render the desired route
      'pixor_size': pixor_size,  # size of the pixor labels
      'pixor': pixor,  # whether to output PIXOR observation
  }

  gym_spec = gym.spec(env_name)
  gym_env = gym_spec.make(params=env_params)

  if obs_channels:
    gym_env = filter_observation_wrapper.FilterObservationWrapper(
        gym_env, obs_channels)

  py_env = gym_wrapper.GymWrapper(
      gym_env,
      discount=discount,
      auto_reset=True,
  )
  eval_py_env = py_env

  if action_repeat > 1:
    py_env = wrappers.ActionRepeat(py_env, action_repeat)

  return py_env, eval_py_env
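# A minimal usage sketch for load_carla_env. It assumes the gym-carla
# package has registered 'carla-v0' and that a CARLA server is already
# listening on the given port; the obs_channels names are illustrative,
# not mandated by this module.
def _example_load_carla_env():
  train_env, eval_env = load_carla_env(
      env_name='carla-v0',
      port=2000,
      obs_channels=['camera', 'lidar'],  # hypothetical channel names
      action_repeat=4)
  print('observation spec:', train_env.observation_spec())
  return train_env, eval_env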
def _load_dm_env(domain_name,
                 task_name,
                 pixels,
                 action_repeat,
                 max_episode_steps=None,
                 obs_type='pixels',
                 distractor=False):
  """Loads a DeepMind control suite environment."""
  try:
    if not pixels:
      env = suite_dm_control.load(domain_name=domain_name,
                                  task_name=task_name)
      if action_repeat > 1:
        env = wrappers.ActionRepeat(env, action_repeat)
    else:

      def wrap_repeat(env):
        return ActionRepeatDMWrapper(env, action_repeat)

      camera_id = 2 if domain_name == 'quadruped' else 0
      pixels_only = obs_type == 'pixels'
      if distractor:
        render_kwargs = dict(width=84, height=84, camera_id=camera_id)
        env = distractor_suite.load(
            domain_name,
            task_name,
            difficulty='hard',
            dynamic=False,
            background_dataset_path='DAVIS/JPEGImages/480p/',
            task_kwargs={},
            environment_kwargs={},
            render_kwargs=render_kwargs,
            visualize_reward=False,
            env_state_wrappers=[wrap_repeat])
        # env = wrap_repeat(env)
        # env = suite.wrappers.pixels.Wrapper(
        #     env,
        #     pixels_only=pixels_only,
        #     render_kwargs=render_kwargs,
        #     observation_key=obs_type)
        env = dm_control_wrapper.DmControlWrapper(env, render_kwargs)
      else:
        env = suite_dm_control.load_pixels(
            domain_name=domain_name,
            task_name=task_name,
            render_kwargs=dict(width=84, height=84, camera_id=camera_id),
            env_state_wrappers=[wrap_repeat],
            observation_key=obs_type,
            pixels_only=pixels_only)
    if action_repeat > 1 and max_episode_steps is not None:
      # Shorten the episode length to account for the repeated actions.
      max_episode_steps = (max_episode_steps + action_repeat -
                           1) // action_repeat
      env = wrappers.TimeLimit(env, max_episode_steps)
    return env
  except ValueError as e:
    logging.warning('cannot instantiate dm env: domain_name=%s, task_name=%s',
                    domain_name, task_name)
    logging.warning(
        'Supported domains and tasks: %s',
        str({
            key: list(val.SUITE.keys())
            for key, val in suite._DOMAINS.items()  # pylint: disable=protected-access
        }))
    raise e
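# A usage sketch for _load_dm_env, assuming dm_control is installed. With
# action_repeat=4, the TimeLimit above shortens a 1000-step pixel episode
# to (1000 + 4 - 1) // 4 = 250 wrapper steps.
def _example_load_dm_env():
  env = _load_dm_env(
      'cheetah',
      'run',
      pixels=True,
      action_repeat=4,
      max_episode_steps=1000,
      obs_type='pixels')
  print(env.observation_spec())
  return env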
def load_env(env_name, seed, action_repeat=0, frame_stack=1,
             obs_type='pixels'):
  """Loads a learning environment.

  Args:
    env_name: Name of the environment.
    seed: Random seed.
    action_repeat: (optional) action repeat multiplier. Useful for DM control
      suite tasks.
    frame_stack: (optional) frame stack.
    obs_type: `pixels` or `state`.

  Returns:
    Learning environment.
  """
  action_repeat_applied = False
  state_env = None

  if env_name.startswith('dm'):
    _, domain_name, task_name = env_name.split('-')
    if 'manipulation' in domain_name:
      env = manipulation.load(task_name)
      env = dm_control_wrapper.DmControlWrapper(env)
    else:
      env = _load_dm_env(domain_name,
                         task_name,
                         pixels=False,
                         action_repeat=action_repeat)
      action_repeat_applied = True
    env = wrappers.FlattenObservationsWrapper(env)

  elif env_name.startswith('pixels-dm'):
    if 'distractor' in env_name:
      _, _, domain_name, task_name, _ = env_name.split('-')
      distractor = True
    else:
      _, _, domain_name, task_name = env_name.split('-')
      distractor = False

    # TODO(tompson): Are there DMC environments that have other
    # max_episode_steps?
    env = _load_dm_env(domain_name,
                       task_name,
                       pixels=True,
                       action_repeat=action_repeat,
                       max_episode_steps=1000,
                       obs_type=obs_type,
                       distractor=distractor)
    action_repeat_applied = True
    if obs_type == 'pixels':
      env = FlattenImageObservationsWrapper(env)
      state_env = None
    else:
      env = JointImageObservationsWrapper(env)
      state_env = tf_py_environment.TFPyEnvironment(
          wrappers.FlattenObservationsWrapper(
              _load_dm_env(domain_name,
                           task_name,
                           pixels=False,
                           action_repeat=action_repeat)))
  else:
    env = suite_mujoco.load(env_name)
    env.seed(seed)

  if action_repeat > 1 and not action_repeat_applied:
    env = wrappers.ActionRepeat(env, action_repeat)
  if frame_stack > 1:
    env = FrameStackWrapperTfAgents(env, frame_stack)

  env = tf_py_environment.TFPyEnvironment(env)
  return env, state_env
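# A usage sketch for load_env. The env_name prefix selects the universe:
# 'dm-cheetah-run' loads the flat-state DM control task,
# 'pixels-dm-cheetah-run' the pixel variant (with '-distractor' appended,
# the distractor suite), and any other name falls through to suite_mujoco.
# Assumes dm_control is installed.
def _example_load_env():
  env, state_env = load_env(
      'pixels-dm-cheetah-run',
      seed=0,
      action_repeat=4,
      frame_stack=3,
      obs_type='pixels')
  return env, state_env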
def load_environments(universe,
                      env_name=None,
                      domain_name=None,
                      task_name=None,
                      render_size=128,
                      observation_render_size=64,
                      observations_whitelist=None,
                      action_repeat=1):
  """Loads train and eval environments.

  The universe can either be gym, in which case domain_name and task_name
  are ignored, or dm_control, in which case env_name is ignored.
  """
  if universe == 'gym':
    tf.compat.v1.logging.info('Using environment {} from {} universe.'.format(
        env_name, universe))
    gym_env_wrappers = [
        functools.partial(gym_wrappers.RenderGymWrapper,
                          render_kwargs={
                              'height': render_size,
                              'width': render_size,
                              'device_id': 0
                          }),
        functools.partial(gym_wrappers.PixelObservationsGymWrapper,
                          observations_whitelist=observations_whitelist,
                          render_kwargs={
                              'height': observation_render_size,
                              'width': observation_render_size,
                              'device_id': 0
                          })
    ]
    # Rendering on the same device as the train env segfaults, so the eval
    # env renders on device 1.
    eval_gym_env_wrappers = [
        functools.partial(gym_wrappers.RenderGymWrapper,
                          render_kwargs={
                              'height': render_size,
                              'width': render_size,
                              'device_id': 1
                          }),
        functools.partial(gym_wrappers.PixelObservationsGymWrapper,
                          observations_whitelist=observations_whitelist,
                          render_kwargs={
                              'height': observation_render_size,
                              'width': observation_render_size,
                              'device_id': 1
                          })
    ]
    py_env = suite_mujoco.load(env_name, gym_env_wrappers=gym_env_wrappers)
    eval_py_env = suite_mujoco.load(env_name,
                                    gym_env_wrappers=eval_gym_env_wrappers)
  elif universe == 'dm_control':
    tf.compat.v1.logging.info(
        'Using domain {} and task {} from {} universe.'.format(
            domain_name, task_name, universe))
    render_kwargs = {
        'height': render_size,
        'width': render_size,
        'camera_id': 0,
    }
    dm_env_wrappers = [
        wrappers.FlattenObservationsWrapper,  # combine position and velocity
        functools.partial(
            dm_control_wrappers.PixelObservationsDmControlWrapper,
            observations_whitelist=observations_whitelist,
            render_kwargs={
                'height': observation_render_size,
                'width': observation_render_size,
                'camera_id': 0
            })
    ]
    py_env = suite_dm_control.load(domain_name,
                                   task_name,
                                   render_kwargs=render_kwargs,
                                   env_wrappers=dm_env_wrappers)
    eval_py_env = suite_dm_control.load(domain_name,
                                        task_name,
                                        render_kwargs=render_kwargs,
                                        env_wrappers=dm_env_wrappers)
  else:
    raise ValueError('Invalid universe %s.' % universe)

  eval_py_env = video_wrapper.VideoWrapper(eval_py_env)

  if action_repeat > 1:
    py_env = wrappers.ActionRepeat(py_env, action_repeat)
    eval_py_env = wrappers.ActionRepeat(eval_py_env, action_repeat)

  return py_env, eval_py_env
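# A usage sketch for the gym branch. 'HalfCheetah-v2' is an illustrative
# MuJoCo env name, not one this module mandates, and two render devices are
# assumed since the train and eval envs must not share one.
def _example_load_environments():
  py_env, eval_py_env = load_environments(
      'gym',
      env_name='HalfCheetah-v2',
      observations_whitelist=['state'],
      action_repeat=4)
  return py_env, eval_py_env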
def load_environments(
    universe,
    action_mode,
    env_name=None,
    render_size=128,
    observation_render_size=64,
    observations_whitelist=None,
    action_repeat=1,
    num_train_tasks=30,
    num_eval_tasks=10,
    eval_on_holdout_tasks=True,
    return_multiple_tasks=False,
    model_input=None,
    auto_reset_task_each_episode=False,
):
  """Loads train and eval environments."""
  assert universe == 'gym'
  tf.compat.v1.logging.info('Using environment {} from {} universe.'.format(
      env_name, universe))

  is_shelf_env = env_name in ('SawyerShelfMT-v0', 'SawyerShelfMT-v2')
  if is_shelf_env:
    return load_multiple_mugs_env(
        universe,
        action_mode,
        env_name=env_name,
        observations_whitelist=['state', 'pixels', 'env_info'],
        action_repeat=action_repeat,
        num_train_tasks=num_train_tasks,
        num_eval_tasks=num_eval_tasks,
        eval_on_holdout_tasks=eval_on_holdout_tasks,
        return_multiple_tasks=True,
    )

  # Select the observation wrapper: it puts either the state or the image
  # into the 'pixels' location.
  use_observation_wrapper = gym_wrappers.PixelObservationsGymWrapper
  if model_input == 'state':
    use_observation_wrapper = gym_wrappers.PixelObservationsGymWrapperState

  # Wrappers for the train env (rendered on GPU 0).
  gym_env_wrappers = [
      functools.partial(gym_wrappers.RenderGymWrapper,
                        render_kwargs={
                            'height': render_size,
                            'width': render_size,
                            'device_id': 0
                        }),
      functools.partial(use_observation_wrapper,
                        observations_whitelist=observations_whitelist,
                        render_kwargs={
                            'height': observation_render_size,
                            'width': observation_render_size,
                            'device_id': 0
                        })
  ]

  # Wrappers for the eval env (rendered on GPU 1; rendering on the same
  # device as the train env segfaults).
  eval_gym_env_wrappers = [
      functools.partial(gym_wrappers.RenderGymWrapper,
                        render_kwargs={
                            'height': render_size,
                            'width': render_size,
                            'device_id': 1
                        }),
      functools.partial(use_observation_wrapper,
                        observations_whitelist=observations_whitelist,
                        render_kwargs={
                            'height': observation_render_size,
                            'width': observation_render_size,
                            'device_id': 1
                        })
  ]

  # Create the train/eval envs.
  gym_kwargs = {"action_mode": action_mode}
  py_env = suite_gym.load(env_name,
                          gym_env_wrappers=gym_env_wrappers,
                          gym_kwargs=gym_kwargs)
  eval_py_env = suite_gym.load(env_name,
                               gym_env_wrappers=eval_gym_env_wrappers,
                               gym_kwargs=gym_kwargs)

  # Set the action mode.
  py_env.wrapped_env().override_action_mode(action_mode)
  eval_py_env.wrapped_env().override_action_mode(action_mode)

  # Video wrapper for eval saving.
  eval_py_env = video_wrapper.VideoWrapper(eval_py_env)

  # Action repeat.
  if action_repeat > 1:
    py_env = wrappers.ActionRepeat(py_env, action_repeat)
    eval_py_env = wrappers.ActionRepeat(eval_py_env, action_repeat)

  ###############################
  # get possible tasks
  ###############################
  if return_multiple_tasks:
    # Whether the eval env samples held-out tasks rather than train tasks.
    eval_env_is_true_eval = eval_on_holdout_tasks

    # train env
    train_tasks = py_env.init_tasks(num_tasks=num_train_tasks,
                                    is_eval_env=False)
    # eval env
    eval_tasks = eval_py_env.init_tasks(num_tasks=num_eval_tasks,
                                        is_eval_env=eval_env_is_true_eval)

    # Set the task lists and reset the task at the start of each episode.
    if auto_reset_task_each_episode:
      py_env.wrapped_env().set_auto_reset_task(train_tasks)
      eval_py_env.wrapped_env().set_auto_reset_task(eval_tasks)

    return py_env, eval_py_env, train_tasks, eval_tasks
  else:
    return py_env, eval_py_env
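# A usage sketch for the multi-task path. The env_name and action_mode
# values are hypothetical; the task counts mirror the defaults above. With
# eval_on_holdout_tasks=True, the eval env draws tasks held out from
# training.
def _example_load_multitask_environments():
  py_env, eval_py_env, train_tasks, eval_tasks = load_environments(
      'gym',
      action_mode='joint_position',  # hypothetical action mode
      env_name='SawyerReachMT-v0',  # hypothetical multi-task env name
      observations_whitelist=['state', 'pixels'],
      return_multiple_tasks=True)
  return py_env, eval_py_env, train_tasks, eval_tasks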
def load_multiple_mugs_env(
    universe,
    action_mode,
    env_name=None,
    render_size=128,
    observation_render_size=64,
    observations_whitelist=None,
    action_repeat=1,
    num_train_tasks=30,
    num_eval_tasks=10,
    eval_on_holdout_tasks=True,
    return_multiple_tasks=False,
    model_input=None,
    auto_reset_task_each_episode=False,
):
  """Loads train and eval shelf environments with per-task-set XML files."""
  ### HARDCODED
  # temporary sanity checks
  assert env_name == 'SawyerShelfMT-v0'
  assert return_multiple_tasks
  assert universe == 'gym'

  # Get the eval and train tasks by loading a sample env.
  sample_env = suite_mujoco.load(env_name)
  # train env
  train_tasks = sample_env.init_tasks(num_tasks=num_train_tasks,
                                      is_eval_env=False)
  # eval env
  eval_tasks = sample_env.init_tasks(num_tasks=num_eval_tasks,
                                     is_eval_env=eval_on_holdout_tasks)
  del sample_env
  print("train weights", train_tasks)
  print("eval weights", eval_tasks)

  if env_name == 'SawyerShelfMT-v0':
    from meld.environments.envs.shelf.assets.generate_sawyer_shelf_xml import generate_and_save_xml_file
  else:
    raise NotImplementedError
  train_xml_path = generate_and_save_xml_file(train_tasks,
                                              action_mode,
                                              is_eval=False)
  eval_xml_path = generate_and_save_xml_file(eval_tasks,
                                             action_mode,
                                             is_eval=True)

  ### train env
  # Get the wrappers. Note: the wrapper lists must not be named `wrappers`,
  # or they would shadow the wrappers module used below for ActionRepeat.
  train_env_wrappers = get_wrappers(
      device_id=0,
      model_input=model_input,
      render_size=render_size,
      observation_render_size=observation_render_size,
      observations_whitelist=observations_whitelist)
  # load env
  gym_kwargs = {"action_mode": action_mode, "xml_path": train_xml_path}
  py_env = suite_gym.load(env_name,
                          gym_env_wrappers=train_env_wrappers,
                          gym_kwargs=gym_kwargs)
  if action_repeat > 1:
    py_env = wrappers.ActionRepeat(py_env, action_repeat)

  ### eval env
  # get wrappers (rendered on a different device than the train env)
  eval_env_wrappers = get_wrappers(
      device_id=1,
      model_input=model_input,
      render_size=render_size,
      observation_render_size=observation_render_size,
      observations_whitelist=observations_whitelist)
  # load env
  gym_kwargs = {"action_mode": action_mode, "xml_path": eval_xml_path}
  eval_py_env = suite_gym.load(env_name,
                               gym_env_wrappers=eval_env_wrappers,
                               gym_kwargs=gym_kwargs)
  eval_py_env = video_wrapper.VideoWrapper(eval_py_env)
  if action_repeat > 1:
    eval_py_env = wrappers.ActionRepeat(eval_py_env, action_repeat)

  py_env.assign_tasks(train_tasks)
  eval_py_env.assign_tasks(eval_tasks)

  # Set the task lists and reset the task at the start of each episode.
  if auto_reset_task_each_episode:
    py_env.wrapped_env().set_auto_reset_task(train_tasks)
    eval_py_env.wrapped_env().set_auto_reset_task(eval_tasks)

  return py_env, eval_py_env, train_tasks, eval_tasks
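# A usage sketch for the shelf path. load_environments above routes shelf
# env names here with a hardcoded observations whitelist, so callers
# normally go through it rather than calling this function directly; the
# action_mode value is hypothetical.
def _example_load_shelf_environments():
  py_env, eval_py_env, train_tasks, eval_tasks = load_environments(
      'gym',
      action_mode='joint_position',  # hypothetical action mode
      env_name='SawyerShelfMT-v0',
      return_multiple_tasks=True)
  return py_env, eval_py_env, train_tasks, eval_tasks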