def test_single_array_observation(self, pixels_only):
    pixel_key = "depth"

    env = FakeArrayObservationEnvironment()
    observation_space = env.observation_space
    assert isinstance(observation_space, spaces.Box)

    wrapped_env = PixelObservationWrapper(
        env, pixel_keys=(pixel_key,), pixels_only=pixels_only
    )
    wrapped_env.observation_space = wrapped_env.observation_space
    assert isinstance(wrapped_env.observation_space, spaces.Dict)

    if pixels_only:
        assert len(wrapped_env.observation_space.spaces) == 1
        assert list(wrapped_env.observation_space.spaces.keys()) == [pixel_key]
    else:
        assert len(wrapped_env.observation_space.spaces) == 2
        assert list(wrapped_env.observation_space.spaces.keys()) == [
            STATE_KEY,
            pixel_key,
        ]

    observation = wrapped_env.reset()
    depth_observation = observation[pixel_key]
    assert depth_observation.shape == (32, 32, 3)
    assert depth_observation.dtype == np.uint8

    if not pixels_only:
        assert isinstance(observation[STATE_KEY], np.ndarray)

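# Hedged sketch (an assumption, not the project's actual fixture): a minimal
# array-observation env of the kind the test above expects. Its render()
# returns a 32x32 RGB frame so PixelObservationWrapper can expose it under
# the chosen pixel key.
import gym
import numpy as np
from gym import spaces


class FakeArrayObservationEnvironment(gym.Env):
    observation_space = spaces.Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float64)
    action_space = spaces.Discrete(2)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        return self.observation_space.sample(), 0.0, False, {}

    def render(self, mode="rgb_array"):
        return np.zeros((32, 32, 3), dtype=np.uint8)
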
class GymPixelWrapper(PixelWrapper):
    def __init__(self, env, render_h=84, render_w=84):
        super().__init__(env, render_h, render_w)
        self._wrapped_env = None
        self._obs = None

    def _get_obs(self):
        # Resize the rendered frame; the scaling assumes float pixels in [0, 1].
        img = cv2.resize(self._obs['pixels'], (self.render_h, self.render_w),
                         interpolation=cv2.INTER_AREA)
        img *= 255
        return img.astype(np.uint8)

    def reset(self):
        gt_obs = self._env.reset()
        if self._wrapped_env is None:
            self._wrapped_env = PixelObservationWrapper(self._env, pixels_only=True)
        self._obs = self._wrapped_env.observation(gt_obs)
        return self._get_obs()

    def step(self, action):
        self._obs, reward, done, info = self._wrapped_env.step(action)
        return self._get_obs(), reward, done, info

    def seed(self, seed):
        self._env.seed(seed)

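# Hedged usage sketch (not part of the original source): "Pendulum-v1" is a
# placeholder env name, and PixelWrapper is assumed to store the env as
# self._env and keep render_h / render_w as attributes, as the methods above
# rely on.
import gym

base_env = gym.make("Pendulum-v1")
base_env.reset()  # reset before wrapping so rendering is available
pixel_env = GymPixelWrapper(base_env, render_h=84, render_w=84)
frame = pixel_env.reset()  # uint8 image of shape (84, 84, 3)
frame, reward, done, info = pixel_env.step(base_env.action_space.sample())
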
def main():
    # wandb.init(project="example_rl_algos")
    env = gym.make("Hopper-v3")
    env = PixelObservationWrapper(env, pixels_only=False)
    env.reset()
    pixels = []
    while True:
        (state, reward, done, info) = env.step(env.action_space.sample())
        pixels.append(state["pixels"].transpose(2, 0, 1))
        if done:
            # wandb.log({"video": wandb.Video(np.array(pixels), fps=60)})
            break

def make_visual(env, shape):
    """Wrap env to return pixel observations."""
    env = PixelObservationWrapper(env, pixels_only=False, pixel_keys=("pixels",))
    env = ObservationDictToInfo(env, "pixels")
    env = GrayScaleObservation(env)
    env = ResizeObservation(env, shape)
    return env

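# Hedged usage sketch (illustrative, not from the source): "CartPole-v1" and
# the 84-pixel target are placeholders; ObservationDictToInfo is the
# project-specific wrapper used in make_visual above.
import gym

env = gym.make("CartPole-v1")
env.reset()  # PixelObservationWrapper expects a renderable, already-reset env
visual_env = make_visual(env, shape=84)
obs = visual_env.reset()  # grayscale 84x84 pixel observation
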
def test_dict_observation(self, pixels_only):
    pixel_key = "rgb"

    env = FakeDictObservationEnvironment()

    # Make sure we are testing the right environment for the test.
    observation_space = env.observation_space
    assert isinstance(observation_space, spaces.Dict)

    width, height = (320, 240)

    # The wrapper should only add one observation.
    wrapped_env = PixelObservationWrapper(
        env,
        pixel_keys=(pixel_key,),
        pixels_only=pixels_only,
        render_kwargs={pixel_key: {"width": width, "height": height}},
    )

    assert isinstance(wrapped_env.observation_space, spaces.Dict)

    if pixels_only:
        assert len(wrapped_env.observation_space.spaces) == 1
        assert list(wrapped_env.observation_space.spaces.keys()) == [pixel_key]
    else:
        assert (
            len(wrapped_env.observation_space.spaces)
            == len(observation_space.spaces) + 1
        )
        expected_keys = list(observation_space.spaces.keys()) + [pixel_key]
        assert list(wrapped_env.observation_space.spaces.keys()) == expected_keys

    # Check that the added space item is consistent with the added observation.
    observation = wrapped_env.reset()
    rgb_observation = observation[pixel_key]

    assert rgb_observation.shape == (height, width, 3)
    assert rgb_observation.dtype == np.uint8

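# Hedged sketch (an assumption, not the project's actual fixture): a minimal
# dict-observation env matching what the test above assumes, with a render()
# that honours the width/height passed through render_kwargs.
import gym
import numpy as np
from gym import spaces


class FakeDictObservationEnvironment(gym.Env):
    observation_space = spaces.Dict(
        {"state": spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float64)}
    )
    action_space = spaces.Discrete(2)

    def reset(self):
        return {"state": np.zeros(2)}

    def step(self, action):
        return {"state": np.zeros(2)}, 0.0, False, {}

    def render(self, mode="rgb_array", width=320, height=240):
        return np.zeros((height, width, 3), dtype=np.uint8)
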
def cartpole_pixel_env_creator(env_config: dict):
    env = gym.make("CartPole-v1").unwrapped
    env.reset()
    env = PixelObservationWrapper(
        env, pixels_only=True, render_kwargs={"mode": "rgb_array"}
    )
    env = ResizePixelObservationWrapper(env, shape=env_config["shape"])
    env = FlattenObservation(env)
    env = FrameStack(env, num_stack=env_config["num_stack"])
    env.close()
    return env

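# Hedged usage sketch (an assumption about how the creator is consumed): the
# signature follows the env_creator convention used by RLlib, so it would
# typically be registered via tune.register_env; here it is called directly.
# The config values are placeholders whose exact format depends on
# ResizePixelObservationWrapper.
env = cartpole_pixel_env_creator({"shape": 84, "num_stack": 4})
print(env.observation_space)  # flattened, frame-stacked pixel observations
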
def record_videos_from_actor(
    env: gym.Env,
    actor,
    num_videos=1,
    frame_skip=1,
    pixel=False,
    dir=None,
    logger: logging.Logger = logging.getLogger(__name__),
):
    if pixel:
        videos: List[np.ndarray] = []
        env.reset()
        env = PixelObservationWrapper(env, pixels_only=False)
        for i in range(num_videos):
            video = []
            state_and_pixels = env.reset()
            video.append(state_and_pixels["pixels"].transpose(2, 0, 1))
            reward_sum = 0
            step = 0
            while True:
                action = actor(state_and_pixels["state"])
                state_and_pixels, reward, done, info = env.step(action)
                if step % frame_skip == 0:
                    video.append(state_and_pixels["pixels"].transpose(2, 0, 1))
                reward_sum += reward
                step += 1
                if done:
                    break
            logger.info(
                f"Recording video {i+1}/{num_videos}, reward_sum={reward_sum}, step={step}"
            )
            videos.append(np.array(video))
        return videos
    elif isinstance(dir, str):
        raise NotImplementedError()  # TODO

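# Hedged usage sketch (illustrative only): a random policy stands in for a
# trained actor, and "Hopper-v3" is a placeholder env with rgb_array rendering.
import gym

env = gym.make("Hopper-v3")

def random_actor(state):
    return env.action_space.sample()

videos = record_videos_from_actor(env, random_actor, num_videos=1, frame_skip=2, pixel=True)
# Each entry has shape (T, 3, H, W), e.g. ready for wandb.Video.
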
def run(env_id, seed, noise_type, layer_norm, evaluation, use_vision, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    env = gym.make(env_id)
    env = PixelObservationWrapper(env, pixels_only=False)
    eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev),
                                                 desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions),
                                             sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions),
                                                        sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    # TODO: Change back to 1e6
    observation_shape = (100, 100, 3) if use_vision else env.observation_space["observation"].shape
    memory = Memory(limit=int(1e2),
                    state_shape=env.observation_space["observation"].shape,
                    action_shape=env.action_space.shape,
                    observation_shape=observation_shape,
                    goal_shape=env.observation_space["desired_goal"].shape,
                    goalobs_shape=env.observation_space["desired_goal"].shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm, use_vision=use_vision)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Disable logging for rank != 0 to avoid noise.
    if rank == 0:
        start_time = time.time()
    kwargs.pop('state_shape')
    training.train(env=env, eval_env=eval_env, param_noise=param_noise,
                   action_noise=action_noise, actor=actor, critic=critic,
                   memory=memory, use_vision=use_vision, **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))

# Initialize Weights-and-Biases project
if wandb_project:
    wandb.init(project=project_name)

    # Log hyperparameters in WandB project
    wandb.config.update(args)
    wandb.config.control_cost = DEFAULT_CONTROL_COST
    wandb.config.collision_detect = DEFAULT_COLLISION_DETECTION
    wandb.config.contact_cost = DEFAULT_CONTACT_COST
    wandb.config.dead_cost = DEFAULT_DEAD_COST
    wandb.config.healthy_reward = DEFAULT_HEALTHY_REWARD

env = AntPixelWrapper(
    PixelObservationWrapper(gym.make(env_name).unwrapped,
                            pixels_only=False,
                            render_kwargs=render_kwargs.copy())
)

agent = create_third_level_agent(concept_path, args.load_concept_id, args.n_concepts,
                                 noisy=noisy, n_heads=n_heads,
                                 init_log_alpha=args.init_log_alpha,
                                 latent_dim=args.vision_latent_dim,
                                 parallel=args.parallel_q_nets,
                                 lr=args.lr, lr_alpha=args.lr_alpha, lr_actor=args.lr_actor,
                                 min_entropy_factor=args.entropy_factor,
                                 lr_c=args.lr_c, lr_Alpha=args.lr_c_Alpha,
                                 entropy_update_rate=args.entropy_update_rate,
                                 init_Epsilon=args.init_epsilon_MC,
                                 delta_Epsilon=args.delta_epsilon_MC)

if args.load_id is not None:
    if args.load_best:
        agent.load(MODEL_PATH + env_name + '/best_', args.load_id)
    else:
        agent.load(MODEL_PATH + env_name + '/last_', args.load_id)

import gym
import pytest
from gym.wrappers import AtariPreprocessing, ClipAction, FilterObservation
from gym.wrappers.pixel_observation import PixelObservationWrapper

from sequoia.conftest import param_requires_atari_py

from .pixel_observation import PixelObservationWrapper
from .utils import has_wrapper


@pytest.mark.parametrize(
    "env,wrapper_type,result",
    [
        (lambda: PixelObservationWrapper(gym.make("CartPole-v0")), ClipAction, False),
        (lambda: PixelObservationWrapper(gym.make("CartPole-v0")), PixelObservationWrapper, True),
        (lambda: PixelObservationWrapper(gym.make("CartPole-v0")), PixelObservationWrapper, True),
        # param_requires_atari_py(AtariPreprocessing(gym.make("Breakout-v0")), ClipAction, True),
    ],
)
def test_has_wrapper(env, wrapper_type, result):
    assert has_wrapper(env(), wrapper_type) == result

def make_env(args, dream_env: bool = False, seed: Optional[int] = None, keep_image: bool = False,
             wrap_rnn: bool = True, load_model: bool = True):
    # Prepares an environment that matches the expected format:
    # - The environment returns a 64x64 image in observation["image"]
    #   and camera data (x, y, z, pitch, yaw) in observation["camera"]
    # - If wrapped in the RNN, observation["features"] returns the RNN output to be used for the controller
    # - A dream environment simulates the actual environment using the RNN. It never returns an image
    #   (because the actual environment doesn't get run) and only returns the features
    # - A wrapped environment always returns the features, and can return the original image when keep_image is True
    full_episode = args.full_episode

    # Initialize VAE and MDNRNN networks
    if dream_env or wrap_rnn:
        features_mode = FeatureMode.MODE_ZCH if args.state_space == 2 else FeatureMode.MODE_ZH

        if args.use_gqn:
            encoder = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim, args.gqn_h_dim,
                                             args.gqn_z_dim, args.gqn_l, name="gqn")
            encoder_path = get_path(args, "tf_gqn")
        else:
            encoder = CVAE(args)
            encoder_path = get_path(args, "tf_vae")

        rnn = MDNRNN(args)
        rnn_path = get_path(args, "tf_rnn")

        # TODO: Is this still needed? Do we ever NOT load the model?
        if load_model:
            encoder.load_weights(str(encoder_path))
            rnn.load_weights(str(rnn_path))

    if dream_env:
        assert keep_image is False, "Dream environment doesn't support image observations"

        import json
        initial_z_dir = get_path(args, "tf_initial_z")

        if args.use_gqn:
            initial_z_path = initial_z_dir / "initial_z_gqn.json"
            with open(str(initial_z_path), 'r') as f:
                initial_z = json.load(f)
        else:
            initial_z_path = initial_z_dir / "initial_z_vae.json"
            with open(str(initial_z_path), 'r') as f:
                [initial_mu, initial_logvar] = json.load(f)
            # This could probably be done more efficiently
            initial_z = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)], dtype=np.float)

        # Create dream environment
        # noinspection PyUnboundLocalVariable
        env = DreamEnv(initial_z, args.z_size, rnn, features_mode)
    else:
        # Create real environment
        kwargs = {}
        if args.env_name.startswith("VizdoomTakeCover"):
            kwargs["position"] = True  # Include position data as observation for Vizdoom environment

        print("Making environment {}...".format(args.env_name))
        env = gym.make(args.env_name, **kwargs)
        print("Raw environment:", env)

        from gym.envs.box2d import CarRacing
        from vizdoomgym.envs import VizdoomTakeCover
        from gym_minigrid.minigrid import MiniGridEnv

        if isinstance(env.unwrapped, CarRacing):
            # Accept actions in the required format
            env = CarRacingActionWrapper(env)
            # Transform CarRacing observations into expected format and add camera data
            env = CarRacingObservationWrapper(env)
            # Cut off "status bar" at the bottom of CarRacing observation (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(84),))
        elif isinstance(env.unwrapped, VizdoomTakeCover):
            # Accept actions in the required format
            env = VizdoomTakeCoverActionWrapper(env)
            # Transform Vizdoom observations into expected format
            env = VizdoomObservationWrapper(env)
            # Cut off "status bar" at the bottom of the screen (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(400),))
        elif isinstance(env.unwrapped, MiniGridEnv):
            from gym_minigrid.wrappers import RGBImgPartialObsWrapper
            # Accept actions in the required format
            env = MiniGridActionWrapper(env)
            # Get RGB image observations from the agent's viewpoint
            # (7x7 grid of tiles, with tile size 9 this results in a 63x63 image)
            env = RGBImgPartialObsWrapper(env, tile_size=9)
            # Add camera data to the observation
            env = MiniGridObservationWrapper(env)
            # Pad image to 64x64 to match the requirements (in effect just adding one row at the right
            # and bottom edge with repeated values from the edge)
            env = PadPixelObservationWrapper(env, target_size=64)
        else:
            env = PixelObservationWrapper(env, pixel_keys=("image",))
            if env.observation_space["image"].shape[:2] != (64, 64):
                # Resize image to 64x64
                env = ResizePixelObservationWrapper(env, size=(64, 64))

    # Wrap in RNN to add features to observation
    if wrap_rnn:
        # noinspection PyUnboundLocalVariable
        env = MDNRNNWrapper(env, encoder, rnn, keep_image=keep_image, features_mode=features_mode)

    # TODO: Is this needed? It was only ever implemented for CarRacing and didn't work
    # Force done=False if full_episode is True
    if full_episode:
        env = NoEarlyStopWrapper(env)

    # Set seed if given
    if seed is not None:
        env.seed(seed)

    print("Wrapped environment:", env)

    return env

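# Hedged usage sketch: `args` comes from the project's argument parser, so only
# the call pattern is shown (attribute names below mirror those read inside
# make_env and are otherwise assumptions).
# env = make_env(args, dream_env=False, seed=0, keep_image=True, wrap_rnn=True)
# obs = env.reset()
# obs["image"]     # 64x64 RGB frame (when keep_image=True)
# obs["features"]  # RNN features added by MDNRNNWrapper (when wrap_rnn=True)
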
if __name__ == '__main__':
    if args.virtual_display:
        import pyvirtualdisplay
        _display = pyvirtualdisplay.Display(visible=False, size=(1400, 900))
        _ = _display.start()

    # paths
    Path(args.checkpoint_dir).mkdir(parents=True, exist_ok=True)
    Path(args.log_dir).mkdir(parents=True, exist_ok=True)
    args.checkpoint_dir += f'/{args.env_name}_td3.pth'

    # env & agent
    env = gym.make(args.env_name)
    if args.img_input:
        env.reset()
        env = PixelObservationWrapper(env)

    max_action = float(env.action_space.high[0])
    agent = Agent(env, args.alpha, args.beta, args.hidden_dims, args.tau,
                  args.batch_size, args.gamma, args.d, args.warmup, args.max_size,
                  args.c * max_action, args.sigma * max_action, args.one_device,
                  args.log_dir, args.checkpoint_dir, args.img_input, args.in_channels,
                  args.order, args.depth, args.multiplier, args.action_embed_dim,
                  args.hidden_dim, args.crop_dim, args.img_feature_dim)

    best_score = env.reward_range[0]
    score_history = deque([], maxlen=args.window_size)
    episodes = tqdm(range(args.n_episodes))

    if args.continue_train:
        agent.load_state_dicts()