Example #1
    def test_single_array_observation(self, pixels_only):
        pixel_key = "depth"

        env = FakeArrayObservationEnvironment()
        observation_space = env.observation_space
        assert isinstance(observation_space, spaces.Box)

        wrapped_env = PixelObservationWrapper(
            env, pixel_keys=(pixel_key,), pixels_only=pixels_only
        )
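        # Re-assigning the observation space to itself presumably just exercises the wrapper's setter.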
        wrapped_env.observation_space = wrapped_env.observation_space
        assert isinstance(wrapped_env.observation_space, spaces.Dict)

        if pixels_only:
            assert len(wrapped_env.observation_space.spaces) == 1
            assert list(wrapped_env.observation_space.spaces.keys()) == [pixel_key]
        else:
            assert len(wrapped_env.observation_space.spaces) == 2
            assert list(wrapped_env.observation_space.spaces.keys()) == [
                STATE_KEY,
                pixel_key,
            ]

        observation = wrapped_env.reset()
        depth_observation = observation[pixel_key]

        assert depth_observation.shape == (32, 32, 3)
        assert depth_observation.dtype == np.uint8

        if not pixels_only:
            assert isinstance(observation[STATE_KEY], np.ndarray)
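
For orientation, a minimal sketch (not part of the test above) of how the wrapper is typically applied to a plain Gym environment. It assumes the older Gym API used throughout these examples and an environment that supports render(mode="rgb_array"); classic-control environments must be reset before wrapping so that rendering works, the same pattern several examples below rely on.

import gym
from gym.wrappers.pixel_observation import PixelObservationWrapper

env = gym.make("CartPole-v1")
env.reset()  # classic-control envs need a valid state before they can render
env = PixelObservationWrapper(env, pixels_only=False)

obs = env.reset()
# With pixels_only=False and a Box observation space, the original observation is
# kept under the "state" key and the rendered frame under "pixels".
print(list(obs.keys()))     # ["state", "pixels"]
print(obs["pixels"].shape)  # (H, W, 3) uint8 RGB frame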
Example #2
class GymPixelWrapper(PixelWrapper):
    def __init__(self, env, render_h=84, render_w=84):
        super().__init__(env, render_h, render_w)
        self._wrapped_env = None
        self._obs = None

    def _get_obs(self):
        img = cv2.resize(self._obs['pixels'], (self.render_h, self.render_w),
                         interpolation=cv2.INTER_AREA)
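        # The wrapped pixels are assumed to arrive as floats in [0, 1]; scale to 0-255 before casting to uint8.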
        img *= 255
        return img.astype(np.uint8)

    def reset(self):
        gt_obs = self._env.reset()
        if self._wrapped_env is None:
            self._wrapped_env = PixelObservationWrapper(self._env,
                                                        pixels_only=True)

        self._obs = self._wrapped_env.observation(gt_obs)

        return self._get_obs()

    def step(self, action):
        self._obs, reward, done, info = self._wrapped_env.step(action)
        return self._get_obs(), reward, done, info

    def seed(self, seed):
        self._env.seed(seed)
Example #3
def main():
    # wandb.init(project="example_rl_algos")
    env = gym.make("Hopper-v3")
    env = PixelObservationWrapper(env, pixels_only=False)
    env.reset()
    pixels = []
    while True:
        (state, reward, done, info) = env.step(env.action_space.sample())
        pixels.append(state["pixels"].transpose(2, 0, 1))
        if done:
            # wandb.log({"video": wandb.Video(np.array(pixels), fps=60)})
            break
Example #4
def make_visual(env, shape):
    """ Wrap env to return pixel observations """
    env = PixelObservationWrapper(env,
                                  pixels_only=False,
                                  pixel_keys=("pixels", ))
    env = ObservationDictToInfo(env, "pixels")
    env = GrayScaleObservation(env)
    env = ResizeObservation(env, shape)
    return env
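
A hypothetical usage of this helper, assuming ObservationDictToInfo is a project-local wrapper that pulls the "pixels" entry out of the observation dict (and that the environment, like the MuJoCo envs in the other examples, can render before being reset):

env = make_visual(gym.make("Hopper-v3"), (84, 84))
obs = env.reset()
# obs is now a single-channel 84x84 pixel frame rather than the raw state vector:
# GrayScaleObservation converts the RGB render to grayscale and ResizeObservation
# shrinks it to the requested shape.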
Example #5
    def test_dict_observation(self, pixels_only):
        pixel_key = "rgb"

        env = FakeDictObservationEnvironment()

        # Make sure we are testing the right environment for the test.
        observation_space = env.observation_space
        assert isinstance(observation_space, spaces.Dict)

        width, height = (320, 240)

        # The wrapper should only add one observation.
        wrapped_env = PixelObservationWrapper(
            env,
            pixel_keys=(pixel_key, ),
            pixels_only=pixels_only,
            render_kwargs={pixel_key: {
                "width": width,
                "height": height
            }},
        )

        assert isinstance(wrapped_env.observation_space, spaces.Dict)

        if pixels_only:
            assert len(wrapped_env.observation_space.spaces) == 1
            assert list(
                wrapped_env.observation_space.spaces.keys()) == [pixel_key]
        else:
            assert (len(wrapped_env.observation_space.spaces) == len(
                observation_space.spaces) + 1)
            expected_keys = list(observation_space.spaces.keys()) + [pixel_key]
            assert list(
                wrapped_env.observation_space.spaces.keys()) == expected_keys

        # Check that the added space item is consistent with the added observation.
        observation = wrapped_env.reset()
        rgb_observation = observation[pixel_key]

        assert rgb_observation.shape == (height, width, 3)
        assert rgb_observation.dtype == np.uint8
Example #6
def cartpole_pixel_env_creator(env_config: dict):
    env = gym.make("CartPole-v1").unwrapped
    env.reset()
    # render_kwargs is keyed by pixel key (cf. the dict-observation test above);
    # the render mode itself defaults to "rgb_array".
    env = PixelObservationWrapper(
        env, pixels_only=True, render_kwargs={"pixels": {"mode": "rgb_array"}}
    )
    env = ResizePixelObservationWrapper(env, shape=env_config["shape"])
    env = FlattenObservation(env)
    env = FrameStack(env, num_stack=env_config["num_stack"])
    env.close()
    return env
Example #7
def record_videos_from_actor(
    env: gym.Env,
    actor,
    num_videos=1,
    frame_skip=1,
    pixel=False,
    dir=None,
    logger: logging.Logger = logging.getLogger(__name__),
):
    if pixel:
        videos: List[np.ndarray] = []
        env.reset()
        env = PixelObservationWrapper(env, pixels_only=False)

        for i in range(num_videos):
            video = []
            state_and_pixels = env.reset()
            video.append(state_and_pixels["pixels"].transpose(2, 0, 1))
            reward_sum = 0
            step = 0
            while True:
                action = actor(state_and_pixels["state"])
                state_and_pixels, reward, done, info = env.step(action)
                if step % frame_skip == 0:
                    video.append(state_and_pixels["pixels"].transpose(2, 0, 1))
                reward_sum += reward
                step += 1
                if done:
                    break

            logger.info(
                f"Recording video {i+1}/{num_videos}, reward_sum={reward_sum}, step = {step}"
            )
            videos.append(np.array(video))

        return videos
    elif isinstance(dir, str):
        raise NotImplementedError()  # TODO
Example #8
def run(env_id, seed, noise_type, layer_norm, evaluation, use_vision, **kwargs):
    # Configure things.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs.
    env = gym.make(env_id)
    env = PixelObservationWrapper(env, pixels_only=False)
    eval_env = None

    # Parse noise_type
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    # TODO: Change back to 1e6

    observation_shape = (100, 100, 3) if use_vision else env.observation_space["observation"].shape
    
    memory = Memory(limit=int(1e2),
                    state_shape=env.observation_space["observation"].shape,
                    action_shape=env.action_space.shape,
                    observation_shape=observation_shape,
                    goal_shape=env.observation_space["desired_goal"].shape,
                    goalobs_shape=env.observation_space["desired_goal"].shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm, use_vision=use_vision)

    # Seed everything to make things reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Record the start time on the rank-0 worker (logging for other ranks was disabled above).
    if rank == 0:
        start_time = time.time()

    kwargs.pop('state_shape')
    training.train(env=env, eval_env=eval_env, param_noise=param_noise,
        action_noise=action_noise, actor=actor, critic=critic, memory=memory, use_vision=use_vision, **kwargs)

    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
Example #9
    # Initialize the Weights & Biases project
    if wandb_project:
        wandb.init(project=project_name)

        # Log hyperparameters in WandB project
        wandb.config.update(args)
        wandb.config.control_cost = DEFAULT_CONTROL_COST
        wandb.config.collision_detect = DEFAULT_COLLISION_DETECTION
        wandb.config.contact_cost = DEFAULT_CONTACT_COST
        wandb.config.dead_cost = DEFAULT_DEAD_COST
        wandb.config.healthy_reward = DEFAULT_HEALTHY_REWARD 


    env = AntPixelWrapper(
            PixelObservationWrapper(gym.make(env_name).unwrapped,
                                    pixels_only=False,
                                    render_kwargs=render_kwargs.copy())
    )

    agent = create_third_level_agent(concept_path, args.load_concept_id, args.n_concepts, noisy=noisy, 
        n_heads=n_heads, init_log_alpha=args.init_log_alpha, latent_dim=args.vision_latent_dim, 
        parallel=args.parallel_q_nets, lr=args.lr, lr_alpha=args.lr_alpha, lr_actor=args.lr_actor, min_entropy_factor=args.entropy_factor, 
        lr_c=args.lr_c, lr_Alpha=args.lr_c_Alpha, entropy_update_rate=args.entropy_update_rate, init_Epsilon=args.init_epsilon_MC,
        delta_Epsilon=args.delta_epsilon_MC)
    
    if args.load_id is not None:
        if args.load_best:
            agent.load(MODEL_PATH + env_name + '/best_', args.load_id)
        else:
            agent.load(MODEL_PATH + env_name + '/last_', args.load_id)
Example #10
import gym
import pytest
from gym.wrappers import AtariPreprocessing, ClipAction, FilterObservation
from gym.wrappers.pixel_observation import PixelObservationWrapper
from sequoia.conftest import param_requires_atari_py

from .pixel_observation import PixelObservationWrapper
from .utils import has_wrapper


@pytest.mark.parametrize(
    "env,wrapper_type,result",
    [
        (lambda: PixelObservationWrapper(gym.make("CartPole-v0")), ClipAction,
         False),
        (lambda: PixelObservationWrapper(gym.make("CartPole-v0")),
         PixelObservationWrapper, True),
        (lambda: PixelObservationWrapper(gym.make("CartPole-v0")),
         PixelObservationWrapper, True),
        # param_requires_atari_py(AtariPreprocessing(gym.make("Breakout-v0")), ClipAction, True),
    ])
def test_has_wrapper(env, wrapper_type, result):
    assert has_wrapper(env(), wrapper_type) == result
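
The has_wrapper helper exercised by this test is project-specific. A plausible sketch of such a helper (an assumption, not sequoia's actual implementation) simply walks the wrapper chain through each layer's .env attribute:

def has_wrapper(env: gym.Env, wrapper_type: type) -> bool:
    # Walk outward-in through the chain of gym.Wrapper layers.
    while isinstance(env, gym.Wrapper):
        if isinstance(env, wrapper_type):
            return True
        env = env.env
    return False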
Example #11
def make_env(args, dream_env: bool = False, seed: Optional[int] = None,
             keep_image: bool = False, wrap_rnn: bool = True, load_model: bool = True):
    # Prepares an environment that matches the expected format:
    # - The environment returns a 64x64 image in observation["image"]
    #   and camera data (x, y, z, pitch, yaw) in observation["camera"]
    # - If wrapped in the RNN, observation["features"] returns the RNN output to be used for the controller
    # - A dream environment simulates the actual environment using the RNN. It never returns an image
    #   (because the actual environment doesn't get run) and only returns the features
    # - A wrapped environment always returns the features, and can return the original image when keep_image is True
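    #
    # In other words, a fully wrapped observation is assumed to look roughly like:
    #   observation["image"]    -> (64, 64, 3) uint8 RGB frame
    #   observation["camera"]   -> 5 values (x, y, z, pitch, yaw)
    #   observation["features"] -> RNN features (only when wrapped in the RNN)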

    full_episode = args.full_episode

    # Initialize VAE and MDNRNN networks
    if dream_env or wrap_rnn:
        features_mode = FeatureMode.MODE_ZCH if args.state_space == 2 else FeatureMode.MODE_ZH

        if args.use_gqn:
            encoder = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim,
                                             args.gqn_h_dim, args.gqn_z_dim, args.gqn_l, name="gqn")
            encoder_path = get_path(args, "tf_gqn")
        else:
            encoder = CVAE(args)
            encoder_path = get_path(args, "tf_vae")
        rnn = MDNRNN(args)
        rnn_path = get_path(args, "tf_rnn")

        # TODO: Is this still needed? Do we ever NOT load the model?
        if load_model:
            encoder.load_weights(str(encoder_path))
            rnn.load_weights(str(rnn_path))

    if dream_env:
        assert keep_image is False, "Dream environment doesn't support image observations"

        import json
        initial_z_dir = get_path(args, "tf_initial_z")
        if args.use_gqn:
            initial_z_path = initial_z_dir / "initial_z_gqn.json"
            with open(str(initial_z_path), 'r') as f:
                initial_z = json.load(f)
        else:
            initial_z_path = initial_z_dir / "initial_z_vae.json"
            with open(str(initial_z_path), 'r') as f:
                [initial_mu, initial_logvar] = json.load(f)
            # This could probably be done more efficiently
            initial_z = np.array([list(elem) for elem in zip(initial_mu, initial_logvar)], dtype=float)

        # Create dream environment
        # noinspection PyUnboundLocalVariable
        env = DreamEnv(initial_z, args.z_size, rnn, features_mode)

    else:
        # Create real environment
        kwargs = {}
        if args.env_name.startswith("VizdoomTakeCover"):
            kwargs["position"] = True  # Include position data as observation for Vizdoom environment

        print("Making environment {}...".format(args.env_name))
        env = gym.make(args.env_name, **kwargs)
        print("Raw environment:", env)

        from gym.envs.box2d import CarRacing
        from vizdoomgym.envs import VizdoomTakeCover
        from gym_minigrid.minigrid import MiniGridEnv
        if isinstance(env.unwrapped, CarRacing):
            # Accept actions in the required format
            env = CarRacingActionWrapper(env)
            # Transform CarRacing observations into expected format and add camera data
            env = CarRacingObservationWrapper(env)
            # Cut off "status bar" at the bottom of CarRacing observation (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(84),))
        elif isinstance(env.unwrapped, VizdoomTakeCover):
            # Accept actions in the required format
            env = VizdoomTakeCoverActionWrapper(env)
            # Transform Vizdoom observations into expected format
            env = VizdoomObservationWrapper(env)
            # Cut off "status bar" at the bottom of the screen (copied from original paper)
            env = ClipPixelObservationWrapper(env, (slice(400),))
        elif isinstance(env.unwrapped, MiniGridEnv):
            from gym_minigrid.wrappers import RGBImgPartialObsWrapper
            # Accept actions in the required format
            env = MiniGridActionWrapper(env)
            # Get RGB image observations from the agent's viewpoint
            # (7x7 grid of tiles, with tile size 9 this results in a 63x63 image)
            env = RGBImgPartialObsWrapper(env, tile_size=9)
            # Add camera data to the observation
            env = MiniGridObservationWrapper(env)
            # Pad image to 64x64 to match the requirements (in effect just adding one row at the right and bottom edge
            # with repeated values from the edge)
            env = PadPixelObservationWrapper(env, target_size=64)
        else:
            env = PixelObservationWrapper(env, pixel_keys=("image",))

        if env.observation_space["image"].shape[:2] != (64, 64):
            # Resize image to 64x64
            env = ResizePixelObservationWrapper(env, size=(64, 64))

        # Wrap in RNN to add features to observation
        if wrap_rnn:
            # noinspection PyUnboundLocalVariable
            env = MDNRNNWrapper(env, encoder, rnn, keep_image=keep_image, features_mode=features_mode)

    # TODO: Is this needed? It was only ever implemented for CarRacing and didn't work
    # Force done=False if full_episode is True
    if full_episode:
        env = NoEarlyStopWrapper(env)

    # Set seed if given
    if seed is not None:
        env.seed(seed)

    print("Wrapped environment:", env)
    return env
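
The Clip/Pad/Resize pixel wrappers used above are project-specific. As a rough illustration of the pattern, a hypothetical resize wrapper over a Dict observation space with an "image" entry could look like the following (assumed names and behaviour, not the project's actual code):

import cv2
import numpy as np
import gym
from gym import spaces


class ResizeImageObservation(gym.ObservationWrapper):
    """Resize the "image" entry of a Dict observation to a fixed (height, width)."""

    def __init__(self, env, size=(64, 64), key="image"):
        super().__init__(env)
        self.size = tuple(size)
        self.key = key
        new_spaces = dict(env.observation_space.spaces)
        channels = new_spaces[key].shape[-1]
        new_spaces[key] = spaces.Box(low=0, high=255, shape=(*self.size, channels), dtype=np.uint8)
        self.observation_space = spaces.Dict(new_spaces)

    def observation(self, observation):
        observation = dict(observation)
        # cv2.resize expects (width, height), i.e. the reverse of (rows, cols).
        observation[self.key] = cv2.resize(
            observation[self.key], self.size[::-1], interpolation=cv2.INTER_AREA
        )
        return observation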
Example #12
if __name__ == '__main__':
    if args.virtual_display:
        import pyvirtualdisplay
        _display = pyvirtualdisplay.Display(visible=False, size=(1400, 900))
        _ = _display.start()

    # paths
    Path(args.checkpoint_dir).mkdir(parents=True, exist_ok=True)
    Path(args.log_dir).mkdir(parents=True, exist_ok=True)
    args.checkpoint_dir += f'/{args.env_name}_td3.pth'

    # env & agent
    env = gym.make(args.env_name)
    if args.img_input:
        env.reset()
        env = PixelObservationWrapper(env)
    max_action = float(env.action_space.high[0])
    agent = Agent(env, args.alpha, args.beta, args.hidden_dims, args.tau,
                  args.batch_size, args.gamma, args.d, args.warmup,
                  args.max_size, args.c * max_action, args.sigma * max_action,
                  args.one_device, args.log_dir, args.checkpoint_dir,
                  args.img_input, args.in_channels, args.order, args.depth,
                  args.multiplier, args.action_embed_dim, args.hidden_dim,
                  args.crop_dim, args.img_feature_dim)

    best_score = env.reward_range[0]
    score_history = deque([], maxlen=args.window_size)
    episodes = tqdm(range(args.n_episodes))

    if args.continue_train:
        agent.load_state_dicts()