Example #1
    def get_env_spec(cls,
                     env_spec,
                     latent_dim,
                     module,
                     use_information_bottleneck=False):
        """Get environment specs of encoder with latent dimension.

        Args:
            env_spec (garage.envs.EnvSpec): Environment specs.
            latent_dim (int): Latent dimension.
            module (str): Module to get environment specs for.

        Returns:
            garage.envs.InOutSpec: Module environment specs with latent
                dimension.

        """
        obs_dim = int(np.prod(env_spec.observation_space.shape))
        action_dim = int(np.prod(env_spec.action_space.shape))
        if module == 'encoder':
            in_dim = obs_dim + action_dim + 1
            out_dim = latent_dim
            if use_information_bottleneck:
                out_dim = out_dim * 2

        elif module == 'vf':
            in_dim = obs_dim
            out_dim = latent_dim
        in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
        out_space = akro.Box(low=-1,
                             high=1,
                             shape=(out_dim, ),
                             dtype=np.float32)
        if module == 'encoder':
            spec = InOutSpec(in_space, out_space)
        elif module == 'vf':
            spec = EnvSpec(in_space, out_space)

        return spec
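For reference, the 'encoder' branch above boils down to a couple of akro/garage calls. A minimal sketch with illustrative dimensions (obs_dim=3, action_dim=2, latent_dim=5; none of these values come from the snippet):

import akro
import numpy as np
from garage import InOutSpec  # some garage versions expose this as garage.envs.InOutSpec

obs_dim, action_dim, latent_dim = 3, 2, 5
in_dim = obs_dim + action_dim + 1   # observation, action and reward
out_dim = latent_dim * 2            # doubled by the information bottleneck
in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
out_space = akro.Box(low=-1, high=1, shape=(out_dim, ), dtype=np.float32)
encoder_spec = InOutSpec(in_space, out_space)
assert in_space.shape == (6, ) and out_space.shape == (10, )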
Example #2
    def augment_env_spec(cls, env_spec, latent_dim):
        """Augment environment by a size of latent dimension.

        Args:
            env_spec (garage.envs.EnvSpec): Environment specs to be augmented.
            latent_dim (int): Latent dimension.

        Returns:
            garage.envs.EnvSpec: Augmented environment specs.

        """
        obs_dim = int(np.prod(env_spec.observation_space.shape))
        action_dim = int(np.prod(env_spec.action_space.shape))
        aug_obs = akro.Box(low=-1,
                           high=1,
                           shape=(obs_dim + latent_dim, ),
                           dtype=np.float32)
        aug_act = akro.Box(low=-1,
                           high=1,
                           shape=(action_dim, ),
                           dtype=np.float32)
        return EnvSpec(aug_obs, aug_act)
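A hedged usage sketch, assuming the method above is the classmethod of garage's PEARL algorithm (the enclosing class is not shown in the snippet) and using a dummy Box environment from the garage test fixtures:

from garage.envs import GymEnv
from garage.torch.algos import PEARL  # assumed enclosing class, not shown above
from tests.fixtures.envs.dummy import DummyBoxEnv

env = GymEnv(DummyBoxEnv())
# The policy trained against this spec sees observations with the latent
# context concatenated onto them.
augmented_spec = PEARL.augment_env_spec(env.spec, latent_dim=5)
print(augmented_spec.observation_space.shape)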
Example #3
    def setup_method(self):
        """Setup for all test methods."""
        self.latent_dim = 5
        self.env_spec = GymEnv(DummyBoxEnv())
        latent_space = akro.Box(low=-1,
                                high=1,
                                shape=(self.latent_dim, ),
                                dtype=np.float32)

        # add latent space to observation space to create a new space
        augmented_obs_space = akro.Tuple(
            (self.env_spec.observation_space, latent_space))
        augmented_env_spec = EnvSpec(augmented_obs_space,
                                     self.env_spec.action_space)
        self.obs_dim = self.env_spec.observation_space.flat_dim
        self.action_dim = self.env_spec.action_space.flat_dim
        reward_dim = 1
        self.encoder_input_dim = self.obs_dim + self.action_dim + reward_dim
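        # With the information bottleneck the encoder outputs a mean and a
        # variance per latent dimension, hence latent_dim * 2.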
        encoder_output_dim = self.latent_dim * 2
        encoder_hidden_sizes = (3, 2, encoder_output_dim)

        context_encoder = MLPEncoder(input_dim=self.encoder_input_dim,
                                     output_dim=encoder_output_dim,
                                     hidden_nonlinearity=None,
                                     hidden_sizes=encoder_hidden_sizes,
                                     hidden_w_init=nn.init.ones_,
                                     output_w_init=nn.init.ones_)

        context_policy = TanhGaussianMLPPolicy(env_spec=augmented_env_spec,
                                               hidden_sizes=(3, 5, 7),
                                               hidden_nonlinearity=F.relu,
                                               output_nonlinearity=None)

        self.module = ContextConditionedPolicy(latent_dim=self.latent_dim,
                                               context_encoder=context_encoder,
                                               policy=context_policy,
                                               use_information_bottleneck=True,
                                               use_next_obs=False)
Example #4
def batch_data():
    # spaces
    obs_space = gym.spaces.Box(low=1,
                               high=np.inf,
                               shape=(4, 3, 2),
                               dtype=np.float32)
    act_space = gym.spaces.MultiDiscrete([2, 5])
    env_spec = EnvSpec(obs_space, act_space)

    # generate data
    batch_size = 2
    obs = np.stack([obs_space.low] * batch_size)
    next_obs = np.stack([obs_space.low] * batch_size)
    act = np.stack([[1, 3]] * batch_size)
    rew = np.arange(batch_size)
    step_types = np.array([StepType.FIRST, StepType.TERMINAL], dtype=StepType)

    # env_infos
    env_infos = dict()
    env_infos['goal'] = np.stack([[1, 1]] * batch_size)
    env_infos['foo'] = np.arange(batch_size)

    # agent_infos
    agent_infos = dict()
    agent_infos['prev_action'] = act
    agent_infos['hidden'] = np.arange(batch_size)

    return {
        'env_spec': env_spec,
        'observations': obs,
        'next_observations': next_obs,
        'actions': act,
        'rewards': rew,
        'env_infos': env_infos,
        'agent_infos': agent_infos,
        'step_types': step_types
    }
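The dictionary keys mirror the fields of garage's TimeStepBatch, so a test can unpack the fixture directly. A hedged sketch, assuming batch_data is registered as a pytest fixture (the decorator is not shown) and that the field names match the TimeStepBatch constructor of the garage version in use:

from garage import TimeStepBatch

def test_new_time_step_batch(batch_data):
    # Keyword arguments are assumed to line up with the fixture keys above.
    batch = TimeStepBatch(**batch_data)
    assert batch.rewards.shape == (2, )
    assert batch.actions.shape == (2, 2)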
Example #5
def sample_data():
    # spaces
    obs_space = gym.spaces.Box(low=1,
                               high=10,
                               shape=(4, 3, 2),
                               dtype=np.float32)
    act_space = gym.spaces.MultiDiscrete([2, 5])
    env_spec = EnvSpec(obs_space, act_space)

    # generate data
    obs = obs_space.sample()
    next_obs = obs_space.sample()
    act = act_space.sample()
    rew = 10.0
    step_type = StepType.FIRST

    # env_infos
    env_infos = dict()
    env_infos['goal'] = np.array([[1, 1]])
    env_infos['TimeLimit.truncated'] = (step_type == StepType.TIMEOUT)

    # agent_infos
    agent_infos = dict()
    agent_infos['prev_action'] = act

    return {
        'env_spec': env_spec,
        'observation': obs,
        'next_observation': next_obs,
        'action': act,
        'reward': rew,
        'env_info': env_infos,
        'agent_info': agent_infos,
        'step_type': step_type,
        'episode_info': dict(),
    }
Example #6
    def __init__(self, env, name=None):
        """Create a DMControlEnv.

        Args:
            env (dm_control.suite.Task): The wrapped dm_control environment.
            name (str): Name of the environment.

        """
        self._env = env
        self._name = name or type(env.task).__name__
        self._viewer = None
        self._step_cnt = None

        self._max_episode_length = self._env._step_limit

        # action space
        action_spec = self._env.action_spec()
        if (len(action_spec.shape) == 1) and (-np.inf in action_spec.minimum or
                                              np.inf in action_spec.maximum):
            self._action_space = akro.Discrete(np.prod(action_spec.shape))
        else:
            self._action_space = akro.Box(low=action_spec.minimum,
                                          high=action_spec.maximum,
                                          dtype=np.float32)

        # observation_space
        flat_dim = _flat_shape(self._env.observation_spec())
        self._observation_space = akro.Box(low=-np.inf,
                                           high=np.inf,
                                           shape=[flat_dim],
                                           dtype=np.float32)

        # spec
        self._spec = EnvSpec(action_space=self.action_space,
                             observation_space=self.observation_space,
                             max_episode_length=self._max_episode_length)
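The helper _flat_shape is not defined in the snippet. A plausible sketch of it, assuming dm_control observation specs are a dict of ArraySpec objects whose element counts simply need to be summed:

import numpy as np

def _flat_shape(observation_spec):
    # Sum the number of elements across every entry of the observation spec.
    return int(np.sum([np.prod(spec.shape) for spec in observation_spec.values()]))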
Example #7
    def spec(self):
        """EnvSpec: the environment specification."""
        return EnvSpec(observation_space=self.observation_space,
                       action_space=self.action_space)
Example #8
def get_train_test_data():
    # NOTE: the snippet starts mid-function here; `matrices` is assumed to be
    # a list of input arrays built in lines omitted from the source.
    data = [np.sin(matrices[i]) for i in range(100)]
    obs = [{'observations': [x], 'returns': [np.mean(x)]} for x in data]

    observations = np.concatenate([p['observations'] for p in obs])
    returns = np.concatenate([p['returns'] for p in obs])
    returns = returns.reshape((-1, 1))

    paths = {'observations': [np.sin(matrices[i]) for i in range(100, 110)]}

    expected = [[np.mean(x)] for x in paths['observations']]

    return (obs, observations, returns), (paths, expected)


test_env_spec = EnvSpec(observation_space=akro.Box(low=-1,
                                                   high=1,
                                                   shape=(10, 10, 3)),
                        action_space=None)


class TestGaussianCNNBaseline(TfGraphTestCase):
    @pytest.mark.large
    def test_fit_normalized(self):
        gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                                  filters=((3, (3, 3)), (6, (3, 3))),
                                  strides=(1, 1),
                                  padding='SAME',
                                  hidden_sizes=(32, ),
                                  adaptive_std=False,
                                  use_trust_region=True)

        train_data, test_data = get_train_test_data()
Example #9
    def spec(self):
        """EnvSpec: The environment specification."""
        return EnvSpec(self.observation_space, self._env.spec.action_space)
Example #10
def test_methods():
    """Test PEARLWorker methods."""
    env_spec = GymEnv(DummyBoxEnv())
    latent_dim = 5
    latent_space = akro.Box(low=-1,
                            high=1,
                            shape=(latent_dim, ),
                            dtype=np.float32)

    # add latent space to observation space to create a new space
    augmented_obs_space = akro.Tuple(
        (env_spec.observation_space, latent_space))
    augmented_env_spec = EnvSpec(augmented_obs_space, env_spec.action_space)

    obs_dim = int(np.prod(env_spec.observation_space.shape))
    action_dim = int(np.prod(env_spec.action_space.shape))
    reward_dim = 1
    encoder_input_dim = obs_dim + action_dim + reward_dim
    encoder_output_dim = latent_dim * 2
    encoder_hidden_sizes = (3, 2, encoder_output_dim)

    context_encoder = MLPEncoder(input_dim=encoder_input_dim,
                                 output_dim=encoder_output_dim,
                                 hidden_nonlinearity=None,
                                 hidden_sizes=encoder_hidden_sizes,
                                 hidden_w_init=nn.init.ones_,
                                 output_w_init=nn.init.ones_)

    policy = TanhGaussianMLPPolicy(env_spec=augmented_env_spec,
                                   hidden_sizes=(3, 5, 7),
                                   hidden_nonlinearity=F.relu,
                                   output_nonlinearity=None)

    context_policy = ContextConditionedPolicy(latent_dim=latent_dim,
                                              context_encoder=context_encoder,
                                              policy=policy,
                                              use_information_bottleneck=True,
                                              use_next_obs=False)

    max_episode_length = 20
    worker1 = PEARLWorker(seed=1,
                          max_episode_length=max_episode_length,
                          worker_number=1)
    worker1.update_agent(context_policy)
    worker1.update_env(env_spec)
    episodes = worker1.rollout()

    assert episodes.observations.shape == (max_episode_length, obs_dim)
    assert episodes.actions.shape == (max_episode_length, action_dim)
    assert episodes.rewards.shape == (max_episode_length, )

    worker2 = PEARLWorker(seed=1,
                          max_episode_length=max_episode_length,
                          worker_number=1,
                          deterministic=True,
                          accum_context=True)
    worker2.update_agent(context_policy)
    worker2.update_env(env_spec)
    episodes = worker2.rollout()

    assert context_policy.context.shape == (1, max_episode_length,
                                            encoder_input_dim)
    assert episodes.observations.shape == (max_episode_length, obs_dim)
    assert episodes.actions.shape == (max_episode_length, action_dim)
    assert episodes.rewards.shape == (max_episode_length, )