def test_is_pickleable(self, obs_dim, embedding_dim):
    """Pickling and unpickling an encoder preserves its outputs.

    Perturbs one bias so the outputs are not trivially at their initial
    values, then checks that the unpickled encoder reproduces the same
    distribution parameters in a fresh graph/session.
    """
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    # The encoder's output space is set to the env action space only for
    # convenience of dimension checking in this test.
    embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                               output_space=env.spec.action_space)
    embedding = GaussianMLPEncoder(embedding_spec)

    env.reset()
    obs, _, _, _ = env.step(1)
    obs_dim = env.spec.observation_space.flat_dim

    with tf.compat.v1.variable_scope('GaussianMLPEncoder/GaussianMLPModel',
                                     reuse=True):
        bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/hidden_0/bias')
        # assign it to all one
        bias.load(tf.ones_like(bias).eval())

    output1 = self.sess.run(
        [embedding.distribution.loc, embedding.distribution.stddev()],
        feed_dict={embedding.model.input: [[obs.flatten()]]})

    p = pickle.dumps(embedding)
    # Unpickle into a brand-new graph so nothing is shared with the
    # original session; outputs must still match.
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        embedding_pickled = pickle.loads(p)
        output2 = sess.run(
            [
                embedding_pickled.distribution.loc,
                embedding_pickled.distribution.stddev()
            ],
            feed_dict={embedding_pickled.model.input: [[obs.flatten()]]})
    assert np.array_equal(output1, output2)
def get_env_spec(cls, env_spec, latent_dim, module):
    """Get environment specs of encoder with latent dimension.

    Args:
        env_spec (garage.envs.EnvSpec): Environment specs.
        latent_dim (int): Latent dimension.
        module (str): Module to get environment specs for; one of
            'encoder' or 'vf'.

    Returns:
        garage.envs.InOutSpec: Module environment specs with latent
            dimension.

    Raises:
        ValueError: If ``module`` is not 'encoder' or 'vf'. (Previously
            an unknown module fell through and raised UnboundLocalError.)

    """
    # Validate up front, before touching env_spec, so callers get a
    # clear error instead of an UnboundLocalError at the return.
    if module not in ('encoder', 'vf'):
        raise ValueError("module must be 'encoder' or 'vf', "
                         'got {!r}'.format(module))
    obs_dim = int(np.prod(env_spec.observation_space.shape))
    action_dim = int(np.prod(env_spec.action_space.shape))
    if module == 'encoder':
        # Encoder input concatenates obs, action and one extra scalar;
        # output holds mean and (log-)std, hence latent_dim * 2.
        in_dim = obs_dim + action_dim + 1
        out_dim = latent_dim * 2
    else:  # 'vf'
        in_dim = obs_dim
        out_dim = latent_dim
    in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
    out_space = akro.Box(low=-1,
                         high=1,
                         shape=(out_dim, ),
                         dtype=np.float32)
    # Only the encoder is described by an InOutSpec; the value function
    # uses a plain EnvSpec.
    if module == 'encoder':
        return InOutSpec(in_space, out_space)
    return EnvSpec(in_space, out_space)
def test_is_pickleable(self, obs_dim, embedding_dim):
    """Pickled encoder (with mocked model) reproduces original outputs."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    # Patch in the simple test double so outputs are deterministic and
    # cheap; the patch stays active while the encoder (and its pickle
    # round-trip) constructs GaussianMLPModel instances.
    with mock.patch(('garage.tf.embeddings.'
                     'gaussian_mlp_encoder.GaussianMLPModel'),
                    new=SimpleGaussianMLPModel):
        embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                   output_space=env.spec.action_space)
        embedding = GaussianMLPEncoder(embedding_spec)
        env.reset()
        obs, _, _, _ = env.step(1)
        obs_dim = env.spec.observation_space.flat_dim

        with tf.compat.v1.variable_scope(
                'GaussianMLPEncoder/GaussianMLPModel', reuse=True):
            return_var = tf.compat.v1.get_variable('return_var')
            # assign it to all one
            return_var.load(tf.ones_like(return_var).eval())

        output1 = self.sess.run(
            embedding.model.outputs[:-1],
            feed_dict={embedding.model.input: [obs.flatten()]})

        p = pickle.dumps(embedding)
        # Fresh graph and session: outputs must still match after
        # unpickling.
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            embedding_pickled = pickle.loads(p)
            output2 = sess.run(
                embedding_pickled.model.outputs[:-1],
                feed_dict={embedding_pickled.model.input: [obs.flatten()]})
        assert np.array_equal(output1, output2)
def get_infer_spec(cls, env_spec, latent_dim, inference_window_size):
    """Get the embedding spec of the inference.

    Every `inference_window_size` timesteps in the trajectory will be
    used as the inference network input.

    Args:
        env_spec (garage.envs.EnvSpec): Environment spec.
        latent_dim (int): Latent dimension.
        inference_window_size (int): Length of inference window.

    Returns:
        garage.InOutSpec: Inference spec.

    """
    out_space = cls._get_latent_space(latent_dim)
    obs_space = env_spec.observation_space
    lower, upper = obs_space.bounds
    flat_lower = obs_space.flatten(lower)
    flat_upper = obs_space.flatten(upper)
    # Repeat the flattened per-step bounds to cover the whole window.
    window_lower = np.stack(
        [flat_lower for _ in range(inference_window_size)])
    window_upper = np.stack(
        [flat_upper for _ in range(inference_window_size)])
    traj_space = akro.Box(window_lower, window_upper)
    return InOutSpec(traj_space, out_space)
def get_env_spec(cls, env_spec, latent_dim, num_skills, module):
    """Get environment specs of a module with latent dimension.

    Args:
        env_spec (garage.envs.EnvSpec): Environment specs.
        latent_dim (int): Latent dimension.
        num_skills (int): Number of skills.
        module (str): Module to get environment specs for; one of
            'encoder', 'vf', 'controller_policy' or 'qf'.

    Returns:
        garage.envs.InOutSpec or garage.envs.EnvSpec: Module specs.

    Raises:
        ValueError: If ``module`` is not a recognized module name.
            (Previously an unknown module fell through and raised
            UnboundLocalError.)

    """
    # Validate up front so callers get a clear error instead of an
    # UnboundLocalError at the return.
    valid_modules = ('encoder', 'vf', 'controller_policy', 'qf')
    if module not in valid_modules:
        raise ValueError('module must be one of {}, got {!r}'.format(
            valid_modules, module))
    obs_dim = int(np.prod(env_spec.observation_space.shape))
    action_dim = int(np.prod(env_spec.action_space.shape))
    if module == 'encoder':
        # Encoder input concatenates obs, action, the skill one-hot and
        # one extra scalar; output holds mean and (log-)std.
        in_dim = obs_dim + action_dim + num_skills + 1
        out_dim = latent_dim * 2
    elif module == 'vf':
        in_dim = obs_dim
        out_dim = latent_dim
    else:  # 'controller_policy' or 'qf' share the same dimensions
        in_dim = obs_dim + latent_dim
        out_dim = num_skills
    in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
    out_space = akro.Box(low=-1,
                         high=1,
                         shape=(out_dim, ),
                         dtype=np.float32)
    # Only the encoder is described by an InOutSpec; all other modules
    # use a plain EnvSpec.
    if module == 'encoder':
        return InOutSpec(in_space, out_space)
    return EnvSpec(in_space, out_space)
def test_get_vars(self):
    """Trainable and global variable sets match and respond to updates."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num),
                             high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec, hidden_sizes=[32, 32, 32])
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder,
                                            hidden_sizes=[32, 32, 32])

    vars1 = sorted(policy.get_trainable_vars(), key=lambda v: v.name)
    vars2 = sorted(policy.get_global_vars(), key=lambda v: v.name)
    assert vars1 == vars2
    # Two network. Each with 4 layers * (1 weight + 1 bias) + 1 log_std
    assert len(vars1) == 2 * (4 * 2 + 1)

    obs = np.random.random(obs_dim)
    latent = np.random.random((latent_dim, ))

    for var in vars1:
        var.assign(np.ones(var.shape))
    assert np.any(policy.get_action_given_latent(obs, latent) != 0)

    for var in vars1:
        var.assign(np.zeros(var.shape))
    # NOTE(review): even with all-zero weights the stochastic policy
    # presumably still samples nonzero actions (std > 0), hence
    # "not all zero" rather than "all zero" -- confirm against the
    # Gaussian policy's sampling behavior.
    assert not np.all(policy.get_action_given_latent(obs, latent) == 0)
def test_get_action(self, obs_dim, task_num, latent_dim, action_dim):
    """All action-query APIs return actions inside the action space."""
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num),
                             high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)

    env.reset()
    obs, _, _, _ = env.step(1)
    latent = np.random.random((latent_dim, ))
    task = np.zeros(task_num)
    task[0] = 1

    # Three equivalent single-sample queries: by latent, by task
    # one-hot, and by augmented observation (obs ++ task one-hot).
    action1, _ = policy.get_action_given_latent(obs, latent)
    action2, _ = policy.get_action_given_task(obs, task)
    action3, _ = policy.get_action(np.concatenate([obs.flatten(), task]))

    assert env.action_space.contains(action1)
    assert env.action_space.contains(action2)
    assert env.action_space.contains(action3)

    # Batched variants of the same three queries.
    obses, latents, tasks = [obs] * 3, [latent] * 3, [task] * 3
    aug_obses = [np.concatenate([obs.flatten(), task])] * 3
    action1n, _ = policy.get_actions_given_latents(obses, latents)
    action2n, _ = policy.get_actions_given_tasks(obses, tasks)
    action3n, _ = policy.get_actions(aug_obses)

    for action in chain(action1n, action2n, action3n):
        assert env.action_space.contains(action)
def test_auxiliary(self):
    """Auxiliary policy properties expose consistent spaces and specs."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 2, 2
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num),
                             high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)
    obs_input = tf.compat.v1.placeholder(tf.float32, shape=(None, None, 2))
    task_input = tf.compat.v1.placeholder(tf.float32, shape=(None, None, 2))
    policy.build(obs_input, task_input)

    # Action-distribution width matches the env's action space.
    assert policy.distribution.loc.get_shape().as_list(
    )[-1] == env.action_space.flat_dim
    assert policy.encoder == encoder
    assert policy.latent_space.flat_dim == latent_dim
    assert policy.task_space.flat_dim == task_num
    # Augmented observations concatenate obs with the task one-hot.
    assert (policy.augmented_observation_space.flat_dim ==
            env.observation_space.flat_dim + task_num)
    assert policy.encoder_distribution.loc.get_shape().as_list(
    )[-1] == latent_dim
def test_dist_info(self, obs_dim, embedding_dim):
    """dist_info and dist_info_sym agree with the mocked model outputs."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    # The simple test double makes the distribution parameters
    # deterministic so exact values can be asserted below.
    with mock.patch(('garage.tf.embeddings.'
                     'gaussian_mlp_encoder.GaussianMLPModel'),
                    new=SimpleGaussianMLPModel):
        embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                                   output_space=env.spec.action_space)
        embedding = GaussianMLPEncoder(embedding_spec)
        env.reset()
        obs, _, _, _ = env.step(1)
        obs_dim = env.spec.observation_space.flat_dim

        obs_ph = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, obs_dim))
        dist1_sym = embedding.dist_info_sym(obs_ph, name='p1_sym')

        # flatten output
        # NOTE(review): 0.5 is presumably the constant mean/std emitted
        # by SimpleGaussianMLPModel -- confirm against the mock.
        expected_mean = [np.full(np.prod(embedding_dim), 0.5)]
        expected_log_std = [np.full(np.prod(embedding_dim), np.log(0.5))]

        prob0 = embedding.dist_info(obs.flatten())
        prob1 = self.sess.run(dist1_sym,
                              feed_dict={obs_ph: [obs.flatten()]})

        assert np.array_equal(prob0['mean'].flatten(), expected_mean[0])
        assert np.array_equal(prob0['log_std'].flatten(),
                              expected_log_std[0])
        assert np.array_equal(prob1['mean'], expected_mean)
        assert np.array_equal(prob1['log_std'], expected_log_std)
def test_clone(self):
    """A cloned encoder keeps the original input/output dimensions."""
    env = GarageEnv(DummyBoxEnv(obs_dim=(2, ), action_dim=(2, )))
    spec = InOutSpec(input_space=env.spec.observation_space,
                     output_space=env.spec.action_space)
    original = GaussianMLPEncoder(spec)
    cloned = original.clone(name='cloned')
    assert cloned.input_dim == original.input_dim
    assert cloned.output_dim == original.output_dim
def __init__(self,
             spec,
             image_format,
             *,
             kernel_sizes,
             hidden_channels,
             strides,
             hidden_sizes=(32, 32),
             cnn_hidden_nonlinearity=nn.ReLU,
             mlp_hidden_nonlinearity=nn.ReLU,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
             paddings=0,
             padding_mode='zeros',
             max_pool=False,
             pool_shape=None,
             pool_stride=1,
             output_nonlinearity=None,
             output_w_init=nn.init.xavier_uniform_,
             output_b_init=nn.init.zeros_,
             layer_normalization=False):
    """Build a CNN feature extractor followed by an MLP head.

    Args:
        spec (garage.InOutSpec): Image input space and output space.
        image_format (str): Image axis ordering, e.g. 'NCHW'.
        kernel_sizes (tuple[int]): Kernel size of each CNN layer.
        hidden_channels (tuple[int]): Output channels of each CNN layer.
        strides (tuple[int]): Stride of each CNN layer.
        hidden_sizes (tuple[int]): Sizes of the MLP hidden layers.
        cnn_hidden_nonlinearity (callable): Activation for CNN layers.
        mlp_hidden_nonlinearity (callable): Activation applied between
            the CNN output and the MLP; skipped entirely when None.
        hidden_w_init (callable): Initializer for hidden weights.
        hidden_b_init (callable): Initializer for hidden biases.
        paddings (int or tuple[int]): Padding of each CNN layer.
        padding_mode (str): Padding type, e.g. 'zeros'.
        max_pool (bool): Whether to max-pool after CNN layers.
        pool_shape (tuple[int]): Pooling window shape.
        pool_stride (int or tuple[int]): Pooling stride.
        output_nonlinearity (callable): Activation for the output layer.
        output_w_init (callable): Initializer for output weights.
        output_b_init (callable): Initializer for output biases.
        layer_normalization (bool): Whether to use layer normalization.
    """
    super().__init__()

    # The CNN spec deliberately has no output space; CNNModule infers
    # its output size, exposed later via cnn_module.spec.output_space.
    cnn_spec = InOutSpec(input_space=spec.input_space, output_space=None)
    cnn_module = CNNModule(spec=cnn_spec,
                           image_format=image_format,
                           kernel_sizes=kernel_sizes,
                           strides=strides,
                           hidden_w_init=hidden_w_init,
                           hidden_b_init=hidden_b_init,
                           hidden_channels=hidden_channels,
                           hidden_nonlinearity=cnn_hidden_nonlinearity,
                           paddings=paddings,
                           padding_mode=padding_mode,
                           max_pool=max_pool,
                           layer_normalization=layer_normalization,
                           pool_shape=pool_shape,
                           pool_stride=pool_stride)
    # The MLP consumes the flattened CNN features.
    flat_dim = cnn_module.spec.output_space.flat_dim
    output_dim = spec.output_space.flat_dim
    mlp_module = MLPModule(flat_dim,
                           output_dim,
                           hidden_sizes,
                           hidden_nonlinearity=mlp_hidden_nonlinearity,
                           hidden_w_init=hidden_w_init,
                           hidden_b_init=hidden_b_init,
                           output_nonlinearity=output_nonlinearity,
                           output_w_init=output_w_init,
                           output_b_init=output_b_init,
                           layer_normalization=layer_normalization)

    # Insert the extra activation between CNN and flatten only when a
    # nonlinearity was requested.
    if mlp_hidden_nonlinearity is None:
        self._module = nn.Sequential(cnn_module, nn.Flatten(), mlp_module)
    else:
        self._module = nn.Sequential(cnn_module,
                                     mlp_hidden_nonlinearity(),
                                     nn.Flatten(), mlp_module)
def test_output_values(output_dim, kernel_sizes, hidden_channels, strides,
                       paddings):
    """DiscreteCNNModule matches an equivalent hand-built CNN+MLP stack."""
    input_width = 32
    input_height = 32
    in_channel = 3
    input_shape = (in_channel, input_height, input_width)
    spec = InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf),
        akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf))
    obs = torch.rand(input_shape)

    # All-ones initializers make the comparison deterministic.
    module = DiscreteCNNModule(spec=spec,
                               image_format='NCHW',
                               hidden_channels=hidden_channels,
                               hidden_sizes=hidden_channels,
                               kernel_sizes=kernel_sizes,
                               strides=strides,
                               paddings=paddings,
                               padding_mode='zeros',
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    # Reference computation: CNN, flatten, then MLP, built by hand with
    # the same hyperparameters and initializers.
    cnn = CNNModule(spec=InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf), None),
                    image_format='NCHW',
                    hidden_channels=hidden_channels,
                    kernel_sizes=kernel_sizes,
                    strides=strides,
                    paddings=paddings,
                    padding_mode='zeros',
                    hidden_w_init=nn.init.ones_)
    flat_dim = torch.flatten(cnn(obs).detach(), start_dim=1).shape[1]
    mlp = MLPModule(
        flat_dim,
        output_dim,
        hidden_channels,
        hidden_w_init=nn.init.ones_,
        output_w_init=nn.init.ones_,
    )

    cnn_out = cnn(obs)
    output = mlp(torch.flatten(cnn_out, start_dim=1))

    assert torch.all(torch.eq(output.detach(), module(obs).detach()))
def __init__(self,
             env_spec,
             image_format,
             kernel_sizes,
             *,
             hidden_channels,
             strides=1,
             hidden_sizes=(32, 32),
             hidden_nonlinearity=torch.tanh,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
             paddings=0,
             padding_mode='zeros',
             max_pool=False,
             pool_shape=None,
             pool_stride=1,
             output_w_init=nn.init.xavier_uniform_,
             output_b_init=nn.init.zeros_,
             layer_normalization=False,
             name='CategoricalCNNPolicy'):
    """Build a categorical CNN policy: CNN features + MLP action head.

    Args:
        env_spec (garage.envs.EnvSpec): Environment spec; requires an
            akro.Discrete action space and a non-Dict observation space.
        image_format (str): Image axis ordering, e.g. 'NCHW'.
        kernel_sizes (tuple[int]): Kernel size of each CNN layer.
        hidden_channels (tuple[int]): Output channels of each CNN layer.
        strides (int or tuple[int]): Stride of each CNN layer.
        hidden_sizes (tuple[int]): Sizes of the MLP hidden layers.
        hidden_nonlinearity (callable): Activation for hidden layers.
        hidden_w_init (callable): Initializer for hidden weights.
        hidden_b_init (callable): Initializer for hidden biases.
        paddings (int or tuple[int]): Padding of each CNN layer.
        padding_mode (str): Padding type, e.g. 'zeros'.
        max_pool (bool): Whether to max-pool after CNN layers.
        pool_shape (tuple[int]): Pooling window shape.
        pool_stride (int or tuple[int]): Pooling stride.
        output_w_init (callable): Initializer for output weights.
        output_b_init (callable): Initializer for output biases.
        layer_normalization (bool): Whether to use layer normalization.
        name (str): Policy name.

    Raises:
        ValueError: If the action space is not akro.Discrete, or the
            observation space is an akro.Dict.
    """
    if not isinstance(env_spec.action_space, akro.Discrete):
        # Fixed: message previously named CategoricalMLPPolicy.
        raise ValueError('CategoricalCNNPolicy only works '
                         'with akro.Discrete action space.')
    if isinstance(env_spec.observation_space, akro.Dict):
        # Fixed grammar: was 'do not support with akro.Dict ...'.
        raise ValueError('CNN policies do not support '
                         'akro.Dict observation spaces.')
    super().__init__(env_spec, name)
    # CNN output space is inferred (spec output_space=None) and then
    # fed into the MLP head below.
    self._cnn_module = CNNModule(InOutSpec(
        self._env_spec.observation_space, None),
                                 image_format=image_format,
                                 kernel_sizes=kernel_sizes,
                                 strides=strides,
                                 hidden_channels=hidden_channels,
                                 hidden_w_init=hidden_w_init,
                                 hidden_b_init=hidden_b_init,
                                 hidden_nonlinearity=hidden_nonlinearity,
                                 paddings=paddings,
                                 padding_mode=padding_mode,
                                 max_pool=max_pool,
                                 pool_shape=pool_shape,
                                 pool_stride=pool_stride,
                                 layer_normalization=layer_normalization)
    # One output head sized by the (discrete) action space.
    self._mlp_module = MultiHeadedMLPModule(
        n_heads=1,
        input_dim=self._cnn_module.spec.output_space.flat_dim,
        output_dims=[self._env_spec.action_space.flat_dim],
        hidden_sizes=hidden_sizes,
        hidden_w_init=hidden_w_init,
        hidden_b_init=hidden_b_init,
        hidden_nonlinearity=hidden_nonlinearity,
        output_w_inits=output_w_init,
        output_b_inits=output_b_init)
def test_get_embedding(self, obs_dim, embedding_dim):
    """forward() returns a latent inside the configured output space."""
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    # The encoder's output space is set to the env action space so the
    # containment check below validates the latent's bounds and shape.
    embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                               output_space=env.spec.action_space)
    embedding = GaussianMLPEncoder(embedding_spec)

    env.reset()
    obs, _, _, _ = env.step(1)

    latent, _ = embedding.forward(obs)
    assert env.action_space.contains(latent)
def test_clone(self):
    """Cloning preserves dimensions and copies parameter values."""
    env = GymEnv(DummyBoxEnv(obs_dim=(2, ), action_dim=(2, )))
    spec = InOutSpec(input_space=env.spec.observation_space,
                     output_space=env.spec.action_space)
    original = GaussianMLPEncoder(spec)
    cloned = original.clone(name='cloned')

    assert cloned.input_dim == original.input_dim
    # Every parameter array of the clone equals the original's.
    cloned_params = cloned.model.parameters.values()
    original_params = original.model.parameters.values()
    for cloned_param, param in zip(cloned_params, original_params):
        assert np.array_equal(cloned_param, param)
    assert cloned.output_dim == original.output_dim
def test_auxiliary(self):
    """Exercise encoder auxiliary APIs: params, shapes, (un)flattening."""
    input_space = akro.Box(np.array([-1, -1]), np.array([1, 1]))
    latent_space = akro.Box(np.array([-2, -2, -2]), np.array([2, 2, 2]))
    embedding_spec = InOutSpec(input_space=input_space,
                               output_space=latent_space)
    embedding = GaussianMLPEncoder(embedding_spec,
                                   hidden_sizes=[32, 32, 32])
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None,
                                                 embedding.input_dim))
    embedding.build(task_input)

    # 9 Layers: (3 hidden + 1 output) * (1 weight + 1 bias) + 1 log_std
    assert len(embedding.get_params()) == 9
    assert len(embedding.get_global_vars()) == 9

    assert embedding.distribution.loc.get_shape().as_list(
    )[-1] == latent_space.shape[0]
    assert embedding.input.shape.as_list() == [
        None, None, input_space.shape[0]
    ]
    assert (embedding.latent_mean.shape.as_list() == [
        None, None, latent_space.shape[0]
    ])
    assert (embedding.latent_std_param.shape.as_list() == [
        None, None, latent_space.shape[0]
    ])

    # To increase coverage in embeddings/base.py
    embedding.reset()
    assert embedding.input_dim == embedding_spec.input_space.flat_dim
    assert embedding.output_dim == embedding_spec.output_space.flat_dim

    var_shapes = [
        (2, 32), (32, ),  # input
        (32, 32), (32, ),  # hidden 0
        (32, 32), (32, ),  # hidden 1
        (32, 3), (3, ),  # hidden 2
        (3, )
    ]  # log_std
    assert sorted(embedding.get_param_shapes()) == sorted(var_shapes)

    var_count = sum(list(map(np.prod, var_shapes)))
    # Setting all params to ones must round-trip through get/set.
    embedding.set_param_values(np.ones(var_count))
    assert (embedding.get_param_values() == np.ones(var_count)).all()

    # flat_to_params must reproduce the per-variable shapes.
    assert (sorted(
        map(np.shape, embedding.flat_to_params(
            np.ones(var_count)))) == sorted(var_shapes))
def get_encoder_spec(cls, task_space, latent_dim):
    """Get the embedding spec of the encoder.

    Args:
        task_space (akro.Space): Task spec.
        latent_dim (int): Latent dimension.

    Returns:
        garage.InOutSpec: Encoder spec.

    """
    # Encoder maps tasks into the latent space of the given dimension.
    return InOutSpec(task_space, cls._get_latent_space(latent_dim))
def test_check_spec():
    """_check_spec rejects bad input/output specs and warns on 4 channels."""
    with pytest.raises(ValueError, match='should be an akro.Box'):
        # Input space is not Box or Image
        _check_spec(InOutSpec(akro.Dict(), None), 'NCHW')
    with pytest.raises(ValueError, match='should have three dimensions'):
        # Too many input dimensions
        _check_spec(
            InOutSpec(
                akro.Box(shape=[1, 1, 1, 1], low=-np.inf, high=np.inf),
                None), 'NCHW')
    with pytest.raises(ValueError,
                       match='akro.Box with a single dimension'):
        # Output is not one-dimensional
        _check_spec(
            InOutSpec(
                akro.Box(shape=[1, 1, 1], low=-np.inf, high=np.inf),
                akro.Box(
                    shape=[1, 1],
                    low=-np.inf,
                    high=np.inf,
                )), 'NCHW')
    with pytest.warns(UserWarning):
        # 4 color channels should warn
        _check_spec(
            InOutSpec(
                akro.Box(shape=[4, 1, 1], low=-np.inf, high=np.inf),
                None), 'NCHW')
def setup_method(self):
    """Set up a zero image batch and its input spec.

    Minibatch size 64, image size [3, 32, 32] (NCHW).
    """
    self.batch_size = 64
    self.input_width = 32
    self.input_height = 32
    self.in_channel = 3
    self.dtype = torch.float32
    # Bug fix: `low` was np.inf, which makes the Box degenerate
    # (low > any finite value); the lower bound must be -inf, matching
    # the other specs in this test suite.
    self.input_spec = InOutSpec(
        akro.Box(shape=[
            self.in_channel, self.input_height, self.input_width
        ],
                 high=np.inf,
                 low=-np.inf), None)
    self.input = torch.zeros(
        (self.batch_size, self.in_channel, self.input_height,
         self.input_width),
        dtype=self.dtype)  # minibatch size 64, image size [3, 32, 32]
def test_get_embedding(self, obs_dim, embedding_dim):
    """After build(), forward() yields a latent in the output space."""
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                               output_space=env.spec.action_space)
    embedding = GaussianMLPEncoder(embedding_spec)
    # (batch, sequence, input_dim) placeholder feeds the encoder graph.
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None,
                                                 embedding.input_dim))
    embedding.build(task_input)

    env.reset()
    obs, _, _, _ = env.step(1)

    latent, _ = embedding.forward(obs)
    # Output space equals the env action space, so containment checks
    # the latent's bounds and shape.
    assert env.action_space.contains(latent)
def test_set_output_size(kernel_sizes, hidden_channels, strides, pool_shape,
                         pool_stride):
    """CNNModule flattens its output to the spec's requested size."""
    io_spec = InOutSpec(
        akro.Box(shape=[3, 19, 15], high=np.inf, low=-np.inf),
        akro.Box(shape=[200], high=np.inf, low=-np.inf))
    cnn = CNNModule(io_spec,
                    image_format='NCHW',
                    hidden_channels=hidden_channels,
                    kernel_sizes=kernel_sizes,
                    strides=strides,
                    pool_shape=[(pool_shape, pool_shape)],
                    pool_stride=[(pool_stride, pool_stride)],
                    layer_normalization=True)
    batch = torch.ones(10, 3, 19, 15)
    out = cnn(batch)
    # Batch of 10 images, each mapped to a 200-dim flat vector.
    assert out.shape == (10, 200)
def __init__(self,
             env_spec,
             image_format,
             *,
             kernel_sizes,
             hidden_channels,
             strides,
             hidden_sizes=(32, 32),
             cnn_hidden_nonlinearity=torch.nn.ReLU,
             mlp_hidden_nonlinearity=torch.nn.ReLU,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
             paddings=0,
             padding_mode='zeros',
             max_pool=False,
             pool_shape=None,
             pool_stride=1,
             output_nonlinearity=None,
             output_w_init=nn.init.xavier_uniform_,
             output_b_init=nn.init.zeros_,
             layer_normalization=False):
    """Wrap a DiscreteCNNModule built from an environment spec.

    Args:
        env_spec (garage.envs.EnvSpec): Environment spec; its
            observation space is the image input and its action space
            is the module's output space.
        image_format (str): Image axis ordering, e.g. 'NCHW'.
        kernel_sizes (tuple[int]): Kernel size of each CNN layer.
        hidden_channels (tuple[int]): Output channels of each CNN layer.
        strides (tuple[int]): Stride of each CNN layer.
        hidden_sizes (tuple[int]): Sizes of the MLP hidden layers.
        cnn_hidden_nonlinearity (callable): Activation for CNN layers.
        mlp_hidden_nonlinearity (callable): Activation for MLP layers.
        hidden_w_init (callable): Initializer for hidden weights.
        hidden_b_init (callable): Initializer for hidden biases.
        paddings (int or tuple[int]): Padding of each CNN layer.
        padding_mode (str): Padding type, e.g. 'zeros'.
        max_pool (bool): Whether to max-pool after CNN layers.
        pool_shape (tuple[int]): Pooling window shape.
        pool_stride (int or tuple[int]): Pooling stride.
        output_nonlinearity (callable): Activation for the output layer.
        output_w_init (callable): Initializer for output weights.
        output_b_init (callable): Initializer for output biases.
        layer_normalization (bool): Whether to use layer normalization.
    """
    super().__init__()
    self._env_spec = env_spec
    # All construction is delegated to DiscreteCNNModule; this class
    # only derives the InOutSpec from the environment spec.
    self._cnn_module = DiscreteCNNModule(
        spec=InOutSpec(input_space=env_spec.observation_space,
                       output_space=env_spec.action_space),
        image_format=image_format,
        kernel_sizes=kernel_sizes,
        hidden_channels=hidden_channels,
        strides=strides,
        hidden_sizes=hidden_sizes,
        cnn_hidden_nonlinearity=cnn_hidden_nonlinearity,
        mlp_hidden_nonlinearity=mlp_hidden_nonlinearity,
        hidden_w_init=hidden_w_init,
        hidden_b_init=hidden_b_init,
        paddings=paddings,
        padding_mode=padding_mode,
        max_pool=max_pool,
        pool_shape=pool_shape,
        pool_stride=pool_stride,
        output_nonlinearity=output_nonlinearity,
        output_w_init=output_w_init,
        output_b_init=output_b_init,
        layer_normalization=layer_normalization)
def test_pickling(self):
    """An unpickled policy rebuilds its distribution sample functions."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num),
                             high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)

    pickled = pickle.dumps(policy)
    # Unpickle under a fresh scope so variable names don't collide with
    # the original policy's graph.
    with tf.compat.v1.variable_scope('resumed'):
        unpickled = pickle.loads(pickled)
        assert hasattr(unpickled, '_f_dist_obs_latent')
        assert hasattr(unpickled, '_f_dist_obs_task')
def test_get_embedding(self, obs_dim, embedding_dim):
    """get_latent/get_latents return values inside the output space."""
    env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=embedding_dim))
    embedding_spec = InOutSpec(input_space=env.spec.observation_space,
                               output_space=env.spec.action_space)
    embedding = GaussianMLPEncoder(embedding_spec)
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None,
                                                 embedding.input_dim))
    embedding.build(task_input, name='task_input')

    env.reset()
    obs = env.step(env.action_space.sample()).observation

    latent, _ = embedding.get_latent(obs)
    latents, _ = embedding.get_latents([obs] * 5)
    # Output space equals the env action space, so containment checks
    # each latent's bounds and shape.
    assert env.action_space.contains(latent)
    for latent in latents:
        assert env.action_space.contains(latent)
def test_get_latent(self):
    """get_latent returns a latent and its distribution info per task."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num),
                             high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)

    # Query the latent for one task via its one-hot encoding.
    task_id = 3
    task_onehot = np.zeros(task_num)
    task_onehot[task_id] = 1
    latent, latent_info = policy.get_latent(task_onehot)
    assert latent.shape == (latent_dim, )
    assert latent_info['mean'].shape == (latent_dim, )
    assert latent_info['log_std'].shape == (latent_dim, )
def test_without_nonlinearity(output_dim, hidden_channels, kernel_sizes,
                              strides):
    """With both nonlinearities disabled the stack has three modules."""
    input_width = 32
    input_height = 32
    in_channel = 3
    input_shape = (in_channel, input_height, input_width)
    spec = InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf),
        akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf))
    module = DiscreteCNNModule(spec=spec,
                               image_format='NCHW',
                               hidden_channels=hidden_channels,
                               hidden_sizes=hidden_channels,
                               kernel_sizes=kernel_sizes,
                               strides=strides,
                               mlp_hidden_nonlinearity=None,
                               cnn_hidden_nonlinearity=None,
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    # CNN, Flatten, MLP -- no activation module is interposed when
    # mlp_hidden_nonlinearity is None.
    assert len(module._module) == 3
def test_encoder_dist_info(self):
    """Encoder distribution info matches the mocked model's constants."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(
            'garage.tf.embeddings.'
            'gaussian_mlp_encoder.GaussianMLPModel',
            new=SimpleGaussianMLPModel):
        old_build = SimpleGaussianMLPModel._build

        def float32_build(this, obs_input, name):
            # Cast log_std so all distribution params are float32.
            mean, log_std, std, dist = old_build(this, obs_input, name)
            return mean, tf.cast(log_std, tf.float32), std, dist

        SimpleGaussianMLPModel._build = float32_build
        try:
            embedding_spec = InOutSpec(
                input_space=akro.Box(low=np.zeros(task_num),
                                     high=np.ones(task_num)),
                output_space=akro.Box(low=np.zeros(latent_dim),
                                      high=np.ones(latent_dim)))
            encoder = GaussianMLPEncoder(embedding_spec)
            policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                    encoder=encoder)

            assert policy.encoder_distribution.dim == latent_dim

            inp_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, 5))
            dist_sym = policy.encoder_dist_info_sym(inp_ph)
            dist = self.sess.run(
                dist_sym, feed_dict={inp_ph: [np.random.random(5)]})
            # 0.5 is the constant mean/std produced by the test double.
            expected_mean = np.full(latent_dim, 0.5)
            expected_log_std = np.full(latent_dim, np.log(0.5))
            assert np.allclose(dist['mean'], expected_mean)
            assert np.allclose(dist['log_std'], expected_log_std)
        finally:
            # Bug fix: restore the monkey-patched _build so the patch
            # does not leak into other tests in this process.
            SimpleGaussianMLPModel._build = old_build
    SimpleGaussianMLPModel._dtype = np.float32
def test_is_pickleable(output_dim, hidden_channels, kernel_sizes, strides):
    """A pickled module reproduces the original output exactly."""
    input_width = 32
    input_height = 32
    in_channel = 3
    input_shape = (in_channel, input_height, input_width)
    input_a = torch.ones(input_shape)
    spec = InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf),
        akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf))
    model = DiscreteCNNModule(spec=spec,
                              image_format='NCHW',
                              hidden_channels=hidden_channels,
                              kernel_sizes=kernel_sizes,
                              mlp_hidden_nonlinearity=nn.ReLU,
                              cnn_hidden_nonlinearity=nn.ReLU,
                              strides=strides)
    output1 = model(input_a)

    h = pickle.dumps(model)
    model_pickled = pickle.loads(h)
    output2 = model_pickled(input_a)

    # Idiom fix: torch.equal is the direct exact-equality check; the
    # previous np.array_equal(torch.all(torch.eq(...)), True) was an
    # equivalent but obscure round-trip through numpy.
    assert torch.equal(output1, output2)
def test_auxiliary(self):
    """Policy auxiliary properties are consistent under a mocked model."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 2, 2
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    # The test double replaces the real model so construction is cheap
    # and deterministic.
    with mock.patch(
            'garage.tf.policies.'
            'gaussian_mlp_task_embedding_policy.GaussianMLPModel',
            new=SimpleGaussianMLPModel):
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        encoder = GaussianMLPEncoder(embedding_spec)
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        assert policy.distribution.dim == env.action_space.flat_dim
        assert policy.encoder == encoder
        assert policy.latent_space.flat_dim == latent_dim
        assert policy.task_space.flat_dim == task_num
        # Augmented observations concatenate obs with the task one-hot.
        assert (policy.augmented_observation_space.flat_dim ==
                env.observation_space.flat_dim + task_num)
        assert policy.encoder_distribution.dim == latent_dim
def test_get_latent(self):
    """get_latent returns latent and info shaped by latent_dim."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    # The test double replaces the real model so construction is cheap
    # and deterministic.
    with mock.patch(
            'garage.tf.policies.'
            'gaussian_mlp_task_embedding_policy.GaussianMLPModel',
            new=SimpleGaussianMLPModel):
        embedding_spec = InOutSpec(
            input_space=akro.Box(low=np.zeros(task_num),
                                 high=np.ones(task_num)),
            output_space=akro.Box(low=np.zeros(latent_dim),
                                  high=np.ones(latent_dim)))
        encoder = GaussianMLPEncoder(embedding_spec)
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        # Query the latent for one task via its one-hot encoding.
        task_id = 3
        task_onehot = np.zeros(task_num)
        task_onehot[task_id] = 1
        latent, latent_info = policy.get_latent(task_onehot)
        assert latent.shape == (latent_dim, )
        assert latent_info['mean'].shape == (latent_dim, )
        assert latent_info['log_std'].shape == (latent_dim, )