# The excerpts below assume `numpy as np`, `tensorflow as tf`,
# `tensorflow_probability as tfp`, and `SquashBijector` are imported.
def test_matches_tanh_bijector_double(self):
    squash = SquashBijector()
    tanh = tfp.bijectors.Tanh()

    data = np.linspace(-10, 10, 100).astype(np.float64)

    squash_forward = squash.forward(data)
    tanh_forward = tanh.forward(data)

    np.testing.assert_equal(squash_forward.numpy(), tanh_forward.numpy())

    squash_ildj = squash.inverse_log_det_jacobian(
        squash_forward, event_ndims=0)
    tanh_ildj = tanh.inverse_log_det_jacobian(
        tanh_forward, event_ndims=0)

    np.testing.assert_allclose(squash_ildj.numpy(), tanh_ildj.numpy())
# Variant of the same test written with the tf.test.TestCase helpers
# (self.evaluate / self.assertAllClose) instead of NumPy assertions.
def test_matches_tanh_bijector_double(self):
    squash = SquashBijector()
    tanh = tfp.bijectors.Tanh()

    data = np.linspace(-10, 10, 100).astype(np.float64)

    squash_forward = squash.forward(data)
    tanh_forward = tanh.forward(data)

    self.assertAllClose(
        self.evaluate(squash_forward), self.evaluate(tanh_forward))

    squash_ildj = squash.inverse_log_det_jacobian(
        squash_forward, event_ndims=0)
    tanh_ildj = tanh.inverse_log_det_jacobian(
        tanh_forward, event_ndims=0)

    self.assertAllClose(
        self.evaluate(squash_ildj), self.evaluate(tanh_ildj))
def test_matches_tanh_bijector_single(self):
    squash = SquashBijector()
    tanh = tfp.bijectors.Tanh()

    data = np.linspace(-5, 5, 100).astype(np.float32)

    squash_forward = squash.forward(data)
    tanh_forward = tanh.forward(data)

    np.testing.assert_equal(squash_forward.numpy(), tanh_forward.numpy())

    squash_ildj = squash.inverse_log_det_jacobian(
        squash_forward, event_ndims=0)
    tanh_ildj = tanh.inverse_log_det_jacobian(
        tanh_forward, event_ndims=0)

    tanh_isfinite_mask = np.where(np.isfinite(tanh_ildj))
    np.testing.assert_allclose(
        tanh_ildj.numpy()[tanh_isfinite_mask],
        squash_ildj.numpy()[tanh_isfinite_mask],
        rtol=1e-3)
# Variant using tf.test.TestCase helpers. Note that `tf.is_finite` was moved
# to `tf.math.is_finite` in TensorFlow 2.x.
def test_matches_tanh_bijector_single(self):
    squash = SquashBijector()
    tanh = tfp.bijectors.Tanh()

    data = np.linspace(-5, 5, 100).astype(np.float32)

    squash_forward = squash.forward(data)
    tanh_forward = tanh.forward(data)

    self.assertAllClose(
        self.evaluate(squash_forward), self.evaluate(tanh_forward))

    squash_ildj = squash.inverse_log_det_jacobian(
        squash_forward, event_ndims=0)
    tanh_ildj = tanh.inverse_log_det_jacobian(
        tanh_forward, event_ndims=0)

    # In float32 the tails of tanh saturate, so compare only the indices
    # where the reference Tanh bijector produces finite log-det values.
    tanh_finite_mask = tf.where(tf.math.is_finite(tanh_ildj))
    self.assertAllClose(
        self.evaluate(tf.gather(tanh_ildj, tanh_finite_mask)),
        self.evaluate(tf.gather(squash_ildj, tanh_finite_mask)),
        rtol=1e-3)
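# For reference only: a minimal sketch of a tanh-style bijector that would
# satisfy the tests above, assuming the numerically stable softplus form of
# the forward log-det Jacobian. The actual SquashBijector implementation is
# not shown in this excerpt; names and details here are illustrative.
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp


class SquashBijectorSketch(tfp.bijectors.Bijector):
    """Tanh bijector with a numerically stable log-det Jacobian (sketch)."""

    def __init__(self, validate_args=False, name="squash"):
        super(SquashBijectorSketch, self).__init__(
            forward_min_event_ndims=0,
            validate_args=validate_args,
            name=name)

    def _forward(self, x):
        return tf.nn.tanh(x)

    def _inverse(self, y):
        return tf.atanh(y)

    def _forward_log_det_jacobian(self, x):
        # log |d tanh(x)/dx| = log(1 - tanh(x)^2)
        #                    = 2 * (log 2 - x - softplus(-2x)),
        # which avoids evaluating log(1 - tanh(x)^2) directly at the tails.
        return 2.0 * (np.log(2.0) - x - tf.nn.softplus(-2.0 * x))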
def __init__(self,
             input_shapes,
             output_shape,
             squash=True,
             preprocessor=None,
             name=None,
             *args,
             **kwargs):
    self._Serializable__initialize(locals())

    self._input_shapes = input_shapes
    self._output_shape = output_shape
    self._squash = squash
    self._name = name
    self._preprocessor = preprocessor

    super(GaussianPolicy, self).__init__(*args, **kwargs)

    self.condition_inputs = [
        tf.keras.layers.Input(shape=input_shape)
        for input_shape in input_shapes
    ]

    conditions = tf.keras.layers.Lambda(
        lambda x: tf.concat(x, axis=-1))(self.condition_inputs)

    if preprocessor is not None:
        conditions = preprocessor(conditions)

    # The network outputs both the mean (shift) and the per-dimension
    # log standard deviation (log_scale_diag) of the Gaussian.
    shift_and_log_scale_diag = self._shift_and_log_scale_diag_net(
        input_shapes=(conditions.shape[1:], ),
        output_size=output_shape[0] * 2,
    )(conditions)

    shift, log_scale_diag = tf.keras.layers.Lambda(
        lambda shift_and_log_scale_diag: tf.split(
            shift_and_log_scale_diag, num_or_size_splits=2, axis=-1)
    )(shift_and_log_scale_diag)

    log_scale_diag = tf.keras.layers.Lambda(
        lambda log_scale_diag: tf.clip_by_value(
            log_scale_diag, *SCALE_DIAG_MIN_MAX))(log_scale_diag)

    batch_size = tf.keras.layers.Lambda(
        lambda x: tf.shape(x)[0])(conditions)

    base_distribution = tfp.distributions.MultivariateNormalDiag(
        loc=tf.zeros(output_shape), scale_diag=tf.ones(output_shape))

    # Reparameterized sampling: draw standard-normal latents, then shift and
    # scale them with the network outputs before squashing.
    latents = tf.keras.layers.Lambda(
        lambda batch_size: base_distribution.sample(batch_size))(batch_size)

    self.latents_model = tf.keras.Model(self.condition_inputs, latents)
    self.latents_input = tf.keras.layers.Input(shape=output_shape)

    def raw_actions_fn(inputs):
        shift, log_scale_diag, latents = inputs
        bijector = tfp.bijectors.Affine(
            shift=shift, scale_diag=tf.exp(log_scale_diag))
        actions = bijector.forward(latents)
        return actions

    raw_actions = tf.keras.layers.Lambda(raw_actions_fn)(
        (shift, log_scale_diag, latents))
    raw_actions_for_fixed_latents = tf.keras.layers.Lambda(raw_actions_fn)(
        (shift, log_scale_diag, self.latents_input))

    squash_bijector = (
        SquashBijector() if self._squash else tfp.bijectors.Identity())

    actions = tf.keras.layers.Lambda(
        lambda raw_actions: squash_bijector.forward(raw_actions))(raw_actions)
    self.actions_model = tf.keras.Model(self.condition_inputs, actions)

    actions_for_fixed_latents = tf.keras.layers.Lambda(
        lambda raw_actions: squash_bijector.forward(raw_actions))(
            raw_actions_for_fixed_latents)
    self.actions_model_for_fixed_latents = tf.keras.Model(
        (*self.condition_inputs, self.latents_input),
        actions_for_fixed_latents)

    deterministic_actions = tf.keras.layers.Lambda(
        lambda shift: squash_bijector.forward(shift))(shift)
    self.deterministic_actions_model = tf.keras.Model(
        self.condition_inputs, deterministic_actions)

    def log_pis_fn(inputs):
        shift, log_scale_diag, actions = inputs
        base_distribution = tfp.distributions.MultivariateNormalDiag(
            loc=tf.zeros(output_shape), scale_diag=tf.ones(output_shape))
        bijector = tfp.bijectors.Chain((
            squash_bijector,
            tfp.bijectors.Affine(
                shift=shift, scale_diag=tf.exp(log_scale_diag)),
        ))
        distribution = (
            tfp.distributions.ConditionalTransformedDistribution(
                distribution=base_distribution, bijector=bijector))
        log_pis = distribution.log_prob(actions)[:, None]
        return log_pis

    self.actions_input = tf.keras.layers.Input(shape=output_shape)

    log_pis = tf.keras.layers.Lambda(log_pis_fn)(
        [shift, log_scale_diag, actions])
    log_pis_for_action_input = tf.keras.layers.Lambda(log_pis_fn)(
        [shift, log_scale_diag, self.actions_input])

    self.log_pis_model = tf.keras.Model(
        (*self.condition_inputs, self.actions_input),
        log_pis_for_action_input)

    self.diagnostics_model = tf.keras.Model(
        self.condition_inputs,
        (shift, log_scale_diag, log_pis, raw_actions, actions))
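# Standalone sketch (not part of the policy class; all names and numbers are
# illustrative) of the change-of-variables computation that `log_pis_fn`
# performs: a standard-normal latent is shifted and scaled, squashed through
# tanh, and the log-density is corrected by the tanh log-det Jacobian, here
# written with the stable softplus form of log(1 - tanh(x)^2).
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

# Illustrative parameters for a 2-dimensional action space.
shift = tf.constant([[0.1, -0.4]], dtype=tf.float32)
log_scale_diag = tf.math.log(tf.constant([[0.5, 0.8]], dtype=tf.float32))

latents = tf.constant([[0.3, -1.2]], dtype=tf.float32)
raw_actions = shift + tf.exp(log_scale_diag) * latents   # Affine forward
actions = tf.tanh(raw_actions)                           # squash

gaussian = tfp.distributions.MultivariateNormalDiag(
    loc=shift, scale_diag=tf.exp(log_scale_diag))

# Manual tanh correction: log pi(a) = log N(u) - sum_i log(1 - tanh(u_i)^2).
tanh_log_det = tf.reduce_sum(
    2.0 * (np.log(2.0) - raw_actions - tf.nn.softplus(-2.0 * raw_actions)),
    axis=-1)
log_pi_manual = gaussian.log_prob(raw_actions) - tanh_log_det

# The same quantity via a TransformedDistribution, mirroring `log_pis_fn`.
squashed = tfp.distributions.TransformedDistribution(
    distribution=gaussian, bijector=tfp.bijectors.Tanh())
log_pi_tfp = squashed.log_prob(actions)
# log_pi_manual and log_pi_tfp agree up to floating-point error.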
def __init__(self,
             input_shapes,
             output_shape,
             eps=0.1,
             squash=True,
             preprocessor=None,
             name=None,
             *args,
             **kwargs):
    self._Serializable__initialize(locals())

    self.eps = eps
    self._input_shapes = input_shapes
    self._output_shape = output_shape
    self._squash = squash
    self._name = name
    self._preprocessor = preprocessor

    super(DeterministicsPolicy, self).__init__(*args, **kwargs)

    self.condition_inputs = [
        tf.keras.layers.Input(shape=input_shape)
        for input_shape in input_shapes
    ]

    conditions = tf.keras.layers.Lambda(
        lambda x: tf.concat(x, axis=-1))(self.condition_inputs)

    if preprocessor is not None:
        conditions = preprocessor(conditions)

    # Only a mean (shift) is produced; there is no log-scale head.
    shift = self._shift_and_log_scale_diag_net(
        input_shapes=(conditions.shape[1:], ),
        output_size=output_shape[0],
    )(conditions)

    batch_size = tf.keras.layers.Lambda(
        lambda x: tf.shape(x)[0])(conditions)

    squash_bijector = (
        SquashBijector() if self._squash else tfp.bijectors.Identity())

    deterministic_actions = tf.keras.layers.Lambda(
        lambda shift: squash_bijector.forward(shift))(shift)
    self.deterministic_actions_model = tf.keras.Model(
        self.condition_inputs, deterministic_actions)

    # Currently unused alternative for perturbing the output, kept from
    # earlier experiments; note that it hardcodes 0.1 and ignores `eps`.
    def raw_actions_fn(inputs):
        shift, eps = inputs
        # actions = shift + tf.random.normal(shape=tf.shape(shift), stddev=eps)
        actions = tf.keras.layers.GaussianNoise(stddev=0.1)(shift)
        return actions

    # raw_actions = tf.keras.layers.Lambda(raw_actions_fn)((shift, self.eps))

    # Exploration actions: add zero-mean Gaussian noise to the squashed
    # deterministic actions and clip back to [-1, 1]. The noise scale is
    # hardcoded to 0.1 rather than using self.eps.
    actions = tf.keras.layers.Lambda(
        lambda deterministic_actions: tf.clip_by_value(
            deterministic_actions + tf.random.normal(
                shape=tf.shape(deterministic_actions), stddev=0.1),
            -1, 1))(deterministic_actions)

    # Other previously tried variants, kept for reference:
    # actions = tf.keras.layers.Lambda(raw_actions_fn)(
    #     (deterministic_actions, self.eps))
    # squash_bijector = (SquashBijector()
    #                    if self._squash else tfp.bijectors.Identity())
    # actions = tf.keras.layers.Lambda(
    #     lambda raw_actions: squash_bijector.forward(raw_actions))(raw_actions)

    self.actions_model = tf.keras.Model(self.condition_inputs, actions)

    self.actions_input = tf.keras.layers.Input(shape=output_shape)

    self.diagnostics_model = tf.keras.Model(
        self.condition_inputs, (shift, actions))
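# One behavioral detail worth noting about the commented-out GaussianNoise
# branch above: the layer only injects noise when called with training=True
# and is an identity at inference, which is likely why the active branch uses
# explicit tf.random.normal plus clipping instead. A minimal standalone check
# (not from the original code):
import tensorflow as tf

noise = tf.keras.layers.GaussianNoise(stddev=0.1)
x = tf.zeros((1, 3))

print(noise(x, training=False).numpy())  # [[0. 0. 0.]] -- no noise applied
print(noise(x, training=True).numpy())   # small non-zero perturbations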