Example #1
    def test_matches_tanh_bijector_double(self):
        squash = SquashBijector()
        tanh = tfp.bijectors.Tanh()
        data = np.linspace(-10, 10, 100).astype(np.float64)

        squash_forward = squash.forward(data)
        tanh_forward = tanh.forward(data)

        np.testing.assert_equal(squash_forward.numpy(), tanh_forward.numpy())

        squash_ildj = squash.inverse_log_det_jacobian(squash_forward,
                                                      event_ndims=0)
        tanh_ildj = tanh.inverse_log_det_jacobian(tanh_forward, event_ndims=0)

        np.testing.assert_allclose(squash_ildj.numpy(), tanh_ildj.numpy())
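
The class under test, SquashBijector, is not shown in these snippets. Below is a minimal sketch of what the tests imply: a tanh bijector whose log-det-Jacobian is written in a numerically stable form so it matches tfp.bijectors.Tanh. The exact implementation in the repo may differ.

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp


class SquashBijector(tfp.bijectors.Bijector):
    """Tanh-squashing bijector; behavior implied by the tests above."""

    def __init__(self, validate_args=False, name="squash"):
        super(SquashBijector, self).__init__(
            forward_min_event_ndims=0,
            validate_args=validate_args,
            name=name)

    def _forward(self, x):
        return tf.nn.tanh(x)

    def _inverse(self, y):
        return tf.atanh(y)

    def _forward_log_det_jacobian(self, x):
        # log|d tanh(x)/dx| = log(1 - tanh(x)^2), rewritten to avoid
        # catastrophic cancellation for large |x|; the inverse log-det
        # Jacobian is derived from this by the Bijector base class.
        return 2. * (np.log(2.) - x - tf.nn.softplus(-2. * x))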
Example #2
    def test_matches_tanh_bijector_double(self):
        squash = SquashBijector()
        tanh = tfp.bijectors.Tanh()
        data = np.linspace(-10, 10, 100).astype(np.float64)

        squash_forward = squash.forward(data)
        tanh_forward = tanh.forward(data)

        self.assertAllClose(self.evaluate(squash_forward),
                            self.evaluate(tanh_forward))

        squash_ildj = squash.inverse_log_det_jacobian(squash_forward,
                                                      event_ndims=0)
        tanh_ildj = tanh.inverse_log_det_jacobian(tanh_forward, event_ndims=0)

        self.assertAllClose(self.evaluate(squash_ildj),
                            self.evaluate(tanh_ildj))
Example #3
    def test_matches_tanh_bijector_single(self):
        squash = SquashBijector()
        tanh = tfp.bijectors.Tanh()
        data = np.linspace(-5, 5, 100).astype(np.float32)

        squash_forward = squash.forward(data)
        tanh_forward = tanh.forward(data)

        np.testing.assert_equal(squash_forward.numpy(), tanh_forward.numpy())

        squash_ildj = squash.inverse_log_det_jacobian(squash_forward,
                                                      event_ndims=0)
        tanh_ildj = tanh.inverse_log_det_jacobian(tanh_forward, event_ndims=0)

        tanh_isfinite_mask = np.where(np.isfinite(tanh_ildj))

        np.testing.assert_allclose(tanh_ildj.numpy()[tanh_isfinite_mask],
                                   squash_ildj.numpy()[tanh_isfinite_mask],
                                   rtol=1e-3)
Example #4
    def test_matches_tanh_bijector_single(self):
        squash = SquashBijector()
        tanh = tfp.bijectors.Tanh()
        data = np.linspace(-5, 5, 100).astype(np.float32)

        squash_forward = squash.forward(data)
        tanh_forward = tanh.forward(data)

        self.assertAllClose(self.evaluate(squash_forward),
                            self.evaluate(tanh_forward))

        squash_ildj = squash.inverse_log_det_jacobian(squash_forward,
                                                      event_ndims=0)
        tanh_ildj = tanh.inverse_log_det_jacobian(tanh_forward, event_ndims=0)

        tanh_finite_mask = tf.where(tf.math.is_finite(tanh_ildj))

        self.assertAllClose(
            self.evaluate(tf.gather(tanh_ildj, tanh_finite_mask)),
            self.evaluate(tf.gather(squash_ildj, tanh_finite_mask)),
            rtol=1e-3)
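
The assertAllClose/self.evaluate variants above assume a tf.test.TestCase harness, and the single-precision tests compare the ILDJs only where the reference Tanh value is finite, presumably to guard against float32 saturation near the edges of the range. A minimal sketch of how the tests would be wired up (module and class names are assumptions):

import tensorflow as tf


class SquashBijectorTest(tf.test.TestCase):
    # test_matches_tanh_bijector_double and test_matches_tanh_bijector_single
    # from the examples above would be defined here.
    pass


if __name__ == '__main__':
    tf.test.main()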
Example #5
    def __init__(self,
                 input_shapes,
                 output_shape,
                 squash=True,
                 preprocessor=None,
                 name=None,
                 *args,
                 **kwargs):
        self._Serializable__initialize(locals())

        self._input_shapes = input_shapes
        self._output_shape = output_shape
        self._squash = squash
        self._name = name
        self._preprocessor = preprocessor

        super(GaussianPolicy, self).__init__(*args, **kwargs)

        self.condition_inputs = [
            tf.keras.layers.Input(shape=input_shape)
            for input_shape in input_shapes
        ]

        conditions = tf.keras.layers.Lambda(lambda x: tf.concat(x, axis=-1))(
            self.condition_inputs)

        if preprocessor is not None:
            conditions = preprocessor(conditions)

        shift_and_log_scale_diag = self._shift_and_log_scale_diag_net(
            input_shapes=(conditions.shape[1:], ),
            output_size=output_shape[0] * 2,
        )(conditions)

        shift, log_scale_diag = tf.keras.layers.Lambda(
            lambda shift_and_log_scale_diag: tf.split(
                shift_and_log_scale_diag, num_or_size_splits=2, axis=-1))(
                    shift_and_log_scale_diag)

        log_scale_diag = tf.keras.layers.Lambda(
            lambda log_scale_diag: tf.clip_by_value(
                log_scale_diag, *SCALE_DIAG_MIN_MAX))(log_scale_diag)

        batch_size = tf.keras.layers.Lambda(lambda x: tf.shape(x)[0])(
            conditions)

        base_distribution = tfp.distributions.MultivariateNormalDiag(
            loc=tf.zeros(output_shape), scale_diag=tf.ones(output_shape))

        latents = tf.keras.layers.Lambda(
            lambda batch_size: base_distribution.sample(batch_size))(batch_size)

        self.latents_model = tf.keras.Model(self.condition_inputs, latents)
        self.latents_input = tf.keras.layers.Input(shape=output_shape)

        def raw_actions_fn(inputs):
            shift, log_scale_diag, latents = inputs
            bijector = tfp.bijectors.Affine(shift=shift,
                                            scale_diag=tf.exp(log_scale_diag))
            actions = bijector.forward(latents)
            return actions

        raw_actions = tf.keras.layers.Lambda(raw_actions_fn)(
            (shift, log_scale_diag, latents))

        raw_actions_for_fixed_latents = tf.keras.layers.Lambda(raw_actions_fn)(
            (shift, log_scale_diag, self.latents_input))

        squash_bijector = (SquashBijector()
                           if self._squash else tfp.bijectors.Identity())

        actions = tf.keras.layers.Lambda(
            lambda raw_actions: squash_bijector.forward(raw_actions))(raw_actions)
        self.actions_model = tf.keras.Model(self.condition_inputs, actions)

        actions_for_fixed_latents = tf.keras.layers.Lambda(
            lambda raw_actions: squash_bijector.forward(raw_actions))(
                raw_actions_for_fixed_latents)
        self.actions_model_for_fixed_latents = tf.keras.Model(
            (*self.condition_inputs, self.latents_input),
            actions_for_fixed_latents)

        deterministic_actions = tf.keras.layers.Lambda(
            lambda shift: squash_bijector.forward(shift))(shift)

        self.deterministic_actions_model = tf.keras.Model(
            self.condition_inputs, deterministic_actions)

        def log_pis_fn(inputs):
            shift, log_scale_diag, actions = inputs
            base_distribution = tfp.distributions.MultivariateNormalDiag(
                loc=tf.zeros(output_shape), scale_diag=tf.ones(output_shape))
            bijector = tfp.bijectors.Chain((
                squash_bijector,
                tfp.bijectors.Affine(shift=shift,
                                     scale_diag=tf.exp(log_scale_diag)),
            ))
            distribution = (
                tfp.distributions.ConditionalTransformedDistribution(
                    distribution=base_distribution, bijector=bijector))

            log_pis = distribution.log_prob(actions)[:, None]
            return log_pis

        self.actions_input = tf.keras.layers.Input(shape=output_shape)

        log_pis = tf.keras.layers.Lambda(log_pis_fn)(
            [shift, log_scale_diag, actions])

        log_pis_for_action_input = tf.keras.layers.Lambda(log_pis_fn)(
            [shift, log_scale_diag, self.actions_input])

        self.log_pis_model = tf.keras.Model(
            (*self.condition_inputs, self.actions_input),
            log_pis_for_action_input)

        self.diagnostics_model = tf.keras.Model(
            self.condition_inputs,
            (shift, log_scale_diag, log_pis, raw_actions, actions))
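
Example #5 builds the sampling and log-probability paths out of Keras Lambda layers: latents drawn from a standard MultivariateNormalDiag, an Affine bijector with the predicted shift and exp(log_scale_diag), and a final squash. The same computation without the Keras plumbing, as a rough standalone sketch (it uses plain TransformedDistribution instead of the Conditional variant, and the function name is mine):

import tensorflow as tf
import tensorflow_probability as tfp


def sample_and_log_prob(shift, log_scale_diag, squash_bijector):
    """Reparameterized sample plus log-prob through shift/scale and squash."""
    output_shape = shift.shape[-1:]
    base_distribution = tfp.distributions.MultivariateNormalDiag(
        loc=tf.zeros(output_shape), scale_diag=tf.ones(output_shape))
    # Chain applies the last bijector first: affine shift/scale, then squash.
    bijector = tfp.bijectors.Chain((
        squash_bijector,
        tfp.bijectors.Affine(shift=shift, scale_diag=tf.exp(log_scale_diag)),
    ))
    distribution = tfp.distributions.TransformedDistribution(
        distribution=base_distribution, bijector=bijector)

    latents = base_distribution.sample(tf.shape(shift)[0])
    actions = bijector.forward(latents)
    log_pis = distribution.log_prob(actions)[:, None]
    return actions, log_pis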
Example #6
    def __init__(self,
                 input_shapes,
                 output_shape,
                 eps=0.1,
                 squash=True,
                 preprocessor=None,
                 name=None,
                 *args,
                 **kwargs):
        self._Serializable__initialize(locals())
        self.eps = eps
        self._input_shapes = input_shapes
        self._output_shape = output_shape
        self._squash = squash
        self._name = name
        self._preprocessor = preprocessor

        super(DeterministicsPolicy, self).__init__(*args, **kwargs)

        self.condition_inputs = [
            tf.keras.layers.Input(shape=input_shape)
            for input_shape in input_shapes
        ]

        conditions = tf.keras.layers.Lambda(lambda x: tf.concat(x, axis=-1))(
            self.condition_inputs)

        if preprocessor is not None:
            conditions = preprocessor(conditions)

        shift = self._shift_and_log_scale_diag_net(
            input_shapes=(conditions.shape[1:], ),
            output_size=output_shape[0],
        )(conditions)

        batch_size = tf.keras.layers.Lambda(lambda x: tf.shape(x)[0])(
            conditions)

        squash_bijector = (SquashBijector()
                           if self._squash else tfp.bijectors.Identity())

        deterministic_actions = tf.keras.layers.Lambda(
            lambda shift: squash_bijector.forward(shift))(shift)

        self.deterministic_actions_model = tf.keras.Model(
            self.condition_inputs, deterministic_actions)

        # Exploration: add Gaussian noise to the squashed deterministic
        # actions, then clip the result back into the valid action range.
        # NOTE: the noise stddev is hard-coded to 0.1; `self.eps` is not used
        # on this path.
        actions = tf.keras.layers.Lambda(
            lambda deterministic_actions: tf.clip_by_value(
                deterministic_actions + tf.random.normal(
                    shape=tf.shape(deterministic_actions), stddev=0.1),
                -1, 1))(deterministic_actions)

        self.actions_model = tf.keras.Model(self.condition_inputs, actions)

        self.actions_input = tf.keras.layers.Input(shape=output_shape)

        self.diagnostics_model = tf.keras.Model(self.condition_inputs,
                                                (shift, actions))
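
The exploration path above boils down to clipped additive Gaussian noise on the squashed deterministic actions. A hypothetical standalone helper with the same behavior (name and default stddev mirror the Lambda in Example #6):

import tensorflow as tf


def noisy_clipped_actions(deterministic_actions, stddev=0.1):
    """Additive Gaussian exploration noise, clipped back into [-1, 1]."""
    noise = tf.random.normal(
        shape=tf.shape(deterministic_actions), stddev=stddev)
    return tf.clip_by_value(deterministic_actions + noise, -1.0, 1.0)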