def recenter(rv_constructor, *rv_args, **rv_kwargs):
    if (rv_constructor.__name__ == 'Normal'
            and not rv_kwargs['name'].startswith('y')):

        # NB: assume everything is passed as kwargs for now.
        x_loc = rv_kwargs['loc']
        x_scale = rv_kwargs['scale']

        name = rv_kwargs['name']
        shape = rv_constructor(*rv_args, **rv_kwargs).shape

        # Learnable reparameterization weights: `a` scales the loc and `b`
        # exponentiates the scale (a = b = 1 recovers the centered
        # parameterization, a = b = 0 the fully non-centered one).
        a, b = get_or_init(name, shape)

        kwargs_std = {}
        kwargs_std['loc'] = tf.multiply(x_loc, a)
        kwargs_std['scale'] = tf.pow(x_scale, b)
        kwargs_std['name'] = name + '_param'

        # Affine bijector mapping the partially standardized RV back to the
        # original parameterization (note that `b` is rebound here, from the
        # scale exponent to the bijector).
        scale = tf.pow(x_scale, 1. - b)
        b = tfb.AffineScalar(scale=scale,
                             shift=x_loc - tf.multiply(scale, kwargs_std['loc']))
        if 'value' in rv_kwargs:
            kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
        bijectors[name] = b
        return b.forward(rv_std)

    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
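A minimal usage sketch (hypothetical: assumes `model` is an Edward2 program and that `get_or_init`, `bijectors`, `interceptable`, and the `edward2` module are available in the enclosing scope, as in the snippet above):

with edward2.interception(recenter):
    outputs = model()  # every latent Normal RV is now partially recentered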
def set_values(f, *args, **kwargs):
    """Sets each random variable's value to its aligned observed value."""
    name = kwargs.get('name')
    if name in model_kwargs:
        kwargs['value'] = model_kwargs[name]
    elif consumable_args:
        kwargs['value'] = consumable_args.pop(0)
    return interceptable(f)(*args, **kwargs)
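A usage sketch (assuming `set_values` closes over `model_kwargs` and `consumable_args`, e.g. via an enclosing factory function, and `model` is an Edward2 program):

with edward2.interception(set_values):
    model()  # RVs named in model_kwargs are pinned to the given values;
             # remaining positional values are consumed in order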
def ncp(rv_constructor, *rv_args, **rv_kwargs):
    if (rv_constructor.__name__ == 'Normal'
            and not rv_kwargs['name'].startswith('y')):
        loc = rv_kwargs['loc']
        scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        shape = rv_constructor(*rv_args, **rv_kwargs).shape

        kwargs_std = {}
        kwargs_std['loc'] = tf.zeros(shape)
        kwargs_std['scale'] = tf.ones(shape)
        kwargs_std['name'] = name + '_std'

        b = tfb.AffineScalar(scale=scale, shift=loc)
        if 'value' in rv_kwargs:
            kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

        return b.forward(rv_std)

    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
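What the Normal branch computes, in isolation (a standalone sketch, independent of the interceptor machinery; the names and values are illustrative):

import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

loc, scale = 1.5, 2.0
b = tfb.AffineScalar(scale=scale, shift=loc)
z_std = tfd.Normal(loc=0., scale=1.).sample()  # the auxiliary '_std' variable
z = b.forward(z_std)                           # distributed as Normal(loc, scale)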
Example #4
def set_values(f, *args, **kwargs):
    """Sets each random variable's value to its aligned observed value."""
    name = kwargs.get("name")
    if name in model_kwargs:
        kwargs["value"] = model_kwargs[name]
    return interceptable(f)(*args, **kwargs)
    def recenter(rv_constructor, *rv_args, **rv_kwargs):

        rv_name = rv_kwargs.get('name')
        rv_value = rv_kwargs.pop('value', None)

        base_bijector = None
        if rv_constructor.__name__ == 'TransformedDistribution':
            if (rv_args[1].__class__.__name__ == 'Invert'
                    and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
                distribution = rv_args[0]
                base_bijector = rv_args[1].bijector
                rv_constructor = distribution.__class__
                rv_kwargs = distribution.parameters
                rv_args = rv_args[2:]
                # We were given a value for the transformed RV. Let's pretend it was
                # for the original.
                if rv_value is not None:
                    rv_value = base_bijector.forward(rv_value)

        if (rv_constructor.__name__ == 'Normal'
                and not rv_name.startswith('y')):

            # NB: assume everything is kwargs for now.
            x_loc = rv_kwargs['loc']
            x_scale = rv_kwargs['scale']

            name = rv_kwargs['name']
            a, b, _ = get_or_init(name,
                                  loc_shape=tf.shape(x_loc),
                                  scale_shape=tf.shape(x_scale),
                                  parameterisation_type='scalar')

            kwargs_std = {}
            kwargs_std['loc'] = tf.multiply(x_loc, a)
            kwargs_std['scale'] = tf.pow(
                x_scale, b)  # tf.multiply(x_scale - 1., b) + 1.
            kwargs_std['name'] = name

            scale = x_scale / kwargs_std['scale']  # tf.pow(x_scale, 1. - b)
            shift = x_loc - tf.multiply(scale, kwargs_std['loc'])
            b = tfb.AffineScalar(scale=scale, shift=shift)
            if rv_value is not None:
                rv_value = b.inverse(rv_value)
            learnable_parameters[name + '_prior_mean'] = tf.convert_to_tensor(x_loc)
            learnable_parameters[name + '_prior_scale'] = tf.convert_to_tensor(x_scale)

            # If original RV was constrained, transform the constraint to the new
            # standardized RV. For now we assume a double-sided constraint.
            if base_bijector is not None:
                constraint_std = tfb.SoftClip(
                    low=b.inverse(base_bijector.low),
                    high=b.inverse(base_bijector.high),
                    hinge_softness=base_bijector.hinge_softness / scale
                    if base_bijector.hinge_softness is not None else None)
                rv_std = edward2.TransformedDistribution(
                    rv_constructor(**kwargs_std),
                    tfb.Invert(constraint_std),
                    value=constraint_std.inverse(rv_value)
                    if rv_value is not None else None)
                b = b(constraint_std)
            else:
                kwargs_std['value'] = rv_value
                rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
            bijectors[name] = b
            return b.forward(rv_std)

        elif ((rv_constructor.__name__.startswith('MultivariateNormal')
               or rv_constructor.__name__.startswith('GaussianProcess'))
              and not rv_kwargs['name'].startswith('y')):

            name = rv_kwargs['name']

            if rv_constructor.__name__.startswith('GaussianProcess'):
                gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
                X = gp_dist._get_index_points()
                x_loc = gp_dist.mean_fn(X)
                x_cov = gp_dist._compute_covariance(index_points=X)
            else:
                x_loc = rv_kwargs['loc']
                x_cov = rv_kwargs['covariance_matrix']

            a, b, c = get_or_init(name,
                                  loc_shape=tf.shape(x_loc),
                                  scale_shape=tf.shape(x_cov)[:-1],
                                  parameterisation_type=parameterisation_type)
            ndims = tf.shape(x_cov)[-1]
            x_loc = tf.broadcast_to(x_loc, tf.shape(x_cov)[:-1])
            cov_dtype = tf.float64 if FLAGS.float64 else x_cov.dtype
            x_cov = tf.cast(x_cov, cov_dtype)
            if parameterisation_type == 'eig':
                """Extra cost of the eigendecomposition?

        we do the eig to get Lambda, Q.
        We rescale Lambda and create the prior dist linop
           - point one: the prior is an MVN (albeit an efficient one), where
              in NCP it's just Normal
        Then we construct the remaining scale matrix. (an n**3 matmul)
        And unlike a cholesky factor these matrices aren't triangular, so
        multiplication or division

        - can we
        """

                Lambda, Q = eigh_with_safe_gradient(x_cov)
                Lambda = tf.abs(Lambda)
                Lambda = tf.cast(Lambda, tf.float32)
                Q = tf.cast(Q, tf.float32)
                Lambda_hat_b = tf.pow(Lambda, b)
                if tied_pparams:
                    # If the scale parameterization is in the eigenbasis,
                    # apply it to the mean in the same basis.
                    loc_in_eigenbasis = tf.linalg.matvec(Q,
                                                         x_loc,
                                                         adjoint_a=True)
                    reparam_loc = tf.linalg.matvec(
                        Q, tf.multiply(loc_in_eigenbasis, a))
                else:
                    reparam_loc = tf.multiply(x_loc, a)

                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = LinearOperatorEigenScale(
                    Q, d=tf.sqrt(Lambda_hat_b))
                kwargs_std['name'] = name

                Q_linop = LinearOperatorOrthogonal(Q, det_is_positive=True)
                scale = tf.linalg.LinearOperatorComposition([
                    Q_linop,
                    tf.linalg.LinearOperatorDiag(tf.sqrt(Lambda + 1e-10)),
                    tf.linalg.LinearOperatorDiag(
                        1. / tf.sqrt(Lambda_hat_b + 1e-10)),
                    Q_linop.adjoint(),
                ])
                shift = x_loc - scale.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale, shift=shift)

                # `value` was popped from rv_kwargs at the top of this
                # function, so check rv_value instead.
                if rv_value is not None:
                    kwargs_std['value'] = b.inverse(rv_value)

            elif parameterisation_type == 'chol':
                L = tf.linalg.cholesky(x_cov +
                                       1e-6 * tf.eye(ndims, dtype=x_cov.dtype))
                L = tf.cast(L, tf.float32)

                reparam_loc = x_loc * a
                reparam_scale = tf.linalg.LinearOperatorLowerTriangular(
                    tf.linalg.diag(1 - b) + b[..., tf.newaxis] * L)
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                Dinv = tf.linalg.triangular_solve(
                    tf.cast(reparam_scale.to_dense(), cov_dtype),
                    tf.eye(ndims, dtype=cov_dtype))
                Dinv = tf.cast(Dinv, tf.float32)
                scale = tf.matmul(L, Dinv)
                shift = x_loc - tf.linalg.matvec(scale, reparam_loc)
                b = tfb.AffineLinearOperator(
                    scale=tf.linalg.LinearOperatorFullMatrix(scale),
                    shift=shift)
                if rv_value is not None:
                    kwargs_std['value'] = b.inverse(rv_value)

            elif parameterisation_type == 'indep':
                # Assumes `C^-1 = diag(c)` is a learned diagonal matrix of 'evidence
                # precisions'. This approximates the true posterior under an iid
                # Gaussian observation model:
                prior_chol = tf.linalg.cholesky(x_cov)
                prior_inv = tf.linalg.cholesky_solve(
                    prior_chol, tf.eye(ndims, dtype=prior_chol.dtype))
                approx_posterior_prec = prior_inv + tf.cast(
                    tf.linalg.diag(c), prior_inv.dtype)
                approx_posterior_prec_chol = tf.linalg.cholesky(
                    approx_posterior_prec)
                approx_posterior_cov = tf.linalg.cholesky_solve(
                    approx_posterior_prec_chol,
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
                cov_chol = tf.linalg.cholesky(approx_posterior_cov)

                cov_chol = tf.cast(cov_chol, tf.float32)
                prior_chol = tf.cast(prior_chol, tf.float32)
                scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

                reparam_loc = x_loc * a
                reparam_scale = tf.linalg.LinearOperatorComposition([
                    tf.linalg.LinearOperatorInversion(scale_linop),
                    tf.linalg.LinearOperatorLowerTriangular(prior_chol)
                ])
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                shift = x_loc - scale_linop.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
                if rv_value is not None:
                    kwargs_std['value'] = b.inverse(rv_value)

            elif parameterisation_type == 'eigindep':
                # Combines 'eig' and 'indep' parameterizations, modeling the posterior
                # as
                # (V D**(-b) V' + diag(c))^-1
                # where VDV' is the eigendecomposition of the prior cov, and b and c
                # are learned vectors.
                b, c = [tf.cast(x, cov_dtype) for x in (b, c)]
                Lambda, Q = eigh_with_safe_gradient(x_cov)
                Lambda = tf.abs(Lambda)
                Lambda_hat_b = 1e-6 + tf.pow(Lambda, b)
                prior = tf.matmul(
                    Q,
                    tf.matmul(tf.linalg.diag(Lambda_hat_b), Q, adjoint_b=True))
                prior_chol = tf.linalg.cholesky(
                    prior + 1e-6 * tf.eye(ndims, dtype=prior.dtype))
                prior_prec = tf.linalg.cholesky_solve(
                    prior_chol + 1e-6 * tf.eye(ndims, dtype=prior_chol.dtype),
                    tf.eye(ndims, dtype=prior_chol.dtype))

                approx_posterior_prec = prior_prec + tf.linalg.diag(c)
                approx_posterior_prec_chol = tf.linalg.cholesky(
                    approx_posterior_prec)
                approx_posterior_cov = tf.linalg.cholesky_solve(
                    approx_posterior_prec_chol + 1e-6 *
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype),
                    tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
                cov_chol = tf.linalg.cholesky(
                    approx_posterior_cov +
                    1e-6 * tf.eye(ndims, dtype=approx_posterior_cov.dtype))
                cov_chol = tf.cast(cov_chol, tf.float32)
                prior_chol = tf.cast(prior_chol, tf.float32)
                scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

                reparam_loc = tf.multiply(x_loc, a)

                reparam_scale = tf.linalg.LinearOperatorComposition([
                    tf.linalg.LinearOperatorInversion(scale_linop),
                    tf.linalg.LinearOperatorLowerTriangular(prior_chol)
                ])
                kwargs_std = {}
                kwargs_std['loc'] = reparam_loc
                kwargs_std['scale'] = reparam_scale
                kwargs_std['name'] = name

                shift = x_loc - scale_linop.matvec(reparam_loc)
                b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
                if rv_value is not None:
                    kwargs_std['value'] = b.inverse(rv_value)
            else:
                raise ValueError('Unrecognized reparameterization strategy: '
                                 + parameterisation_type)

            if rv_constructor.__name__.startswith('GaussianProcess'):
                rv_std = edward2.MultivariateNormalLinearOperator(
                    *rv_args, **kwargs_std)
            else:
                rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

            bijectors[name] = b
            return b.forward(rv_std)
        else:
            return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
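A quick sanity check of the scalar branch above (a sketch; the numbers are arbitrary): with a = b = 1 the bijector reduces to the identity (centered parameterization), and with a = b = 0 it becomes z -> x_loc + x_scale * z, the fully non-centered parameterization of `ncp` below.

import tensorflow_probability as tfp
tfb = tfp.bijectors

x_loc, x_scale = 2.0, 3.0
for a, b in [(1., 1.), (0., 0.)]:
    loc_std = x_loc * a
    scale_std = x_scale ** b
    s = x_scale / scale_std
    bij = tfb.AffineScalar(scale=s, shift=x_loc - s * loc_std)
    print(bij.forward(0.5))  # 0.5 when a = b = 1; x_loc + 0.5 * x_scale when a = b = 0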
def ncp(rv_constructor, *rv_args, **rv_kwargs):
    base_bijector = None
    rv_value = rv_kwargs.pop('value', None)
    if rv_constructor.__name__ == 'TransformedDistribution':
        if (rv_args[1].__class__.__name__ == 'Invert'
                and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
            distribution = rv_args[0]
            base_bijector = rv_args[1].bijector
            rv_constructor = distribution.__class__
            rv_kwargs = distribution.parameters
            rv_args = rv_args[2:]
            # We were given a value for the transformed RV. Let's pretend it was
            # for the original.
            if rv_value is not None:
                rv_value = base_bijector.forward(rv_value)

    if (rv_constructor.__name__ == 'Normal'
            and not rv_kwargs['name'].startswith('y')):
        loc = rv_kwargs['loc']
        scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        kwargs_std = {}
        kwargs_std['loc'] = tf.zeros_like(loc)
        kwargs_std['scale'] = tf.ones_like(scale)
        kwargs_std['name'] = name + '_std'

        b = tfb.AffineScalar(scale=scale, shift=loc)
        if rv_value is not None:
            rv_value = b.inverse(rv_value)

        kwargs_std['value'] = rv_value
        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
        return b.forward(rv_std)

    elif ((rv_constructor.__name__.startswith('MultivariateNormal')
           or rv_constructor.__name__.startswith('GaussianProcess'))
          and not rv_kwargs['name'].startswith('y')):

        name = rv_kwargs['name']

        if rv_constructor.__name__.startswith('GaussianProcess'):
            gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
            X = gp_dist._get_index_points()
            x_loc = gp_dist.mean_fn(X)
            x_cov = gp_dist._compute_covariance(index_points=X)
            shape = tfd.MultivariateNormalFullCovariance(x_loc,
                                                         x_cov).event_shape

        else:
            x_loc = rv_kwargs['loc']
            x_cov = rv_kwargs['covariance_matrix']
            shape = rv_constructor(*rv_args, **rv_kwargs).shape

        kwargs_std = {}

        kwargs_std['loc'] = tf.zeros(shape)
        kwargs_std['scale_diag'] = tf.ones(shape[0])
        kwargs_std['name'] = name + '_std'

        scale = tf.linalg.cholesky(x_cov + 1e-6 * tf.eye(tf.shape(x_cov)[-1]))
        b = tfb.AffineLinearOperator(
            scale=tf.linalg.LinearOperatorLowerTriangular(scale), shift=x_loc)

        # `value` was popped from rv_kwargs at the top of this function, so
        # check rv_value instead.
        if rv_value is not None:
            kwargs_std['value'] = b.inverse(rv_value)
        rv_std = edward2.MultivariateNormalDiag(*rv_args, **kwargs_std)
        return b.forward(rv_std)

    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
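The multivariate branch in isolation: whiten with the Cholesky factor of the prior covariance (a sketch; `x_loc` and `x_cov` are assumed to be a float32 mean vector and covariance matrix):

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tfb = tfp.bijectors

chol = tf.linalg.cholesky(x_cov + 1e-6 * tf.eye(tf.shape(x_cov)[-1]))
b = tfb.AffineLinearOperator(
    scale=tf.linalg.LinearOperatorLowerTriangular(chol), shift=x_loc)
z_std = tfd.MultivariateNormalDiag(loc=tf.zeros_like(x_loc)).sample()
z = b.forward(z_std)  # distributed as MVN(x_loc, x_cov)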
def trace(rv_constructor, *rv_args, **rv_kwargs):
    """Records the sampled value of every random variable, keyed by name."""
    rv = interceptable(rv_constructor)(*rv_args, **rv_kwargs)
    name = rv_kwargs['name']
    trace_result[name] = rv.value
    return rv
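Usage sketch (assumes `trace_result` is a dict defined in the enclosing scope and `model` is an Edward2 program):

trace_result = {}
with edward2.interception(trace):
    model()
# trace_result now maps each random variable's name to its sampled value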