def recenter(rv_constructor, *rv_args, **rv_kwargs):
    """Build a Normal RV through a learnable, partially centred form.

    Only calls whose constructor is named 'Normal' and whose RV name does not
    start with 'y' are rewritten (presumably 'y' marks observed variables --
    confirm against the models). Every other call is forwarded unchanged.

    For a rewritten RV with parameters (loc, scale), `get_or_init(name, shape)`
    provides two values `a` and `b`. An auxiliary RV named `<name>_param` is
    created with loc `loc * a` and scale `scale ** b`, and the returned value
    is that RV pushed through the affine map
    `x -> scale ** (1 - b) * x + (loc - scale ** (1 - b) * loc * a)`.
    The affine bijector is also recorded in `bijectors[name]`.

    `tf`, `tfb`, `get_or_init`, `interceptable` and `bijectors` are resolved
    from the surrounding scope, which is not visible in this chunk.

    Args:
      rv_constructor: Callable that builds the random variable.
      *rv_args: Positional arguments forwarded to the constructor.
      **rv_kwargs: Keyword arguments of the RV. 'name' is always read;
        'loc' and 'scale' are read for rewritten RVs; 'value', if present,
        is mapped through the inverse bijector before being forwarded.

    Returns:
      The forwarded RV for untouched calls, otherwise the bijector applied
      to the auxiliary RV.
    """
    is_target = (rv_constructor.__name__ == 'Normal'
                 and not rv_kwargs['name'].startswith('y'))
    if not is_target:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)

    # NB: assume everything is kwargs for now.
    prior_loc = rv_kwargs['loc']
    prior_scale = rv_kwargs['scale']
    name = rv_kwargs['name']
    # The RV is instantiated once only to read off its shape.
    shape = rv_constructor(*rv_args, **rv_kwargs).shape
    loc_weight, scale_power = get_or_init(name, shape)

    aux_loc = tf.multiply(prior_loc, loc_weight)
    aux_scale = tf.pow(prior_scale, scale_power)
    kwargs_std = {
        'loc': aux_loc,
        'scale': aux_scale,
        'name': name + '_param',
    }

    # Whatever part of the prior is not absorbed by the auxiliary RV is
    # applied afterwards by this affine map.
    residual_scale = tf.pow(prior_scale, 1. - scale_power)
    bijector = tfb.AffineScalar(
        scale=residual_scale,
        shift=prior_loc + tf.multiply(residual_scale, -aux_loc))

    if 'value' in rv_kwargs:
        # A value given for the original RV is expressed in auxiliary space.
        kwargs_std['value'] = bijector.inverse(rv_kwargs['value'])

    rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
    bijectors[name] = bijector
    return bijector.forward(rv_std)
def set_values(f, *args, **kwargs):
    """Pin a random variable to its aligned value before constructing it.

    The value is looked up by the RV's 'name' keyword in `model_kwargs`. When
    the name is not found there and `consumable_args` is non-empty, the first
    remaining entry of `consumable_args` is removed and used instead. If
    neither source applies the call is forwarded without a 'value'.

    `model_kwargs`, `consumable_args` and `interceptable` come from the
    surrounding scope, which is not visible in this chunk.

    Args:
      f: Callable that builds the random variable.
      *args: Positional arguments forwarded to `f`.
      **kwargs: Keyword arguments forwarded to `f`; 'value' may be added.

    Returns:
      Whatever the interceptable version of `f` returns.
    """
    rv_name = kwargs.get('name')
    if rv_name not in model_kwargs:
        if consumable_args:
            # Positional values are consumed front to back, one per RV.
            kwargs['value'] = consumable_args.pop(0)
    else:
        kwargs['value'] = model_kwargs[rv_name]

    build = interceptable(f)
    return build(*args, **kwargs)
def ncp(rv_constructor, *rv_args, **rv_kwargs):
    """Build a Normal RV in non-centred form.

    Only calls whose constructor is named 'Normal' and whose RV name does not
    start with 'y' are rewritten (presumably 'y' marks observed variables --
    confirm against the models). Every other call is forwarded unchanged.

    A rewritten RV is replaced by an auxiliary RV named `<name>_std` with loc
    zeros and scale ones of the original RV's shape, and the returned value is
    that RV pushed through `x -> scale * x + loc`.

    `tf`, `tfb` and `interceptable` are resolved from the surrounding scope,
    which is not visible in this chunk.

    Args:
      rv_constructor: Callable that builds the random variable.
      *rv_args: Positional arguments forwarded to the constructor.
      **rv_kwargs: Keyword arguments of the RV. 'name' is always read;
        'loc' and 'scale' are read for rewritten RVs; 'value', if present,
        is mapped through the inverse bijector before being forwarded.

    Returns:
      The forwarded RV for untouched calls, otherwise the affine bijector
      applied to the auxiliary RV.
    """
    is_target = (rv_constructor.__name__ == 'Normal'
                 and not rv_kwargs['name'].startswith('y'))
    if not is_target:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)

    prior_loc = rv_kwargs['loc']
    prior_scale = rv_kwargs['scale']
    name = rv_kwargs['name']
    # The RV is instantiated once only to read off its shape.
    shape = rv_constructor(*rv_args, **rv_kwargs).shape

    kwargs_std = {
        'loc': tf.zeros(shape),
        'scale': tf.ones(shape),
        'name': name + '_std',
    }
    bijector = tfb.AffineScalar(scale=prior_scale, shift=prior_loc)

    if 'value' in rv_kwargs:
        # A value given for the original RV is expressed in standard space.
        kwargs_std['value'] = bijector.inverse(rv_kwargs['value'])

    rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
    return bijector.forward(rv_std)
def set_values(f, *args, **kwargs):
    """Pin a random variable to its aligned value before constructing it.

    If the RV's 'name' keyword is a key of `model_kwargs`, the matching entry
    is passed on as the 'value' keyword; otherwise the call is forwarded as
    given.

    `model_kwargs` and `interceptable` come from the surrounding scope, which
    is not visible in this chunk.

    Args:
      f: Callable that builds the random variable.
      *args: Positional arguments forwarded to `f`.
      **kwargs: Keyword arguments forwarded to `f`; 'value' may be added.

    Returns:
      Whatever the interceptable version of `f` returns.
    """
    rv_name = kwargs.get("name")
    if rv_name in model_kwargs:
        kwargs.update(value=model_kwargs[rv_name])

    build = interceptable(f)
    return build(*args, **kwargs)
def recenter(rv_constructor, *rv_args, **rv_kwargs):
    """Build Normal / MultivariateNormal / GaussianProcess RVs in a learnable,
    partially centred parameterisation.

    Three cases are handled:

    * Constructor named 'Normal' (possibly unwrapped from a
      `TransformedDistribution(dist, Invert(SoftClip))`) whose RV name does
      not start with 'y': the RV is replaced by one with loc `loc * a` and
      scale `scale ** b`, followed by an affine bijector that restores the
      original location and scale. A SoftClip constraint, if present, is
      re-expressed in the new coordinates.
    * Constructor name starting with 'MultivariateNormal' or
      'GaussianProcess' whose RV name does not start with 'y': the RV is
      replaced according to `parameterisation_type` ('eig', 'chol', 'indep'
      or 'eigindep'), followed by an `AffineLinearOperator` bijector.
    * Anything else is forwarded exactly as it was requested.

    In the first two cases the bijector is stored in `bijectors[name]` and the
    function returns `bijector.forward(rv_std)`.

    Compared with the previous revision, a caller-supplied `value` is now
    honoured in the multivariate branch and in the pass-through branch. It
    used to be popped from `rv_kwargs` up front and then looked up in
    `rv_kwargs` again, so it was silently dropped. The pass-through branch
    also forwards the *original* call, so a `TransformedDistribution` that was
    unwrapped but not reparameterised keeps its constraint.

    `tf`, `tfb`, `edward2`, `get_or_init`, `interceptable`, `bijectors`,
    `learnable_parameters`, `parameterisation_type`, `tied_pparams`, `FLAGS`,
    `eigh_with_safe_gradient`, `LinearOperatorEigenScale` and
    `LinearOperatorOrthogonal` are resolved from the surrounding scope, which
    is not visible in this chunk.

    Args:
      rv_constructor: Callable that builds the random variable.
      *rv_args: Positional arguments of the RV. For a
        `TransformedDistribution` the first two are read as the base
        distribution and the bijector.
      **rv_kwargs: Keyword arguments of the RV ('name', 'loc', 'scale' or
        'covariance_matrix', optionally 'value').

    Returns:
      The forwarded RV for untouched calls, otherwise the bijector applied to
      the reparameterised RV.

    Raises:
      ValueError: If the multivariate branch is reached with an unknown
        `parameterisation_type`.
    """
    # Keep the untouched call so RVs we do not reparameterise can be forwarded
    # exactly as requested (including any supplied value).
    orig_constructor, orig_args = rv_constructor, rv_args
    orig_kwargs = dict(rv_kwargs)

    rv_name = rv_kwargs.get('name')
    rv_value = rv_kwargs.pop('value', None)

    base_bijector = None
    if rv_constructor.__name__ == 'TransformedDistribution':
        if (rv_args[1].__class__.__name__ == 'Invert'
                and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
            distribution = rv_args[0]
            base_bijector = rv_args[1].bijector
            rv_constructor = distribution.__class__
            rv_kwargs = distribution.parameters
            rv_args = rv_args[2:]
            # We were given a value for the transformed RV. Let's pretend it
            # was for the original.
            if rv_value is not None:
                rv_value = base_bijector.forward(rv_value)

    if rv_constructor.__name__ == 'Normal' and not rv_name.startswith('y'):
        # NB: assume everything is kwargs for now.
        x_loc = rv_kwargs['loc']
        x_scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        a, b, _ = get_or_init(
            name,
            loc_shape=tf.shape(x_loc),
            scale_shape=tf.shape(x_scale),
            parameterisation_type='scalar')

        kwargs_std = {
            'loc': tf.multiply(x_loc, a),
            'scale': tf.pow(x_scale, b),
            'name': name,
        }

        # Equivalent to tf.pow(x_scale, 1. - b): the part of the prior scale
        # not absorbed by the reparameterised RV.
        scale = x_scale / kwargs_std['scale']
        shift = x_loc - tf.multiply(scale, kwargs_std['loc'])
        bijector = tfb.AffineScalar(scale=scale, shift=shift)
        if rv_value is not None:
            rv_value = bijector.inverse(rv_value)

        learnable_parameters[name + '_prior_mean'] = tf.convert_to_tensor(
            x_loc)
        learnable_parameters[name + '_prior_scale'] = tf.convert_to_tensor(
            x_scale)

        # If original RV was constrained, transform the constraint to the new
        # standardized RV. For now we assume a double-sided constraint.
        if base_bijector is not None:
            hinge_softness = base_bijector.hinge_softness
            constraint_std = tfb.SoftClip(
                low=bijector.inverse(base_bijector.low),
                high=bijector.inverse(base_bijector.high),
                hinge_softness=(hinge_softness / scale
                                if hinge_softness is not None else None))
            rv_std = edward2.TransformedDistribution(
                rv_constructor(**kwargs_std),
                tfb.Invert(constraint_std),
                value=(constraint_std.inverse(rv_value)
                       if rv_value is not None else None))
            bijector = bijector(constraint_std)
        else:
            kwargs_std['value'] = rv_value
            rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

        bijectors[name] = bijector
        return bijector.forward(rv_std)

    is_gp = rv_constructor.__name__.startswith('GaussianProcess')
    if ((rv_constructor.__name__.startswith('MultivariateNormal') or is_gp)
            and not rv_kwargs['name'].startswith('y')):
        # NOTE(review): a SoftClip constraint unwrapped above is not
        # re-applied in this branch -- confirm constrained multivariate RVs
        # never reach it.
        name = rv_kwargs['name']

        if is_gp:
            gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
            index_points = gp_dist._get_index_points()
            x_loc = gp_dist.mean_fn(index_points)
            x_cov = gp_dist._compute_covariance(index_points=index_points)
        else:
            x_loc = rv_kwargs['loc']
            x_cov = rv_kwargs['covariance_matrix']

        a, b, c = get_or_init(
            name,
            loc_shape=tf.shape(x_loc),
            scale_shape=tf.shape(x_cov)[:-1],
            parameterisation_type=parameterisation_type)

        ndims = tf.shape(x_cov)[-1]
        x_loc = tf.broadcast_to(x_loc, tf.shape(x_cov)[:-1])
        cov_dtype = tf.float64 if FLAGS.float64 else x_cov.dtype
        x_cov = tf.cast(x_cov, cov_dtype)

        if parameterisation_type == 'eig':
            # Extra cost of the eigendecomposition? We do the eig to get
            # Lambda, Q. We rescale Lambda and create the prior dist linop.
            # Point one: the prior is an MVN (albeit an efficient one), where
            # in NCP it's just Normal. Then we construct the remaining scale
            # matrix (an n**3 matmul). And unlike a Cholesky factor these
            # matrices aren't triangular (the original note trails off here).
            eigvals, eigvecs = eigh_with_safe_gradient(x_cov)
            eigvals = tf.abs(eigvals)
            eigvals = tf.cast(eigvals, tf.float32)
            eigvecs = tf.cast(eigvecs, tf.float32)
            eigvals_pow_b = tf.pow(eigvals, b)

            if tied_pparams:
                # If the scale parameterization is in the eigenbasis,
                # apply it to the mean in the same basis.
                loc_in_eigenbasis = tf.linalg.matvec(
                    eigvecs, x_loc, adjoint_a=True)
                reparam_loc = tf.linalg.matvec(
                    eigvecs, tf.multiply(loc_in_eigenbasis, a))
            else:
                reparam_loc = tf.multiply(x_loc, a)

            reparam_scale = LinearOperatorEigenScale(
                eigvecs, d=tf.sqrt(eigvals_pow_b))

            q_linop = LinearOperatorOrthogonal(eigvecs, det_is_positive=True)
            scale = tf.linalg.LinearOperatorComposition([
                q_linop,
                tf.linalg.LinearOperatorDiag(tf.sqrt(eigvals + 1e-10)),
                tf.linalg.LinearOperatorDiag(
                    1. / tf.sqrt(eigvals_pow_b + 1e-10)),
                q_linop.adjoint(),
            ])
            shift = x_loc - scale.matvec(reparam_loc)
            bijector = tfb.AffineLinearOperator(scale=scale, shift=shift)

        elif parameterisation_type == 'chol':
            # Jitter on the diagonal keeps the Cholesky factorisation defined.
            chol = tf.linalg.cholesky(
                x_cov + 1e-6 * tf.eye(ndims, dtype=x_cov.dtype))
            chol = tf.cast(chol, tf.float32)

            reparam_loc = x_loc * a
            reparam_scale = tf.linalg.LinearOperatorLowerTriangular(
                tf.linalg.diag(1 - b) + b[..., tf.newaxis] * chol)

            # Inverse of the reparameterised scale, solved in `cov_dtype`.
            reparam_scale_inv = tf.linalg.triangular_solve(
                tf.cast(reparam_scale.to_dense(), cov_dtype),
                tf.eye(ndims, dtype=cov_dtype))
            reparam_scale_inv = tf.cast(reparam_scale_inv, tf.float32)

            scale = tf.matmul(chol, reparam_scale_inv)
            shift = x_loc - tf.linalg.matvec(scale, reparam_loc)
            bijector = tfb.AffineLinearOperator(
                scale=tf.linalg.LinearOperatorFullMatrix(scale), shift=shift)

        elif parameterisation_type == 'indep':
            # Assumes `C^-1 = diag(c)` is a learned diagonal matrix of
            # 'evidence precisions'. This approximates the true posterior
            # under an iid Gaussian observation model:
            prior_chol = tf.linalg.cholesky(x_cov)
            prior_inv = tf.linalg.cholesky_solve(
                prior_chol, tf.eye(ndims, dtype=prior_chol.dtype))
            approx_posterior_prec = prior_inv + tf.cast(
                tf.linalg.diag(c), prior_inv.dtype)
            approx_posterior_prec_chol = tf.linalg.cholesky(
                approx_posterior_prec)
            approx_posterior_cov = tf.linalg.cholesky_solve(
                approx_posterior_prec_chol,
                tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
            cov_chol = tf.linalg.cholesky(approx_posterior_cov)
            cov_chol = tf.cast(cov_chol, tf.float32)
            prior_chol = tf.cast(prior_chol, tf.float32)

            scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)
            reparam_loc = x_loc * a
            reparam_scale = tf.linalg.LinearOperatorComposition([
                tf.linalg.LinearOperatorInversion(scale_linop),
                tf.linalg.LinearOperatorLowerTriangular(prior_chol),
            ])

            shift = x_loc - scale_linop.matvec(reparam_loc)
            bijector = tfb.AffineLinearOperator(
                scale=scale_linop, shift=shift)

        elif parameterisation_type == 'eigindep':
            # Combines 'eig' and 'indep' parameterizations, modeling the
            # posterior as
            #   (V D**(-b) V' + diag(c))^-1
            # where VDV' is the eigendecomposition of the prior cov, and b
            # and c are learned vectors.
            b = tf.cast(b, cov_dtype)
            c = tf.cast(c, cov_dtype)
            eigvals, eigvecs = eigh_with_safe_gradient(x_cov)
            eigvals = tf.abs(eigvals)
            eigvals_pow_b = 1e-6 + tf.pow(eigvals, b)
            prior = tf.matmul(
                eigvecs,
                tf.matmul(tf.linalg.diag(eigvals_pow_b), eigvecs,
                          adjoint_b=True))

            prior_chol = tf.linalg.cholesky(
                prior + 1e-6 * tf.eye(ndims, dtype=prior.dtype))
            prior_prec = tf.linalg.cholesky_solve(
                prior_chol + 1e-6 * tf.eye(ndims, dtype=prior_chol.dtype),
                tf.eye(ndims, dtype=prior_chol.dtype))
            approx_posterior_prec = prior_prec + tf.linalg.diag(c)
            approx_posterior_prec_chol = tf.linalg.cholesky(
                approx_posterior_prec)
            approx_posterior_cov = tf.linalg.cholesky_solve(
                approx_posterior_prec_chol + 1e-6 * tf.eye(
                    ndims, dtype=approx_posterior_prec_chol.dtype),
                tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
            cov_chol = tf.linalg.cholesky(
                approx_posterior_cov + 1e-6 * tf.eye(
                    ndims, dtype=approx_posterior_cov.dtype))
            cov_chol = tf.cast(cov_chol, tf.float32)
            prior_chol = tf.cast(prior_chol, tf.float32)

            scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)
            reparam_loc = tf.multiply(x_loc, a)
            reparam_scale = tf.linalg.LinearOperatorComposition([
                tf.linalg.LinearOperatorInversion(scale_linop),
                tf.linalg.LinearOperatorLowerTriangular(prior_chol),
            ])

            shift = x_loc - scale_linop.matvec(reparam_loc)
            bijector = tfb.AffineLinearOperator(
                scale=scale_linop, shift=shift)

        else:
            raise ValueError('unrecognized reparameterization strategy!')

        kwargs_std = {
            'loc': reparam_loc,
            'scale': reparam_scale,
            'name': name,
        }
        # `value` was popped from rv_kwargs at the top of the function, so it
        # has to be taken from rv_value here.
        if rv_value is not None:
            kwargs_std['value'] = bijector.inverse(rv_value)

        if is_gp:
            rv_std = edward2.MultivariateNormalLinearOperator(
                *rv_args, **kwargs_std)
        else:
            # NOTE(review): this reuses rv_constructor with a `scale=`
            # keyword although the original call supplied
            # `covariance_matrix` -- confirm the constructor accepts it.
            rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

        bijectors[name] = bijector
        return bijector.forward(rv_std)

    # Not reparameterised: forward the call exactly as it was requested.
    return interceptable(orig_constructor)(*orig_args, **orig_kwargs)
def ncp(rv_constructor, *rv_args, **rv_kwargs):
    """Build Normal / MultivariateNormal / GaussianProcess RVs in non-centred
    form.

    Three cases are handled:

    * Constructor named 'Normal' (possibly unwrapped from a
      `TransformedDistribution(dist, Invert(SoftClip))`) whose RV name does
      not start with 'y': replaced by an RV named `<name>_std` with loc
      `zeros_like(loc)` and scale `ones_like(scale)`, pushed through
      `x -> scale * x + loc`.
    * Constructor name starting with 'MultivariateNormal' or
      'GaussianProcess' whose RV name does not start with 'y': replaced by an
      `edward2.MultivariateNormalDiag` named `<name>_std`, pushed through an
      `AffineLinearOperator` whose scale is the Cholesky factor of the
      (jittered) covariance and whose shift is the original location.
    * Anything else is forwarded exactly as it was requested.

    Compared with the previous revision, a caller-supplied `value` is now
    honoured in the multivariate branch and in the pass-through branch. It
    used to be popped from `rv_kwargs` up front and then looked up in
    `rv_kwargs` again, so it was silently dropped. The pass-through branch
    also forwards the *original* call, so a `TransformedDistribution` that was
    unwrapped but not reparameterised keeps its constraint.

    `tf`, `tfb`, `tfd`, `edward2` and `interceptable` are resolved from the
    surrounding scope, which is not visible in this chunk.

    Args:
      rv_constructor: Callable that builds the random variable.
      *rv_args: Positional arguments of the RV. For a
        `TransformedDistribution` the first two are read as the base
        distribution and the bijector.
      **rv_kwargs: Keyword arguments of the RV ('name', 'loc', 'scale' or
        'covariance_matrix', optionally 'value').

    Returns:
      The forwarded RV for untouched calls, otherwise the affine bijector
      applied to the standardised RV.
    """
    # Keep the untouched call so RVs we do not reparameterise can be forwarded
    # exactly as requested (including any supplied value).
    orig_constructor, orig_args = rv_constructor, rv_args
    orig_kwargs = dict(rv_kwargs)

    base_bijector = None
    rv_value = rv_kwargs.pop('value', None)
    if rv_constructor.__name__ == 'TransformedDistribution':
        if (rv_args[1].__class__.__name__ == 'Invert'
                and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
            distribution = rv_args[0]
            base_bijector = rv_args[1].bijector
            rv_constructor = distribution.__class__
            rv_kwargs = distribution.parameters
            rv_args = rv_args[2:]
            # We were given a value for the transformed RV. Let's pretend it
            # was for the original.
            if rv_value is not None:
                rv_value = base_bijector.forward(rv_value)

    constructor_name = rv_constructor.__name__

    if constructor_name == 'Normal' and not rv_kwargs['name'].startswith('y'):
        loc = rv_kwargs['loc']
        scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        kwargs_std = {
            'loc': tf.zeros_like(loc),
            'scale': tf.ones_like(scale),
            'name': name + '_std',
        }
        bijector = tfb.AffineScalar(scale=scale, shift=loc)
        if rv_value is not None:
            rv_value = bijector.inverse(rv_value)
        kwargs_std['value'] = rv_value

        # NOTE(review): when the RV was unwrapped from a SoftClip
        # TransformedDistribution, `base_bijector` is only used above to map
        # the supplied value; the constraint itself is not re-applied to the
        # standardised RV, and rv_constructor is the base distribution's
        # class. Confirm this is intended.
        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
        return bijector.forward(rv_std)

    is_gp = constructor_name.startswith('GaussianProcess')
    if ((constructor_name.startswith('MultivariateNormal') or is_gp)
            and not rv_kwargs['name'].startswith('y')):
        name = rv_kwargs['name']

        if is_gp:
            gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
            index_points = gp_dist._get_index_points()
            x_loc = gp_dist.mean_fn(index_points)
            x_cov = gp_dist._compute_covariance(index_points=index_points)
            shape = tfd.MultivariateNormalFullCovariance(
                x_loc, x_cov).event_shape
        else:
            x_loc = rv_kwargs['loc']
            x_cov = rv_kwargs['covariance_matrix']
            # The RV is instantiated once only to read off its shape.
            shape = rv_constructor(*rv_args, **rv_kwargs).shape

        kwargs_std = {
            'loc': tf.zeros(shape),
            'scale_diag': tf.ones(shape[0]),
            'name': name + '_std',
        }

        # Jitter on the diagonal keeps the Cholesky factorisation defined.
        cov_chol = tf.linalg.cholesky(
            x_cov + 1e-6 * tf.eye(tf.shape(x_cov)[-1]))
        bijector = tfb.AffineLinearOperator(
            scale=tf.linalg.LinearOperatorLowerTriangular(cov_chol),
            shift=x_loc)

        # `value` was popped from rv_kwargs at the top of the function, so it
        # has to be taken from rv_value here.
        if rv_value is not None:
            kwargs_std['value'] = bijector.inverse(rv_value)

        rv_std = edward2.MultivariateNormalDiag(*rv_args, **kwargs_std)
        return bijector.forward(rv_std)

    # Not reparameterised: forward the call exactly as it was requested.
    return interceptable(orig_constructor)(*orig_args, **orig_kwargs)
def trace(rv_constructor, *rv_args, **rv_kwargs):
    """Construct the random variable and record its value under its name.

    The RV is built through `interceptable(rv_constructor)` and its `.value`
    is stored in `trace_result[rv_kwargs['name']]`. Both `interceptable` and
    `trace_result` come from the surrounding scope, which is not visible in
    this chunk.

    Args:
      rv_constructor: Callable that builds the random variable.
      *rv_args: Positional arguments forwarded to the constructor.
      **rv_kwargs: Keyword arguments forwarded to the constructor; must
        contain 'name'.

    Returns:
      The constructed random variable.
    """
    traced_rv = interceptable(rv_constructor)(*rv_args, **rv_kwargs)
    key = rv_kwargs['name']
    trace_result[key] = traced_rv.value
    return traced_rv