def __init__(self, data, Y_var):
    super().__init__(active_dims=[0])
    self.Y_var = Y_var
    self.num_genes = data.m_obs.shape[1]
    # l_affine = tfb.AffineScalar(shift=tf.cast(1., tf.float64),
    #                             scale=tf.cast(4 - 1., tf.float64))
    # l_sigmoid = tfb.Sigmoid()
    # l_logistic = tfb.Chain([l_affine, l_sigmoid])
    self.lengthscale = gpflow.Parameter(1.414, transform=positive())

    # Logistic transforms bounding D to (0.1, 1.5) and S to (0.1, 4.0).
    # (Defined but currently unused: D and S below use the generic
    # positive() transform instead.)
    D_affine = tfb.AffineScalar(shift=tf.cast(0.1, tf.float64),
                                scale=tf.cast(1.5 - 0.1, tf.float64))
    D_sigmoid = tfb.Sigmoid()
    D_logistic = tfb.Chain([D_affine, D_sigmoid])
    S_affine = tfb.AffineScalar(shift=tf.cast(0.1, tf.float64),
                                scale=tf.cast(4. - 0.1, tf.float64))
    S_sigmoid = tfb.Sigmoid()
    S_logistic = tfb.Chain([S_affine, S_sigmoid])

    self.D = gpflow.Parameter(np.random.uniform(0.9, 1, self.num_genes),
                              transform=positive(), dtype=tf.float64)
    # self.D[3].trainable = False
    # self.D[3].assign(0.8)
    # Note: uniform(1, 1) draws are identically 1.
    self.S = gpflow.Parameter(np.random.uniform(1, 1, self.num_genes),
                              transform=positive(), dtype=tf.float64)
    # self.S[3].trainable = False
    # self.S[3].assign(1)
    self.kervar = gpflow.Parameter(np.float64(1), transform=positive())
    self.noise_term = gpflow.Parameter(
        0.1353 * tf.ones(self.num_genes, dtype='float64'),
        transform=positive())
def __init__(self, active_dims=[0], gap_decay=0.1, match_decay=0.9,
             max_subsequence_length=3, max_occurence_length=10,
             alphabet=[], maxlen=0, normalize=True, batch_size=1000):
    super().__init__(active_dims=active_dims)
    # constrain kernel params to between 0 and 1
    self.logistic_gap = tfb.Chain([
        tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                         scale=tf.cast(1, tf.float64)),
        tfb.Sigmoid()
    ])
    self.logisitc_match = tfb.Chain([
        tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                         scale=tf.cast(1, tf.float64)),
        tfb.Sigmoid()
    ])
    self.gap_decay_param = Parameter(gap_decay,
                                     transform=self.logistic_gap,
                                     name="gap_decay")
    self.match_decay_param = Parameter(match_decay,
                                       transform=self.logisitc_match,
                                       name="match_decay")
    self.max_subsequence_length = max_subsequence_length
    self.max_occurence_length = max_occurence_length
    self.alphabet = alphabet
    self.maxlen = maxlen
    self.normalize = normalize
    self.batch_size = batch_size
    self.symmetric = False

    # We will use copies of the kernel params to avoid building an expensive
    # computation graph; gradients are instead calculated efficiently via
    # dynamic programming. These copies are refreshed on every call to K and
    # K_diag (to check whether the parameters have been updated).
    self.match_decay = self.match_decay_param.numpy()
    self.gap_decay = self.gap_decay_param.numpy()
    self.match_decay_unconstrained = self.match_decay_param.unconstrained_variable.numpy()
    self.gap_decay_unconstrained = self.gap_decay_param.unconstrained_variable.numpy()

    # initialize helpful construction matrices, lazily computed once needed
    self.D = None
    self.dD_dgap = None

    # build a lookup table of the alphabet to encode input strings
    self.table = tf.lookup.StaticHashTable(
        initializer=tf.lookup.KeyValueTensorInitializer(
            keys=tf.constant(["PAD"] + alphabet),
            values=tf.constant(range(0, len(alphabet) + 1)),
        ),
        default_value=0)
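# Hedged usage sketch (added, not from the original source): the lookup table
# built above maps "PAD" -> 0 and alphabet[i] -> i + 1, with unknown
# characters falling back to the PAD id via default_value=0. For example,
# with alphabet=["a", "b"]:
#
#   table.lookup(tf.constant(["a", "b", "PAD", "?"]))  # -> [1, 2, 0, 0]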
def recenter(rv_constructor, *rv_args, **rv_kwargs):
    if (rv_constructor.__name__ == 'Normal'
            and not rv_kwargs['name'].startswith('y')):
        # NB: assume everything is kwargs for now.
        x_loc = rv_kwargs['loc']
        x_scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        shape = rv_constructor(*rv_args, **rv_kwargs).shape
        a, b = get_or_init(name, shape)

        # Sample the partially standardized variable x_std ~ N(a*loc, scale**b).
        kwargs_std = {}
        kwargs_std['loc'] = tf.multiply(x_loc, a)
        kwargs_std['scale'] = tf.pow(x_scale, b)
        kwargs_std['name'] = name + '_param'

        # Affine bijector mapping x_std back to the original parameterization.
        scale = tf.pow(x_scale, 1. - b)
        b = tfb.AffineScalar(scale=scale,
                             shift=x_loc + tf.multiply(scale, -kwargs_std['loc']))

        if 'value' in rv_kwargs:
            kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
        bijectors[name] = b
        return b.forward(rv_std)
    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
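# Note (added; follows directly from the code above): with learned
# per-variable parameters a, b in [0, 1], `recenter` samples
#
#   x_std ~ Normal(a * loc, scale**b)
#
# and returns x = loc + scale**(1 - b) * (x_std - a * loc). Setting
# a = b = 1 recovers the centered parameterization (x = x_std), while
# a = b = 0 gives the fully non-centered one (x = loc + scale * x_std).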
def __init__(self, rank=1, active_dims=[0], gap_decay=0.1, match_decay=0.9,
             max_subsequence_length=3, alphabet=[], maxlen=0):
    super().__init__(active_dims=active_dims)
    # constrain decay kernel params to between 0 and 1
    logistic_gap = tfb.Chain([
        tfb.Shift(tf.cast(0, tf.float64))(tfb.Scale(tf.cast(1, tf.float64))),
        tfb.Sigmoid()
    ])
    logistic_match = tfb.Chain([
        tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                         scale=tf.cast(1, tf.float64)),
        tfb.Sigmoid()
    ])
    self.gap_decay = Parameter(gap_decay, transform=logistic_gap,
                               name="gap_decay")
    self.match_decay = Parameter(match_decay, transform=logistic_match,
                                 name="match_decay")

    # prepare similarity matrix parameters
    self.rank = rank
    W = 0.1 * tf.ones((len(alphabet), self.rank))
    kappa = tf.ones(len(alphabet))
    self.W = Parameter(W, name="W")
    self.kappa = Parameter(kappa, transform=positive(), name="kappa")

    # store additional kernel parameters
    self.max_subsequence_length = tf.constant(max_subsequence_length)
    self.alphabet = tf.constant(alphabet)
    self.alphabet_size = tf.shape(self.alphabet)[0]
    self.maxlen = tf.constant(maxlen)

    # build a lookup table of the alphabet to encode input strings
    self.table = tf.lookup.StaticHashTable(
        initializer=tf.lookup.KeyValueTensorInitializer(
            keys=tf.constant(["PAD"] + alphabet),
            values=tf.constant(range(0, len(alphabet) + 1)),
        ),
        default_value=0)
def bounded_parameter(low, high, param):
    """Make a gpflow Parameter with optimization bounds (low, high)."""
    affine = tfb.AffineScalar(shift=tf.cast(low, tf.float64),
                              scale=tf.cast(high - low, tf.float64))
    sigmoid = tfb.Sigmoid()
    logistic = tfb.Chain([affine, sigmoid])
    parameter = gpf.Parameter(param, transform=logistic, dtype=tf.float64)
    return parameter
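# Hedged usage sketch (added; assumes the same imports as above: tensorflow
# as tf, tfp.bijectors as tfb, gpflow as gpf). Chain([affine, sigmoid])
# applies the sigmoid first, so the constrained value is
# low + (high - low) * sigmoid(u), i.e. bounded to the open interval
# (low, high):
lengthscale = bounded_parameter(0.1, 4.0, 1.0)     # constrained to (0.1, 4.0)
print(lengthscale.numpy())                         # -> 1.0
print(lengthscale.unconstrained_variable.numpy())  # free value on the real line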
def __init__(self, m=1, active_dims=[0], gap_decay=0.1, match_decay=0.9,
             max_subsequence_length=3, alphabet=[], maxlen=0):
    super().__init__(active_dims=active_dims)
    # constrain decay kernel params to between 0 and 1
    logistic_gap = tfb.Chain([
        tfb.Shift(tf.cast(0, tf.float64))(tfb.Scale(tf.cast(1, tf.float64))),
        tfb.Sigmoid()
    ])
    logistic_match = tfb.Chain([
        tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                         scale=tf.cast(1, tf.float64)),
        tfb.Sigmoid()
    ])
    self.gap_decay = Parameter(gap_decay, transform=logistic_gap,
                               name="gap_decay")
    self.match_decay = Parameter(match_decay, transform=logistic_match,
                                 name="match_decay")

    # prepare order coefs params
    order_coefs = tf.ones(max_subsequence_length)
    self.order_coefs = Parameter(order_coefs, transform=positive(),
                                 name="order_coefs")

    # get split weights
    self.m = m
    split_weights = tf.ones(2 * self.m - 1)
    self.split_weights = Parameter(split_weights, transform=positive(),
                                   name="split_weights")

    # store additional kernel parameters
    self.max_subsequence_length = tf.constant(max_subsequence_length)
    self.alphabet = tf.constant(alphabet)
    self.alphabet_size = tf.shape(self.alphabet)[0]
    self.maxlen = tf.cast(tf.math.ceil(maxlen / self.m), dtype=tf.int32)
    self.full_maxlen = tf.constant(maxlen)

    # build a lookup table of the alphabet to encode input strings
    self.table = tf.lookup.StaticHashTable(
        initializer=tf.lookup.KeyValueTensorInitializer(
            keys=tf.constant(["PAD"] + alphabet),
            values=tf.constant(range(0, len(alphabet) + 1)),
        ),
        default_value=0)
def ncp(rv_constructor, *rv_args, **rv_kwargs):
    if (rv_constructor.__name__ == 'Normal'
            and not rv_kwargs['name'].startswith('y')):
        loc = rv_kwargs['loc']
        scale = rv_kwargs['scale']
        name = rv_kwargs['name']
        shape = rv_constructor(*rv_args, **rv_kwargs).shape

        kwargs_std = {}
        kwargs_std['loc'] = tf.zeros(shape)
        kwargs_std['scale'] = tf.ones(shape)
        kwargs_std['name'] = name + '_std'

        b = tfb.AffineScalar(scale=scale, shift=loc)
        if 'value' in rv_kwargs:
            kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
        return b.forward(rv_std)
    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
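# Hedged usage sketch (added; the `model` function and `ed` alias are
# assumptions, not from the original source): `ncp` is an Edward2
# interceptor implementing the non-centered parameterization
#
#   x = loc + scale * z,  z ~ Normal(0, 1),
#
# which is exactly b.forward(rv_std) above. It would typically be applied as:
#
#   with ed.interception(ncp):
#       outputs = model(inputs)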
def __init__(self, rank=1, active_dims=[0], gap_decay=0.1, match_decay=0.9,
             max_subsequence_length=3, alphabet=[], maxlen=0, batch_size=100):
    super().__init__(active_dims=active_dims)
    # constrain decay kernel params to between 0 and 1
    self.logistic_gap = tfb.Chain([
        tfb.Shift(tf.cast(0, tf.float64))(tfb.Scale(tf.cast(1, tf.float64))),
        tfb.Sigmoid()
    ])
    self.logisitc_match = tfb.Chain([
        tfb.AffineScalar(shift=tf.cast(0, tf.float64),
                         scale=tf.cast(1, tf.float64)),
        tfb.Sigmoid()
    ])
    self.gap_decay_param = Parameter(gap_decay,
                                     transform=self.logistic_gap,
                                     name="gap_decay")
    self.match_decay_param = Parameter(match_decay,
                                       transform=self.logisitc_match,
                                       name="match_decay")
    self.W_param = Parameter(0.1 * tf.ones((len(alphabet), rank)), name="W")
    self.kappa_param = Parameter(tf.ones(len(alphabet)),
                                 transform=positive(),
                                 name="kappa")

    # We will use copies of the kernel params to avoid building an expensive
    # computation graph; gradients are instead calculated efficiently via
    # dynamic programming. These copies are refreshed on every call to K and
    # K_diag (to check whether the parameters have been updated).
    self.match_decay = self.match_decay_param.numpy()
    self.gap_decay = self.gap_decay_param.numpy()
    self.kappa = self.kappa_param.numpy()
    self.W = self.W_param.numpy()
    self.match_decay_unconstrained = self.match_decay_param.unconstrained_variable.numpy()
    self.gap_decay_unconstrained = self.gap_decay_param.unconstrained_variable.numpy()
    self.kappa_unconstrained = self.kappa_param.unconstrained_variable.numpy()
    self.W_unconstrained = self.W_param.unconstrained_variable.numpy()

    # store additional kernel parameters
    self.max_subsequence_length = tf.constant(max_subsequence_length)
    self.alphabet = tf.constant(alphabet)
    self.alphabet_size = tf.shape(self.alphabet)[0]
    self.maxlen = tf.constant(maxlen)
    self.batch_size = tf.constant(batch_size)
    self.rank = tf.constant(rank)

    # build a lookup table of the alphabet to encode input strings
    self.table = tf.lookup.StaticHashTable(
        initializer=tf.lookup.KeyValueTensorInitializer(
            keys=tf.constant(["PAD"] + alphabet),
            values=tf.constant(range(0, len(alphabet) + 1)),
        ),
        default_value=0)

    # initialize helpful construction matrices, lazily computed once needed
    self.D = None
    self.dD_dgap = None
def recenter(rv_constructor, *rv_args, **rv_kwargs):
    rv_name = rv_kwargs.get('name')
    rv_value = rv_kwargs.pop('value', None)

    base_bijector = None
    if rv_constructor.__name__ == 'TransformedDistribution':
        if (rv_args[1].__class__.__name__ == 'Invert'
                and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
            distribution = rv_args[0]
            base_bijector = rv_args[1].bijector
            rv_constructor = distribution.__class__
            rv_kwargs = distribution.parameters
            rv_args = rv_args[2:]

            # We were given a value for the transformed RV. Let's pretend it
            # was for the original.
            if rv_value is not None:
                rv_value = base_bijector.forward(rv_value)

    if (rv_constructor.__name__ == 'Normal'
            and not rv_name.startswith('y')):
        # NB: assume everything is kwargs for now.
        x_loc = rv_kwargs['loc']
        x_scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        a, b, _ = get_or_init(name,
                              loc_shape=tf.shape(x_loc),
                              scale_shape=tf.shape(x_scale),
                              parameterisation_type='scalar')

        kwargs_std = {}
        kwargs_std['loc'] = tf.multiply(x_loc, a)
        kwargs_std['scale'] = tf.pow(x_scale, b)  # tf.multiply(x_scale - 1., b) + 1.
        kwargs_std['name'] = name

        scale = x_scale / kwargs_std['scale']  # tf.pow(x_scale, 1. - b)
        shift = x_loc - tf.multiply(scale, kwargs_std['loc'])
        b = tfb.AffineScalar(scale=scale, shift=shift)

        if rv_value is not None:
            rv_value = b.inverse(rv_value)

        learnable_parameters[name + '_prior_mean'] = tf.convert_to_tensor(x_loc)
        learnable_parameters[name + '_prior_scale'] = tf.convert_to_tensor(x_scale)

        # If the original RV was constrained, transform the constraint to the
        # new standardized RV. For now we assume a double-sided constraint.
        if base_bijector is not None:
            constraint_std = tfb.SoftClip(
                low=b.inverse(base_bijector.low),
                high=b.inverse(base_bijector.high),
                hinge_softness=(base_bijector.hinge_softness / scale
                                if base_bijector.hinge_softness is not None
                                else None))
            rv_std = edward2.TransformedDistribution(
                rv_constructor(**kwargs_std),
                tfb.Invert(constraint_std),
                value=(constraint_std.inverse(rv_value)
                       if rv_value is not None else None))
            b = b(constraint_std)
        else:
            kwargs_std['value'] = rv_value
            rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

        bijectors[name] = b
        return b.forward(rv_std)

    elif ((rv_constructor.__name__.startswith('MultivariateNormal')
           or rv_constructor.__name__.startswith('GaussianProcess'))
          and not rv_kwargs['name'].startswith('y')):
        name = rv_kwargs['name']
        if rv_constructor.__name__.startswith('GaussianProcess'):
            gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
            X = gp_dist._get_index_points()
            x_loc = gp_dist.mean_fn(X)
            x_cov = gp_dist._compute_covariance(index_points=X)
        else:
            x_loc = rv_kwargs['loc']
            x_cov = rv_kwargs['covariance_matrix']

        a, b, c = get_or_init(name,
                              loc_shape=tf.shape(x_loc),
                              scale_shape=tf.shape(x_cov)[:-1],
                              parameterisation_type=parameterisation_type)

        ndims = tf.shape(x_cov)[-1]
        x_loc = tf.broadcast_to(x_loc, tf.shape(x_cov)[:-1])
        cov_dtype = tf.float64 if FLAGS.float64 else x_cov.dtype
        x_cov = tf.cast(x_cov, cov_dtype)

        if parameterisation_type == 'eig':
            # Extra cost of the eigendecomposition? We do the eig to get
            # Lambda, Q; we rescale Lambda and create the prior dist linop.
            # Point one: the prior is an MVN (albeit an efficient one),
            # where in NCP it's just Normal. Then we construct the remaining
            # scale matrix (an n**3 matmul). And unlike a Cholesky factor
            # these matrices aren't triangular, so multiplication or
            # division - can we ...
            Lambda, Q = eigh_with_safe_gradient(x_cov)
            Lambda = tf.abs(Lambda)
            Lambda = tf.cast(Lambda, tf.float32)
            Q = tf.cast(Q, tf.float32)
            Lambda_hat_b = tf.pow(Lambda, b)

            if tied_pparams:
                # If the scale parameterization is in the eigenbasis,
                # apply it to the mean in the same basis.
                loc_in_eigenbasis = tf.linalg.matvec(Q, x_loc, adjoint_a=True)
                reparam_loc = tf.linalg.matvec(
                    Q, tf.multiply(loc_in_eigenbasis, a))
            else:
                reparam_loc = tf.multiply(x_loc, a)

            kwargs_std = {}
            kwargs_std['loc'] = reparam_loc
            kwargs_std['scale'] = LinearOperatorEigenScale(
                Q, d=tf.sqrt(Lambda_hat_b))
            kwargs_std['name'] = name

            Q_linop = LinearOperatorOrthogonal(Q, det_is_positive=True)
            scale = tf.linalg.LinearOperatorComposition([
                Q_linop,
                tf.linalg.LinearOperatorDiag(tf.sqrt(Lambda + 1e-10)),
                tf.linalg.LinearOperatorDiag(1. / tf.sqrt(Lambda_hat_b + 1e-10)),
                Q_linop.adjoint(),
            ])
            shift = x_loc - scale.matvec(reparam_loc)
            b = tfb.AffineLinearOperator(scale=scale, shift=shift)
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        elif parameterisation_type == 'chol':
            L = tf.linalg.cholesky(x_cov + 1e-6 * tf.eye(ndims, dtype=x_cov.dtype))
            L = tf.cast(L, tf.float32)

            reparam_loc = x_loc * a
            reparam_scale = tf.linalg.LinearOperatorLowerTriangular(
                tf.linalg.diag(1 - b) + b[..., tf.newaxis] * L)

            kwargs_std = {}
            kwargs_std['loc'] = reparam_loc
            kwargs_std['scale'] = reparam_scale
            kwargs_std['name'] = name

            Dinv = tf.linalg.triangular_solve(
                tf.cast(reparam_scale.to_dense(), cov_dtype),
                tf.eye(ndims, dtype=cov_dtype))
            Dinv = tf.cast(Dinv, tf.float32)
            scale = tf.matmul(L, Dinv)
            shift = x_loc - tf.linalg.matvec(scale, reparam_loc)
            b = tfb.AffineLinearOperator(
                scale=tf.linalg.LinearOperatorFullMatrix(scale), shift=shift)
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        elif parameterisation_type == 'indep':
            # Assumes `C^-1 = diag(c)` is a learned diagonal matrix of
            # 'evidence precisions'. This approximates the true posterior
            # under an iid Gaussian observation model:
            prior_chol = tf.linalg.cholesky(x_cov)
            prior_inv = tf.linalg.cholesky_solve(
                prior_chol, tf.eye(ndims, dtype=prior_chol.dtype))
            approx_posterior_prec = prior_inv + tf.cast(
                tf.linalg.diag(c), prior_inv.dtype)
            approx_posterior_prec_chol = tf.linalg.cholesky(approx_posterior_prec)
            approx_posterior_cov = tf.linalg.cholesky_solve(
                approx_posterior_prec_chol,
                tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
            cov_chol = tf.linalg.cholesky(approx_posterior_cov)

            cov_chol = tf.cast(cov_chol, tf.float32)
            prior_chol = tf.cast(prior_chol, tf.float32)
            scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

            reparam_loc = x_loc * a
            reparam_scale = tf.linalg.LinearOperatorComposition([
                tf.linalg.LinearOperatorInversion(scale_linop),
                tf.linalg.LinearOperatorLowerTriangular(prior_chol)
            ])

            kwargs_std = {}
            kwargs_std['loc'] = reparam_loc
            kwargs_std['scale'] = reparam_scale
            kwargs_std['name'] = name

            shift = x_loc - scale_linop.matvec(reparam_loc)
            b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        elif parameterisation_type == 'eigindep':
            # Combines 'eig' and 'indep' parameterizations, modeling the
            # posterior as
            #   (V D**(-b) V' + diag(c))^-1
            # where VDV' is the eigendecomposition of the prior cov, and b
            # and c are learned vectors.
            b, c = [tf.cast(x, cov_dtype) for x in (b, c)]
            Lambda, Q = eigh_with_safe_gradient(x_cov)
            Lambda = tf.abs(Lambda)
            Lambda_hat_b = 1e-6 + tf.pow(Lambda, b)
            prior = tf.matmul(
                Q, tf.matmul(tf.linalg.diag(Lambda_hat_b), Q, adjoint_b=True))
            prior_chol = tf.linalg.cholesky(
                prior + 1e-6 * tf.eye(ndims, dtype=prior.dtype))
            prior_prec = tf.linalg.cholesky_solve(
                prior_chol + 1e-6 * tf.eye(ndims, dtype=prior_chol.dtype),
                tf.eye(ndims, dtype=prior_chol.dtype))
            approx_posterior_prec = prior_prec + tf.linalg.diag(c)
            approx_posterior_prec_chol = tf.linalg.cholesky(approx_posterior_prec)
            approx_posterior_cov = tf.linalg.cholesky_solve(
                approx_posterior_prec_chol + 1e-6 * tf.eye(
                    ndims, dtype=approx_posterior_prec_chol.dtype),
                tf.eye(ndims, dtype=approx_posterior_prec_chol.dtype))
            cov_chol = tf.linalg.cholesky(
                approx_posterior_cov + 1e-6 * tf.eye(
                    ndims, dtype=approx_posterior_cov.dtype))

            cov_chol = tf.cast(cov_chol, tf.float32)
            prior_chol = tf.cast(prior_chol, tf.float32)
            scale_linop = tf.linalg.LinearOperatorLowerTriangular(cov_chol)

            reparam_loc = tf.multiply(x_loc, a)
            reparam_scale = tf.linalg.LinearOperatorComposition([
                tf.linalg.LinearOperatorInversion(scale_linop),
                tf.linalg.LinearOperatorLowerTriangular(prior_chol)
            ])

            kwargs_std = {}
            kwargs_std['loc'] = reparam_loc
            kwargs_std['scale'] = reparam_scale
            kwargs_std['name'] = name

            shift = x_loc - scale_linop.matvec(reparam_loc)
            b = tfb.AffineLinearOperator(scale=scale_linop, shift=shift)
            if 'value' in rv_kwargs:
                kwargs_std['value'] = b.inverse(rv_kwargs['value'])
        else:
            raise Exception('unrecognized reparameterization strategy!')

        if rv_constructor.__name__.startswith('GaussianProcess'):
            rv_std = edward2.MultivariateNormalLinearOperator(*rv_args, **kwargs_std)
        else:
            rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)

        bijectors[name] = b
        return b.forward(rv_std)
    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
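# Note (added; summarizes the branches above): `parameterisation_type`
# selects the reparameterization strategy: 'eig' rescales the prior's
# eigenvalues by a learned exponent b, 'chol' interpolates toward the
# prior's Cholesky factor, 'indep' adds learned diagonal 'evidence
# precisions' c to the prior precision, and 'eigindep' combines the two.
# 'scalar' (the Normal branch) learns per-variable (a, b) instead.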
def ncp(rv_constructor, *rv_args, **rv_kwargs):
    base_bijector = None
    rv_value = rv_kwargs.pop('value', None)

    if rv_constructor.__name__ == 'TransformedDistribution':
        if (rv_args[1].__class__.__name__ == 'Invert'
                and rv_args[1].bijector.__class__.__name__ == 'SoftClip'):
            distribution = rv_args[0]
            base_bijector = rv_args[1].bijector
            rv_constructor = distribution.__class__
            rv_kwargs = distribution.parameters
            rv_args = rv_args[2:]

            # We were given a value for the transformed RV. Let's pretend it
            # was for the original.
            if rv_value is not None:
                rv_value = base_bijector.forward(rv_value)

    if (rv_constructor.__name__ == 'Normal'
            and not rv_kwargs['name'].startswith('y')):
        loc = rv_kwargs['loc']
        scale = rv_kwargs['scale']
        name = rv_kwargs['name']

        kwargs_std = {}
        kwargs_std['loc'] = tf.zeros_like(loc)
        kwargs_std['scale'] = tf.ones_like(scale)
        kwargs_std['name'] = name + '_std'

        b = tfb.AffineScalar(scale=scale, shift=loc)
        if rv_value is not None:
            rv_value = b.inverse(rv_value)
        kwargs_std['value'] = rv_value

        rv_std = interceptable(rv_constructor)(*rv_args, **kwargs_std)
        return b.forward(rv_std)

    elif ((rv_constructor.__name__.startswith('MultivariateNormal')
           or rv_constructor.__name__.startswith('GaussianProcess'))
          and not rv_kwargs['name'].startswith('y')):
        name = rv_kwargs['name']
        if rv_constructor.__name__.startswith('GaussianProcess'):
            gp_dist = rv_constructor(*rv_args, **rv_kwargs).distribution
            X = gp_dist._get_index_points()
            x_loc = gp_dist.mean_fn(X)
            x_cov = gp_dist._compute_covariance(index_points=X)
            shape = tfd.MultivariateNormalFullCovariance(x_loc, x_cov).event_shape
        else:
            x_loc = rv_kwargs['loc']
            x_cov = rv_kwargs['covariance_matrix']
            shape = rv_constructor(*rv_args, **rv_kwargs).shape

        kwargs_std = {}
        kwargs_std['loc'] = tf.zeros(shape)
        kwargs_std['scale_diag'] = tf.ones(shape[0])
        kwargs_std['name'] = name + '_std'

        scale = tf.linalg.cholesky(x_cov + 1e-6 * tf.eye(tf.shape(x_cov)[-1]))
        b = tfb.AffineLinearOperator(
            scale=tf.linalg.LinearOperatorLowerTriangular(scale), shift=x_loc)
        if 'value' in rv_kwargs:
            kwargs_std['value'] = b.inverse(rv_kwargs['value'])

        rv_std = edward2.MultivariateNormalDiag(*rv_args, **kwargs_std)
        return b.forward(rv_std)
    else:
        return interceptable(rv_constructor)(*rv_args, **rv_kwargs)
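# Note (added): in the multivariate branch above, b.forward(z) computes
# loc + L @ z with L = chol(cov + 1e-6 * I), i.e. the standard whitening
# (non-centered) transform; sampling z ~ MultivariateNormalDiag(0, I) and
# pushing it through b reproduces the original MVN/GP prior.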