def _model():
    p = yield Root(tfd.Beta(dtype(1), dtype(1), name="p"))
    gamma_C = yield Root(tfd.Beta(dtype(1), dtype(1), name="gamma_C"))
    gamma_T = yield Root(tfd.Beta(dtype(1), dtype(1), name="gamma_T"))
    eta_C = yield Root(tfd.Dirichlet(np.ones(K, dtype=dtype) / K,
                                     name="eta_C"))
    eta_T = yield Root(tfd.Dirichlet(np.ones(K, dtype=dtype) / K,
                                     name="eta_T"))
    loc = yield Root(tfd.Sample(tfd.Normal(dtype(0), dtype(1)),
                                sample_shape=K, name="loc"))
    nu = yield Root(tfd.Sample(tfd.Uniform(dtype(10), dtype(50)),
                               sample_shape=K, name="nu"))
    phi = yield Root(tfd.Sample(tfd.Normal(dtype(m_phi), dtype(s_phi)),
                                sample_shape=K, name="phi"))
    sigma_sq = yield Root(tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                                     sample_shape=K, name="sigma_sq"))
    # use tf.sqrt, not np.sqrt: sigma_sq may be a graph tensor during tracing
    scale = tf.sqrt(sigma_sq)

    gamma_T_star = compute_gamma_T_star(gamma_C, gamma_T, p)
    eta_T_star = compute_eta_T_star(gamma_C[..., tf.newaxis],
                                    gamma_T[..., tf.newaxis],
                                    eta_C, eta_T,
                                    p[..., tf.newaxis],
                                    gamma_T_star[..., tf.newaxis])

    # likelihood
    y_C = yield mix(nC, eta_C, loc, scale, name="y_C")
    n0C = yield tfd.Binomial(nC, gamma_C, name="n0C")
    y_T = yield mix(nT, eta_T_star, loc, scale, name="y_T")
    n0T = yield tfd.Binomial(nT, gamma_T_star, name="n0T")
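# `_model` is written in TFP's joint-distribution coroutine style; `Root`,
# `mix`, the `compute_*` helpers, and the constants K, nC, nT, m_phi, s_phi
# come from the surrounding module. A minimal sketch of how such a coroutine
# is typically wrapped and sampled, assuming those names are in scope:
import tensorflow_probability as tfp

tfd = tfp.distributions
Root = tfd.JointDistributionCoroutine.Root  # marks variables with no parents

model = tfd.JointDistributionCoroutine(_model)
draw = model.sample()      # one value per `yield`, in declaration order
lp = model.log_prob(draw)  # scalar joint log density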
def KL_Beta_Binomial(Z_a, Z_b, X_a, X_b):
    """Calculate the KL divergence between a Beta distribution and a
    Binomial likelihood.

    See the relationship between the Beta function and the binomial
    coefficient:
    https://en.wikipedia.org/wiki/Beta_function#Properties
    """
    # TODO: introduce sparse matrix
    _KL = tfd.kl_divergence(tfd.Beta(Z_a, Z_b), tfd.Beta(X_a + 1, X_b + 1))
    _diff_binomLik_to_beta = -tf.math.log(X_a + X_b + 1)
    return _KL + _diff_binomLik_to_beta
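# A quick numeric check of the identity the docstring cites: a Binomial
# likelihood in theta with k successes out of n trials equals the
# Beta(k + 1, n - k + 1) density divided by (n + 1). Values are illustrative.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

k, n, theta = 3.0, 10.0, 0.4
lhs = tfd.Binomial(total_count=n, probs=theta).log_prob(k)
rhs = tfd.Beta(k + 1.0, n - k + 1.0).log_prob(theta) - tf.math.log(n + 1.0)
tf.debugging.assert_near(lhs, rhs)  # log Binomial = log BetaPDF - log(n + 1)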
def set_prior(self, theta_prior=None, gamma_prior=None, Y_prior=None,
              Z_prior=None):
    """Set prior distributions."""
    # Prior distribution for the allelic ratio
    if theta_prior is None:
        self.theta_prior = tfd.Beta(self.cnv_states[:, 0] + 0.01,
                                    self.cnv_states[:, 1] + 0.01)
    else:
        self.theta_prior = theta_prior

    # Prior distribution for the depth ratio
    if gamma_prior is None:
        if self.RDR_cov is None:
            self.set_GP_kernal()
        self.gamma_prior = FullNormal(loc=self.cnv_states.sum(axis=1),
                                      covariance_matrix=self.RDR_cov)
    else:
        self.gamma_prior = gamma_prior

    # Prior distribution for CNV state weights
    if Y_prior is None:
        self.Y_prior = tfd.Multinomial(
            total_count=1,
            probs=tf.ones((self.Nb, self.Nk, self.Ns)) / self.Ns)
    else:
        self.Y_prior = Y_prior

    # Prior distribution for cell assignment weights
    if Z_prior is None:
        self.Z_prior = tfd.Multinomial(
            total_count=1,
            probs=tf.ones((self.Nc, self.Nk)) / self.Nk)
    else:
        self.Z_prior = Z_prior
def create_dp_sb_gmm(nobs, K, dtype=np.float64):
    return tfd.JointDistributionNamed(
        dict(
            # Mixture means
            mu=tfd.Independent(tfd.Normal(np.zeros(K, dtype), 3),
                               reinterpreted_batch_ndims=1),
            # Mixture scales
            sigma=tfd.Independent(tfd.LogNormal(loc=np.full(K, -2, dtype),
                                                scale=0.5),
                                  reinterpreted_batch_ndims=1),
            # Mixture weights (stick-breaking construction)
            alpha=tfd.Gamma(concentration=np.float64(1.0), rate=10.0),
            v=lambda alpha: tfd.Independent(
                # NOTE: Dave Moore suggests doing this instead, to ensure
                # that a batch dimension in alpha doesn't conflict with
                # the other parameters.
                tfd.Beta(np.ones(K - 1, dtype), alpha[..., tf.newaxis]),
                reinterpreted_batch_ndims=1),
            # Observations (likelihood)
            obs=lambda mu, sigma, v: tfd.Sample(
                tfd.MixtureSameFamily(
                    # This will be marginalized over.
                    mixture_distribution=tfd.Categorical(probs=stickbreak(v)),
                    components_distribution=tfd.Normal(mu, sigma)),
                sample_shape=nobs)))
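# The joint above relies on a `stickbreak` helper defined elsewhere; here is
# a minimal sketch of the usual stick-breaking transform it presumably
# implements, followed by a smoke test:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

def stickbreak(v):
    # Map (..., K-1) stick fractions in (0, 1) to (..., K) simplex weights.
    cumprod_one_minus_v = tf.math.cumprod(1 - v, axis=-1)
    v_one = tf.concat([v, tf.ones_like(v[..., :1])], axis=-1)
    one_c = tf.concat([tf.ones_like(v[..., :1]), cumprod_one_minus_v],
                      axis=-1)
    return v_one * one_c  # each row sums to one

model = create_dp_sb_gmm(nobs=200, K=10)
draw = model.sample()        # dict with keys mu, sigma, alpha, v, obs
print(model.log_prob(draw))  # scalar joint log density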
def losses(self):
    """Sum of KL divergences between posteriors and priors"""
    w_prior = tfd.Dirichlet(tf.ones([self.K]))
    theta_prior = tfd.Beta([0.1, 3, 9.9], [9.9, 3, 0.1])
    return (tf.reduce_sum(tfd.kl_divergence(self.weight, w_prior)) +
            tf.reduce_sum(tfd.kl_divergence(self.ASR, theta_prior)))
def create_prior(K, a_p=1, b_p=1, a_gamma=1, b_gamma=1, m_loc=0, g_loc=0.1,
                 m_sigma=3, s_sigma=2, m_nu=0, s_nu=1, m_skew=0, g_skew=0.1,
                 dtype=np.float64):
    return tfd.JointDistributionNamed(
        dict(
            p=tfd.Beta(dtype(a_p), dtype(b_p)),
            gamma_C=tfd.Gamma(dtype(a_gamma), dtype(b_gamma)),
            gamma_T=tfd.Gamma(dtype(a_gamma), dtype(b_gamma)),
            eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            # cast s_nu too, for consistency with the other parameters
            nu=tfd.Sample(tfd.LogNormal(dtype(m_nu), dtype(s_nu)),
                          sample_shape=K),
            sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(m_sigma),
                                                 dtype(s_sigma)),
                                sample_shape=K),
            loc=lambda sigma_sq: tfd.Independent(
                tfd.Normal(dtype(m_loc), g_loc * tf.sqrt(sigma_sq)),
                reinterpreted_batch_ndims=1),
            skew=lambda sigma_sq: tfd.Independent(
                tfd.Normal(dtype(m_skew), g_skew * tf.sqrt(sigma_sq)),
                reinterpreted_batch_ndims=1),
        ))
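# `create_prior` returns a tfd.JointDistributionNamed, so sampling yields a
# dict keyed by parameter name and log_prob consumes the same structure; a
# small usage sketch (K = 4 is illustrative):
import numpy as np
import tensorflow_probability as tfp

tfd = tfp.distributions

prior = create_prior(K=4)
draw = prior.sample()          # dict: p, gamma_C, gamma_T, ..., loc, skew
print(draw["sigma_sq"].shape)  # (4,): one variance per mixture component
print(prior.log_prob(draw))    # scalar joint log density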
def create_model(n_C, n_T, K, neg_inf=-10, dtype=np.float64):
    return tfd.JointDistributionNamed(
        dict(
            p=tfd.Beta(dtype(1), dtype(1)),
            gamma_C=tfd.Beta(dtype(1), dtype(1)),
            gamma_T=tfd.Beta(dtype(1), dtype(1)),
            eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            loc=tfd.Sample(tfd.Normal(dtype(0), dtype(1)), sample_shape=K),
            sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                                sample_shape=K),
            y_C=lambda gamma_C, eta_C, loc, sigma_sq: tfd.Sample(
                mix(gamma_C, eta_C, loc, tf.sqrt(sigma_sq), dtype(neg_inf)),
                n_C),
            y_T=lambda gamma_C, gamma_T, eta_C, eta_T, p, loc, sigma_sq:
                tfd.Sample(
                    mix_T(gamma_C, gamma_T, eta_C, eta_T, p, loc,
                          tf.sqrt(sigma_sq), dtype(neg_inf)),
                    n_T)))
def init_feature_thresholds(features, beta, n_trees, depth):
    sampler = distributions.Beta(beta, beta)
    percentiles_q = sampler.sample([n_trees * depth])
    flattened_feature_values = tf.map_fn(tf.keras.backend.flatten, features)
    percentile = stats.percentile(flattened_feature_values,
                                  100 * percentiles_q)
    feature_thresholds = tf.reshape(percentile, (n_trees, depth))
    return feature_thresholds
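# The snippet assumes module-level aliases for TFP's `distributions` and
# `stats`; a plausible setup and a shape smoke test (all values illustrative):
import tensorflow as tf
import tensorflow_probability as tfp

distributions = tfp.distributions
stats = tfp.stats

features = tf.random.uniform((128, 16))  # 128 examples, 16 features
thresholds = init_feature_thresholds(features, beta=1.0, n_trees=3, depth=4)
print(thresholds.shape)                  # (3, 4): one threshold per node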
def __init__(self,
             theta: tf.Tensor,
             validate_args: bool = False,
             name: str = 'bernstein_bijector'):
    """Constructs a new instance of a Bernstein polynomial bijector.

    :param theta: The Bernstein coefficients.
    :type theta: Tensor
    :param validate_args: Whether to validate input with asserts. Passed
        to `super()`.
    :type validate_args: bool
    :param name: The name to give Ops created by the initializer. Passed
        to `super()`.
    :type name: str
    """
    with tf.name_scope(name) as name:
        dtype = dtype_util.common_dtype([theta], dtype_hint=tf.float32)

        self.theta = tensor_util.convert_nonref_to_tensor(theta, dtype=dtype)

        shape = prefer_static.shape(self.theta)
        self.order = shape[-1]
        self.batch_shape = shape[:-1]

        # Bernstein polynomials of order M,
        # generated by the M + 1 beta densities
        self.beta_dist_h = tfd.Beta(
            tf.range(1, self.order + 1, dtype=tf.float32),
            tf.range(self.order, 0, -1, dtype=tf.float32))

        # Derivative of the Bernstein polynomials
        self.beta_dist_h_dash = tfd.Beta(
            tf.range(1, self.order, dtype=tf.float32),
            tf.range(self.order - 1, 0, -1, dtype=tf.float32))

        # Cubic splines are used to approximate the inverse
        self.interp = None

        super().__init__(forward_min_event_ndims=0,
                         validate_args=validate_args,
                         dtype=dtype,
                         name=name)
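# For context: the stored Beta collections encode the identity
# b_{i,M}(x) = Beta(i + 1, M - i + 1).pdf(x) / (M + 1), so a Bernstein
# polynomial is the theta-weighted mean of those densities. A standalone
# sketch of that evaluation (not the class's actual forward method):
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

theta = tf.constant([0.0, 0.2, 0.5, 1.0])  # M + 1 = 4 coefficients
order = theta.shape[-1]
beta_dist_h = tfd.Beta(tf.range(1, order + 1, dtype=tf.float32),
                       tf.range(order, 0, -1, dtype=tf.float32))

def bernstein_poly(x):
    # Mean over i of theta_i * Beta(i + 1, M - i + 1).pdf(x), for x in (0, 1).
    return tf.reduce_mean(theta * beta_dist_h.prob(x[..., tf.newaxis]),
                          axis=-1)

print(bernstein_poly(tf.constant([0.1, 0.5, 0.9])))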
def __call__(self, inputs: tf.Tensor, *args, **kwargs):
    super().__call__(inputs, *args, **kwargs)
    concentrations = tf.exp(inputs) + 1.0
    concentrations = tf.split(concentrations, 2, axis=1)

    class Output(DistributionOutput):
        def log_prob(self, x: tf.Tensor):
            rounding_value = np.finfo(x.dtype.as_numpy_dtype).tiny
            return super().log_prob(
                tf.clip_by_value(x, rounding_value, 1.0 - rounding_value))

    return Output(distributions.Beta(*concentrations),
                  stop_entropy_gradient=True)
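# The clipping in `log_prob` guards against the Beta density's divergence at
# the support boundary, where exact 0/1 observations would otherwise yield
# infinite log-probabilities; a minimal illustration:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

d = tfd.Beta(2.0, 3.0)
print(d.log_prob(0.0))  # -inf: the density vanishes at the boundary
tiny = np.finfo(np.float32).tiny
print(d.log_prob(tf.clip_by_value(0.0, tiny, 1.0 - tiny)))  # large but finite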
def mixup_dataset(dataset: tf.data.Dataset,
                  mixup_alpha: float) -> tf.data.Dataset:
    dist = tfd.Beta(mixup_alpha, mixup_alpha)

    def mixup(*batch):
        """Augments a batch of samples by overlaying consecutive samples,
        weighted by samples taken from a beta distribution.
        """
        in_batch, out_batch = (batch[BATCH_INPUT_INDEX],
                               batch[BATCH_OUTPUT_INDEX])
        # last batch in epoch may not be exactly batch_size; get actual _size
        _size = tf.shape(in_batch[next(iter(in_batch))])[:1]
        # roll samples for masking: [1, 2, 3] -> [3, 1, 2]
        in_roll = {k: tf.roll(in_batch[k], 1, 0) for k in in_batch}
        out_roll = {k: tf.roll(out_batch[k], 1, 0) for k in out_batch}
        # sample from the beta distribution
        lambdas = dist.sample(_size)
        for k in in_batch:
            # lambdas has shape (_size,); reshape to match the tensor's rank
            _dims = [_size, tf.ones(tf.rank(in_batch[k]) - 1, tf.int32)]
            _shape = tf.concat(_dims, 0)
            _lambdas = tf.reshape(lambdas, _shape)
            # augment samples with mixup
            in_batch[k] = (in_batch[k] * _lambdas +
                           in_roll[k] * (1 - _lambdas))
        for k in out_batch:
            _dims = [_size, tf.ones(tf.rank(out_batch[k]) - 1, tf.int32)]
            _shape = tf.concat(_dims, 0)
            _lambdas = tf.reshape(lambdas, _shape)
            out_batch[k] = (out_batch[k] * _lambdas +
                            out_roll[k] * (1 - _lambdas))
        return batch

    dataset = dataset.map(mixup)
    return dataset
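# BATCH_INPUT_INDEX and BATCH_OUTPUT_INDEX are module constants not shown
# here; the usual (features, labels) layout would make them 0 and 1. A smoke
# test under that assumption, with illustrative shapes:
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
BATCH_INPUT_INDEX, BATCH_OUTPUT_INDEX = 0, 1  # assumed layout

ds = tf.data.Dataset.from_tensor_slices(
    ({"x": tf.random.uniform((32, 8))},
     {"y": tf.one_hot(tf.random.uniform((32,), 0, 3, tf.int32), 3)})
).batch(16)

for inputs, outputs in mixup_dataset(ds, mixup_alpha=0.2):
    print(inputs["x"].shape, outputs["y"].shape)  # (16, 8) (16, 3)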
def mix(gamma, eta, loc, scale, neg_inf):
    _gamma = gamma[..., tf.newaxis]
    # FIXME: Possible to use tfd.Blockwise?
    return tfd.Mixture(
        cat=tfd.Categorical(probs=tf.concat([_gamma, 1 - _gamma], axis=-1)),
        components=[
            tfd.Deterministic(np.float64(neg_inf)),
            tfd.MixtureSameFamily(
                mixture_distribution=tfd.Categorical(probs=eta),
                components_distribution=tfd.Normal(loc=loc, scale=scale)),
        ])

# TEST:
K = 5
gamma = tfd.Beta(dtype(1), 1.).sample()
eta = tfd.Dirichlet(tf.ones(K, dtype=dtype) / K).sample()
m = mix(gamma, eta, tf.zeros(K, dtype=dtype), tf.ones(K, dtype=dtype),
        dtype(-10))
s = m.sample(3)
m.log_prob(s)

# NOTE:
# - `Sample` and `Independent` resemble, respectively, `filldist` and
#   `arraydist` in Turing.
def _base_dist(self, alpha: TensorLike, beta: TensorLike, *args, **kwargs):
    # In TFP's parametrization, concentration1 acts on x (i.e. alpha) and
    # concentration0 on 1 - x (i.e. beta).
    return tfd.Beta(concentration1=alpha, concentration0=beta,
                    *args, **kwargs)
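# A quick check of that mapping (values illustrative): with alpha = 2 and
# beta = 5 the mean must be alpha / (alpha + beta) = 2/7.
import tensorflow_probability as tfp

tfd = tfp.distributions

d = tfd.Beta(concentration1=2.0, concentration0=5.0)
print(d.mean())  # ~0.2857 = 2 / 7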
def ASR(self):
    """Variational posterior for the binomial rate"""
    return tfd.Beta(tf.math.exp(self.beta_size[:, 0]),
                    tf.math.exp(self.beta_size[:, 1]))
def PsiDist(self):
    """Variational Beta distribution for Psi"""
    return tfd.Beta(self.Z_a, self.Z_b)
# prepend a 0 onto the tally of heads and tails, for the zeroth flip
coin_flip_data = tf.pad(
    tensor=rv_coin_flip_prior.sample(num_trials[-1]),  # flip 2000 times
    paddings=tf.constant([[1, 0]]),
    mode="CONSTANT")

# compute cumulative headcounts from 0 to 2000 flips, then grab them at
# each of the num_trials intervals
cumulative_headcounts = tf.gather(params=tf.cumsum(coin_flip_data),
                                  indices=num_trials)

rv_observed_heads = tfd.Beta(
    concentration1=tf.cast(1 + cumulative_headcounts, dtype="float32"),
    concentration0=tf.cast(1 + num_trials - cumulative_headcounts,
                           dtype="float32"))

probs_of_heads = tf.linspace(start=0., stop=1., num=100, name="linspace")
observed_probs_heads = tf.transpose(
    rv_observed_heads.prob(probs_of_heads[:, tf.newaxis]))

# For the already prepared: I'm using the Binomial's conjugate prior.
plt.figure(figsize=(8, 6))
for i in range(len(num_trials)):
    # integer division: subplot indices must be ints
    sx = plt.subplot(len(num_trials) // 2, 2, i + 1)
    if i == len(num_trials) - 1:
        plt.xlabel("$p$, probability of heads")
    plt.setp(sx.get_yticklabels(), visible=False)
    plt.plot(probs_of_heads, observed_probs_heads[i])
def _init_distribution(conditions, **kwargs):
    concentration0, concentration1 = (conditions["concentration0"],
                                      conditions["concentration1"])
    return tfd.Beta(concentration0=concentration0,
                    concentration1=concentration1, **kwargs)
def theta(self):
    """Variational posterior for ASE ratio"""
    # return tfd.Beta(tf.math.exp(self.theta_s1), tf.math.exp(self.theta_s2))
    return tfd.Beta(self.theta_s1, self.theta_s2)