def model_fn(self, likelihood_model, sample_scales):
    x_global_scale_variance = yield JDCRoot(
        Independent(tfd.InverseGamma(concentration=0.5, scale=0.5)))
    x_global_scale_noncentered = yield JDCRoot(
        Independent(tfd.HalfNormal(scale=1.0)))
    x_global_scale = x_global_scale_noncentered * tf.sqrt(
        x_global_scale_variance)

    x_local1_scale_variance = yield JDCRoot(
        Independent(
            tfd.InverseGamma(
                concentration=tf.fill(
                    [self.num_samples, self.num_features], 0.5),
                scale=tf.fill([self.num_samples, self.num_features], 0.5))))
    x_local1_scale_noncentered = yield JDCRoot(
        Independent(
            tfd.HalfNormal(
                scale=tf.ones([self.num_samples, self.num_features]))))
    x_local1_scale = x_local1_scale_noncentered * tf.sqrt(
        x_local1_scale_variance)

    x_bias = yield JDCRoot(
        Independent(
            tfd.Normal(
                loc=tf.fill([self.num_features],
                            np.float32(self.x_bias_loc0)),
                scale=np.float32(self.x_bias_scale0))))

    x = yield Independent(
        tfd.Normal(
            loc=x_bias - sample_scales,
            scale=x_local1_scale * x_global_scale))

    yield from likelihood_model(x)
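# A minimal, self-contained sketch of the coroutine pattern used by `model_fn`
# above, assuming `JDCRoot` is a local alias for
# `tfd.JointDistributionCoroutine.Root`. It builds a toy non-centered scale
# (sqrt of an InverseGamma draw times a HalfNormal draw) and a Normal
# observable, then checks that sampling and log_prob round-trip.
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
Root = tfd.JointDistributionCoroutine.Root

def toy_model():
    scale_variance = yield Root(tfd.InverseGamma(concentration=0.5, scale=0.5))
    scale_noncentered = yield Root(tfd.HalfNormal(scale=1.0))
    scale = scale_noncentered * tf.sqrt(scale_variance)
    yield tfd.Normal(loc=0.0, scale=scale)

joint = tfd.JointDistributionCoroutine(toy_model)
draw = joint.sample(seed=1)  # (scale_variance, scale_noncentered, x)
print(joint.log_prob(draw))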
def test_invalid_model_spec_raises_error(self):
    observed_time_series = tf.ones([2])
    design_matrix = tf.eye(2)
    # assertRaisesRegex (not the deprecated assertRaisesRegexp).
    with self.assertRaisesRegex(ValueError,
                                'Weights prior must be a univariate normal'):
        gibbs_sampler.build_model_for_gibbs_fitting(
            observed_time_series,
            design_matrix=design_matrix,
            weights_prior=tfd.StudentT(df=10, loc=0., scale=1.),
            level_variance_prior=tfd.InverseGamma(0.01, 0.01),
            observation_noise_variance_prior=tfd.InverseGamma(0.01, 0.01))
    with self.assertRaisesRegex(
            ValueError, 'Level variance prior must be an inverse gamma'):
        gibbs_sampler.build_model_for_gibbs_fitting(
            observed_time_series,
            design_matrix=design_matrix,
            weights_prior=tfd.Normal(loc=0., scale=1.),
            level_variance_prior=tfd.LogNormal(0., 3.),
            observation_noise_variance_prior=tfd.InverseGamma(0.01, 0.01))
    with self.assertRaisesRegex(
            ValueError, 'noise variance prior must be an inverse gamma'):
        gibbs_sampler.build_model_for_gibbs_fitting(
            observed_time_series,
            design_matrix=design_matrix,
            weights_prior=tfd.Normal(loc=0., scale=1.),
            level_variance_prior=tfd.InverseGamma(0.01, 0.01),
            observation_noise_variance_prior=tfd.LogNormal(0., 3.))
def test_sampled_scale_follows_correct_distribution(self):
    strm = test_util.test_seed_stream()
    prior = tfd.InverseGamma(concentration=0.1, scale=0.1)
    num_timesteps = 100
    observed_samples = tf.random.normal([2, num_timesteps], seed=strm()) * 3.
    is_missing = tf.random.uniform([2, num_timesteps], seed=strm()) > 0.9

    # Check that posterior variance samples have the moments of the correct
    # InverseGamma distribution.
    posterior_scale_samples = parallel_for.pfor(
        lambda i: gibbs_sampler._resample_scale(  # pylint: disable=g-long-lambda
            prior=prior,
            observed_residuals=observed_samples,
            is_missing=is_missing,
            seed=strm()),
        10000)

    concentration = prior.concentration + tf.reduce_sum(
        1 - tf.cast(is_missing, tf.float32), axis=-1) / 2.
    scale = prior.scale + tf.reduce_sum(
        (observed_samples * tf.cast(~is_missing, tf.float32))**2, axis=-1) / 2.
    posterior_scale_samples_, concentration_, scale_ = self.evaluate(
        (posterior_scale_samples, concentration, scale))
    self.assertAllClose(np.mean(posterior_scale_samples_**2, axis=0),
                        scale_ / (concentration_ - 1),
                        atol=0.05)
    self.assertAllClose(
        np.std(posterior_scale_samples_**2, axis=0),
        scale_ / ((concentration_ - 1) * np.sqrt(concentration_ - 2)),
        atol=0.05)
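# The test above relies on inverse-gamma/normal conjugacy: if y_i ~ Normal(0, σ)
# with σ² ~ InverseGamma(α, β), the posterior over σ² given n non-missing
# residuals is InverseGamma(α + n/2, β + Σ y_i² / 2). A quick standalone check
# of the moment formulas used in the assertions (mean = β/(α-1),
# std = β/((α-1)·sqrt(α-2)) for α > 2); the parameter values are arbitrary.
import numpy as np
import tensorflow_probability as tfp

tfd = tfp.distributions

dist = tfd.InverseGamma(concentration=5., scale=3.)
samples = dist.sample(200000, seed=42)
print(np.mean(samples), 3. / (5. - 1.))                      # both ≈ 0.75
print(np.std(samples), 3. / ((5. - 1.) * np.sqrt(5. - 2.)))  # both ≈ 0.433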
def _model():
    p = yield Root(tfd.Beta(dtype(1), dtype(1), name="p"))
    gamma_C = yield Root(tfd.Beta(dtype(1), dtype(1), name="gamma_C"))
    gamma_T = yield Root(tfd.Beta(dtype(1), dtype(1), name="gamma_T"))
    eta_C = yield Root(tfd.Dirichlet(np.ones(K, dtype=dtype) / K, name="eta_C"))
    eta_T = yield Root(tfd.Dirichlet(np.ones(K, dtype=dtype) / K, name="eta_T"))
    loc = yield Root(tfd.Sample(tfd.Normal(dtype(0), dtype(1)),
                                sample_shape=K, name="loc"))
    nu = yield Root(tfd.Sample(tfd.Uniform(dtype(10), dtype(50)),
                               sample_shape=K, name="nu"))
    phi = yield Root(tfd.Sample(tfd.Normal(dtype(m_phi), dtype(s_phi)),
                                sample_shape=K, name="phi"))
    sigma_sq = yield Root(tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                                     sample_shape=K, name="sigma_sq"))
    # tf.sqrt rather than np.sqrt: sigma_sq is a Tensor inside the coroutine.
    scale = tf.sqrt(sigma_sq)

    gamma_T_star = compute_gamma_T_star(gamma_C, gamma_T, p)
    eta_T_star = compute_eta_T_star(gamma_C[..., tf.newaxis],
                                    gamma_T[..., tf.newaxis],
                                    eta_C, eta_T,
                                    p[..., tf.newaxis],
                                    gamma_T_star[..., tf.newaxis])

    # Likelihood. Note `probs=`: tfd.Binomial's second positional argument is
    # `logits`, but gamma_C / gamma_T_star are probabilities in (0, 1).
    y_C = yield mix(nC, eta_C, loc, scale, name="y_C")
    n0C = yield tfd.Binomial(nC, probs=gamma_C, name="n0C")
    y_T = yield mix(nT, eta_T_star, loc, scale, name="y_T")
    n0T = yield tfd.Binomial(nT, probs=gamma_T_star, name="n0T")
def create_prior(K, a_p=1, b_p=1, a_gamma=1, b_gamma=1, m_loc=0, g_loc=0.1,
                 m_sigma=3, s_sigma=2, m_nu=0, s_nu=1, m_skew=0, g_skew=0.1,
                 dtype=np.float64):
    return tfd.JointDistributionNamed(
        dict(
            p=tfd.Beta(dtype(a_p), dtype(b_p)),
            gamma_C=tfd.Gamma(dtype(a_gamma), dtype(b_gamma)),
            gamma_T=tfd.Gamma(dtype(a_gamma), dtype(b_gamma)),
            eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            # Cast s_nu like the other hyperparameters.
            nu=tfd.Sample(tfd.LogNormal(dtype(m_nu), dtype(s_nu)),
                          sample_shape=K),
            sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(m_sigma), dtype(s_sigma)),
                                sample_shape=K),
            loc=lambda sigma_sq: tfd.Independent(
                tfd.Normal(dtype(m_loc), g_loc * tf.sqrt(sigma_sq)),
                reinterpreted_batch_ndims=1),
            skew=lambda sigma_sq: tfd.Independent(
                tfd.Normal(dtype(m_skew), g_skew * tf.sqrt(sigma_sq)),
                reinterpreted_batch_ndims=1),
        ))
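# Example usage of `create_prior` above: a JointDistributionNamed supports
# dict-structured sampling and joint log_prob, with the `loc` and `skew`
# lambdas resolved against the sampled `sigma_sq`. K=3 is an arbitrary choice.
prior = create_prior(K=3)
draw = prior.sample(seed=42)  # dict with keys p, gamma_C, gamma_T, ..., skew
print(sorted(draw.keys()))
print(prior.log_prob(draw))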
def _build_test_model(self,
                      num_timesteps=5,
                      num_features=2,
                      batch_shape=(),
                      missing_prob=0,
                      true_noise_scale=0.1,
                      true_level_scale=0.04,
                      dtype=tf.float32):
    seed = test_util.test_seed(sampler_type='stateless')
    (design_seed, weights_seed, noise_seed,
     level_seed, is_missing_seed) = samplers.split_seed(
         seed, 5, salt='_build_test_model')

    design_matrix = samplers.normal(
        [num_timesteps, num_features], dtype=dtype, seed=design_seed)
    weights = samplers.normal(
        list(batch_shape) + [num_features], dtype=dtype, seed=weights_seed)
    regression = tf.linalg.matvec(design_matrix, weights)
    noise = samplers.normal(
        list(batch_shape) + [num_timesteps],
        dtype=dtype, seed=noise_seed) * true_noise_scale
    level = tf.cumsum(
        samplers.normal(list(batch_shape) + [num_timesteps],
                        dtype=dtype, seed=level_seed) * true_level_scale,
        axis=-1)
    time_series = regression + noise + level
    is_missing = samplers.uniform(
        list(batch_shape) + [num_timesteps],
        dtype=dtype, seed=is_missing_seed) < missing_prob

    model = gibbs_sampler.build_model_for_gibbs_fitting(
        observed_time_series=tfp.sts.MaskedTimeSeries(
            time_series[..., tf.newaxis], is_missing),
        design_matrix=design_matrix,
        weights_prior=tfd.Normal(loc=tf.cast(0., dtype),
                                 scale=tf.cast(10.0, dtype)),
        level_variance_prior=tfd.InverseGamma(
            concentration=tf.cast(0.01, dtype),
            scale=tf.cast(0.01 * 0.01, dtype)),
        observation_noise_variance_prior=tfd.InverseGamma(
            concentration=tf.cast(0.01, dtype),
            scale=tf.cast(0.01 * 0.01, dtype)))
    return model, time_series, is_missing
def __call__(self):
    """Get the distribution object from the backend"""
    if get_backend() == 'pytorch':
        raise NotImplementedError(
            'InverseGamma is not yet implemented for the pytorch backend')
    else:
        from tensorflow_probability import distributions as tfd
        return tfd.InverseGamma(self.concentration, self.scale)
def test_noise_variance_posterior_matches_expected(self):
    # Generate a synthetic regression task.
    num_features = 5
    num_outputs = 20
    design_matrix, _, targets = self.evaluate(
        self._random_regression_task(num_features=num_features,
                                     num_outputs=num_outputs,
                                     batch_shape=[2],
                                     seed=test_util.test_seed()))

    observation_noise_variance_prior_concentration = 0.03
    observation_noise_variance_prior_scale = 0.015
    # Posterior on noise variance if all weights are zero.
    naive_posterior = tfd.InverseGamma(
        concentration=(observation_noise_variance_prior_concentration +
                       num_outputs / 2.),
        scale=(observation_noise_variance_prior_scale +
               tf.reduce_sum(tf.square(targets), axis=-1) / 2.))

    # Compare to sampler with weights constrained to near-zero.
    # We can do this by reducing the width of the slab (here),
    # or by reducing the probability of the slab (below). Both should give
    # equivalent noise posteriors.
    tight_slab_sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        weights_prior_precision=tf.eye(num_features) * 1e6,
        observation_noise_variance_prior_concentration=(
            observation_noise_variance_prior_concentration),
        observation_noise_variance_prior_scale=(
            observation_noise_variance_prior_scale))
    self.assertAllClose(
        tight_slab_sampler.observation_noise_variance_posterior_concentration,
        naive_posterior.concentration)
    self.assertAllClose(
        tight_slab_sampler._initialize_sampler_state(
            targets=targets,
            nonzeros=tf.ones([num_features], dtype=tf.bool)
        ).observation_noise_variance_posterior_scale,
        naive_posterior.scale,
        atol=1e-2)

    downweighted_slab_sampler = spike_and_slab.SpikeSlabSampler(
        design_matrix,
        observation_noise_variance_prior_concentration=(
            observation_noise_variance_prior_concentration),
        observation_noise_variance_prior_scale=(
            observation_noise_variance_prior_scale))
    self.assertAllClose(
        downweighted_slab_sampler
        .observation_noise_variance_posterior_concentration,
        naive_posterior.concentration)
    self.assertAllClose(
        downweighted_slab_sampler._initialize_sampler_state(
            targets=targets,
            nonzeros=tf.zeros([num_features], dtype=tf.bool)
        ).observation_noise_variance_posterior_scale,
        naive_posterior.scale)
def __call__(self):
    """Get the distribution object from the backend"""
    if get_backend() == 'pytorch':
        import torch
        import torch.distributions as tod
        # If X ~ Gamma(concentration, rate), then 1/X ~
        # InverseGamma(concentration, scale=rate), so `self['scale']` is
        # passed as the Gamma rate and inverted with a power transform.
        # TODO: mean isn't implemented
        return tod.transformed_distribution.TransformedDistribution(
            tod.gamma.Gamma(self['concentration'], self['scale']),
            tod.transforms.PowerTransform(torch.tensor([-1.])))
    else:
        from tensorflow_probability import distributions as tfd
        return tfd.InverseGamma(self['concentration'], self['scale'])
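# The PyTorch branch above builds InverseGamma as a transformed Gamma. The
# same identity can be checked in TFP, which is a quick way to confirm why
# the scale parameter becomes the Gamma rate. A minimal sketch with arbitrary
# parameter values:
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

ig = tfd.InverseGamma(concentration=3., scale=2.)
ig_via_gamma = tfd.TransformedDistribution(
    distribution=tfd.Gamma(concentration=3., rate=2.),
    bijector=tfb.Reciprocal())
print(ig.log_prob(0.7), ig_via_gamma.log_prob(0.7))  # should agree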
def mean_variance_model(weights, concentration_c, scale_c):
    concentration = tf.reduce_sum(
        tf.expand_dims(concentration_c, -1) * weights, axis=-2)
    scale = tf.reduce_sum(
        tf.expand_dims(scale_c, -1) * weights, axis=-2)
    return tfd.InverseGamma(concentration=concentration, scale=scale)
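# Shape sketch for `mean_variance_model` above: with per-component parameters
# of shape [num_components] and weights of shape [num_components, output_dim],
# the result is an InverseGamma with batch shape [output_dim]. The numbers
# here are arbitrary illustration values.
import tensorflow as tf

concentration_c = tf.constant([2.0, 4.0])
scale_c = tf.constant([1.0, 3.0])
weights = tf.constant([[0.5, 1.0],
                       [0.5, 0.0]])  # [2 components, 2 outputs]
dist = mean_variance_model(weights, concentration_c, scale_c)
print(dist.batch_shape)     # [2]
print(dist.concentration)   # [0.5*2 + 0.5*4, 1.0*2 + 0.0*4] = [3., 2.]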
def create_model(n_C, n_T, K, neg_inf=-10, dtype=np.float64):
    return tfd.JointDistributionNamed(
        dict(
            p=tfd.Beta(dtype(1), dtype(1)),
            gamma_C=tfd.Gamma(dtype(3), dtype(3)),
            gamma_T=tfd.Gamma(dtype(3), dtype(3)),
            eta_C=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            eta_T=tfd.Dirichlet(tf.ones(K, dtype=dtype) / K),
            loc=tfd.Sample(tfd.Normal(dtype(0), dtype(1)), sample_shape=K),
            sigma_sq=tfd.Sample(tfd.InverseGamma(dtype(3), dtype(2)),
                                sample_shape=K),
            y_C=lambda gamma_C, eta_C, loc, sigma_sq: mix(
                gamma_C, eta_C, loc, tf.sqrt(sigma_sq), dtype(neg_inf), n_C),
            y_T=lambda gamma_C, gamma_T, eta_C, eta_T, p, loc, sigma_sq: mix_T(
                gamma_C, gamma_T, eta_C, eta_T, p, loc, tf.sqrt(sigma_sq),
                dtype(neg_inf), n_T)))
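# Example usage of `create_model` above (assuming the `mix` and `mix_T`
# helpers from this file are in scope): sampling yields a dict that includes
# the likelihood nodes y_C and y_T, and log_prob evaluates the joint density.
# The sizes are arbitrary.
model = create_model(n_C=50, n_T=50, K=3)
draw = model.sample(seed=7)
print(model.log_prob(draw))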
def __init__(self, data, options):
    self.kernel = options.kernel
    self.options = options
    self.τ = data.τ
    self.N_p = data.τ.shape[0]
    self.num_tfs = data.f_obs.shape[1]
    t_1, t_2 = get_time_square(self.τ, self.N_p)
    self.t_dist = t_1 - t_2
    self.tt = t_1 * t_2
    self.t2 = tf.square(t_1)
    self.tprime2 = tf.square(t_2)
    self.fixed_dist = FixedDistribution(
        tf.ones(self.num_tfs, dtype='float64'))
    min_dist = min(data.t[1:] - data.t[:-1])
    min_dist = max(min_dist, 1.)
    self._ranges = {
        'rbf': [
            (f64(1e-4), f64(5)),  # 1 + max(np.var(data.f_obs, axis=2))
            (f64(min_dist**2) - 1.2, f64(data.t[-1]**2))
        ],
        'mlp': [(f64(1), f64(10)), (f64(3.5), f64(20))],
    }
    self._priors = {
        'rbf': [
            tfd.Uniform(f64(1), f64(20)),
            tfd.Uniform(f64(min_dist**2), f64(10))
        ],
        'mlp': [
            tfd.Uniform(f64(3.5), f64(10)),
            tfd.InverseGamma(f64(0.01), f64(0.01))
        ],
    }
    v_prop = lambda v: tfd.TruncatedNormal(v, 0.007, low=0, high=100)
    l2_prop = lambda l2: tfd.TruncatedNormal(l2, 0.007, low=0, high=100)
    proposals = [v_prop, l2_prop]
    self._proposals = {'rbf': proposals}
    self._names = {'rbf': ['v', 'l2'], 'mlp': ['w', 'b']}
def __init__(self, data: DataHolder, options: Options):
    self.data = data
    self.samples = None
    self.N_p = data.τ.shape[0]
    self.N_m = data.t.shape[0]  # Number of observations
    self.num_tfs = data.f_obs.shape[1]  # Number of TFs
    self.num_genes = data.m_obs.shape[1]
    self.num_replicates = data.m_obs.shape[0]
    self.likelihood = TranscriptionLikelihood(data, options)
    self.options = options
    self.kernel_selector = GPKernelSelector(data, options)
    self.state_indices = {}
    step_sizes = self.options.initial_step_sizes
    logistic_step_size = step_sizes['nuts'] if 'nuts' in step_sizes else 0.00001

    # Latent function & GP hyperparameters
    kernel_initial = self.kernel_selector.initial_params()
    f_step_size = step_sizes['latents'] if 'latents' in step_sizes else 20
    latents_kernel = LatentKernel(data, options, self.likelihood,
                                  self.kernel_selector, self.state_indices,
                                  f_step_size * tf.ones(self.N_p, dtype='float64'))
    latents_initial = 0.3 * tf.ones(
        (self.num_replicates, self.num_tfs, self.N_p), dtype='float64')
    if self.options.joint_latent:
        latents_initial = [latents_initial, *kernel_initial]
    latents = KernelParameter('latents', self.fbar_prior, latents_initial,
                              kernel=latents_kernel, requires_all_states=False)

    # White noise for genes
    if not options.preprocessing_variance:
        def m_sq_diff_fn(all_states):
            fbar, k_fbar, kbar, wbar, w_0bar, σ2_m, Δ = \
                self.likelihood.get_parameters_from_state(
                    all_states, self.state_indices)
            m_pred = self.likelihood.predict_m(kbar, k_fbar, wbar, fbar,
                                               w_0bar, Δ)
            sq_diff = tfm.square(self.data.m_obs - tf.transpose(
                tf.gather(tf.transpose(m_pred), self.data.common_indices)))
            return tf.reduce_sum(sq_diff, axis=0)

        σ2_m_kernel = GibbsKernel(data, options, self.likelihood,
                                  tfd.InverseGamma(f64(0.01), f64(0.01)),
                                  self.state_indices, m_sq_diff_fn)
        σ2_m = KernelParameter(
            'σ2_m', None, 1e-3 * tf.ones((self.num_genes, 1), dtype='float64'),
            kernel=σ2_m_kernel)
    else:
        def σ2_m_log_prob(all_states):
            def σ2_m_log_prob_fn(σ2_mstar):
                new_prob = self.likelihood.genes(
                    all_states=all_states,
                    state_indices=self.state_indices,
                    σ2_m=σ2_mstar
                ) + self.params.σ2_m.prior.log_prob(logit(σ2_mstar))
                return tf.reduce_sum(new_prob)
            return σ2_m_log_prob_fn

        σ2_m = KernelParameter(
            'σ2_m', LogisticNormal(f64(1e-5), f64(1e-2)),
            logistic(f64(5e-3)) * tf.ones(self.num_genes, dtype='float64'),
            hmc_log_prob=σ2_m_log_prob, requires_all_states=True,
            step_size=logistic_step_size)

    kernel_params = None
    if not self.options.joint_latent:
        # GP kernel
        def kernel_params_log_prob(all_states):
            def kernel_params_log_prob_fn(param_0bar, param_1bar):
                param_0 = logit(param_0bar,
                                nan_replace=self.params.kernel_params.prior[0].b)
                param_1 = logit(param_1bar,
                                nan_replace=self.params.kernel_params.prior[1].b)
                new_prob = tf.reduce_sum(self.params.latents.prior(
                    all_states[self.state_indices['latents']],
                    param_0bar, param_1bar))
                new_prob += self.params.kernel_params.prior[0].log_prob(param_0)
                new_prob += self.params.kernel_params.prior[1].log_prob(param_1)
                return tf.reduce_sum(new_prob)
            return kernel_params_log_prob_fn

        kernel_initial = self.kernel_selector.initial_params()
        kernel_ranges = self.kernel_selector.ranges()
        kernel_params = KernelParameter(
            'kernel_params',
            [LogisticNormal(*kernel_ranges[0]), LogisticNormal(*kernel_ranges[1])],
            [logistic(k) for k in kernel_initial],
            step_size=0.1 * logistic_step_size,
            hmc_log_prob=kernel_params_log_prob, requires_all_states=True)

    # Kinetic parameters & Interaction weights
    w_prior = LogisticNormal(f64(-2), f64(2))
    w_initial = logistic(
        1 * tf.ones((self.num_genes, self.num_tfs), dtype='float64'))
    w_0_prior = LogisticNormal(f64(-0.8), f64(0.8))
    w_0_initial = logistic(0 * tf.ones(self.num_genes, dtype='float64'))

    def weights_log_prob(all_states):
        def weights_log_prob_fn(wbar, w_0bar):
            new_prob = tf.reduce_sum(
                self.params.weights.prior[0].log_prob(wbar))
            new_prob += tf.reduce_sum(
                self.params.weights.prior[1].log_prob(w_0bar))
            new_prob += tf.reduce_sum(self.likelihood.genes(
                all_states=all_states,
                state_indices=self.state_indices,
                wbar=wbar,
                w_0bar=w_0bar))
            return new_prob
        return weights_log_prob_fn

    weights_kernel = RWMWrapperKernel(
        weights_log_prob,
        new_state_fn=tfp.mcmc.random_walk_normal_fn(scale=0.08))
    weights = KernelParameter(
        'weights', [w_prior, w_0_prior], [w_initial, w_0_initial],
        hmc_log_prob=weights_log_prob, step_size=10 * logistic_step_size,
        requires_all_states=True)  # TODO kernel=weights_kernel

    num_kin = 4 if self.options.initial_conditions else 3
    kbar_initial = 0.8 * tf.ones((self.num_genes, num_kin), dtype='float64')

    def kbar_log_prob(all_states):
        def kbar_log_prob_fn(*args):  # kbar, k_fbar, wbar, w_0bar
            index = 0
            kbar = args[index]
            new_prob = 0
            k_m = logit(kbar)
            if self.options.kinetic_exponential:
                k_m = tf.exp(k_m)
            lik_args = {'kbar': kbar}
            new_prob += tf.reduce_sum(
                self.params.kinetics.prior[index].log_prob(k_m))
            if options.translation:
                index += 1
                k_fbar = args[index]
                lik_args['k_fbar'] = k_fbar
                new_prob += tf.reduce_sum(
                    self.params.kinetics.prior[index].log_prob(logit(k_fbar)))
            if options.weights:
                index += 1
                wbar = args[index]
                w_0bar = args[index + 1]
                new_prob += tf.reduce_sum(
                    self.params.weights.prior[0].log_prob(wbar))
                new_prob += tf.reduce_sum(
                    self.params.weights.prior[1].log_prob(w_0bar))
                lik_args['wbar'] = wbar
                lik_args['w_0bar'] = w_0bar
            new_prob += tf.reduce_sum(self.likelihood.genes(
                all_states=all_states,
                state_indices=self.state_indices,
                **lik_args))
            return tf.reduce_sum(new_prob)
        return kbar_log_prob_fn

    k_fbar_initial = 0.8 * tf.ones((self.num_tfs,), dtype='float64')
    kinetics_initial = [kbar_initial]
    kinetics_priors = [LogisticNormal(0.01, 30)]
    if options.translation:
        kinetics_initial += [k_fbar_initial]
        kinetics_priors += [LogisticNormal(0.1, 7)]
    if options.weights:
        kinetics_initial += [w_initial, w_0_initial]
    kinetics = KernelParameter(
        'kinetics', kinetics_priors, kinetics_initial,
        hmc_log_prob=kbar_log_prob, step_size=logistic_step_size,
        requires_all_states=True)

    delta_kernel = DelayKernel(self.likelihood, 0, 10, self.state_indices,
                               tfd.Exponential(f64(0.3)))
    Δ = KernelParameter('Δ', tfd.InverseGamma(f64(0.01), f64(0.01)),
                        0.6 * tf.ones(self.num_tfs, dtype='float64'),
                        kernel=delta_kernel, requires_all_states=False)

    σ2_f = None
    if not options.preprocessing_variance:
        def f_sq_diff_fn(all_states):
            f_pred = inverse_positivity(
                all_states[self.state_indices['latents']][0])
            sq_diff = tfm.square(self.data.f_obs - tf.transpose(
                tf.gather(tf.transpose(f_pred), self.data.common_indices)))
            return tf.reduce_sum(sq_diff, axis=0)

        kernel = GibbsKernel(data, options, self.likelihood,
                             tfd.InverseGamma(f64(0.01), f64(0.01)),
                             self.state_indices, f_sq_diff_fn)
        σ2_f = KernelParameter(
            'σ2_f', None, 1e-4 * tf.ones((self.num_tfs, 1), dtype='float64'),
            kernel=kernel)

    self.params = Params(latents, weights, kinetics, Δ, kernel_params, σ2_m, σ2_f)

    self.active_params = [
        self.params.kinetics,
        self.params.latents,
        self.params.σ2_m,
    ]
    # if options.weights:
    #     self.active_params += [self.params.weights]
    if not options.joint_latent:
        self.active_params += [self.params.kernel_params]
    if not options.preprocessing_variance:
        self.active_params += [self.params.σ2_f]
    if options.delays:
        self.active_params += [self.params.Δ]

    self.state_indices.update({
        param.name: i for i, param in enumerate(self.active_params)
    })
def _init_distribution(conditions, **kwargs):
    concentration, scale = conditions["concentration"], conditions["scale"]
    return tfd.InverseGamma(concentration=concentration, scale=scale, **kwargs)
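# Example call for `_init_distribution` above: `conditions` is a plain dict of
# parameters and any extra kwargs pass through to the TFP constructor. Values
# are arbitrary.
dist = _init_distribution(
    {"concentration": 3.0, "scale": 2.0}, validate_args=True)
print(dist.mean())  # scale / (concentration - 1) = 1.0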
def create_distributions(self):
    """Create distribution objects."""
    self.bijectors = {
        'u': tfb.Softplus(),
        'v': tfb.Softplus(),
        'u_eta': tfb.Softplus(),
        'u_tau': tfb.Softplus(),
        's': tfb.Softplus(),
        's_eta': tfb.Softplus(),
        's_tau': tfb.Softplus(),
        'w': tfb.Softplus()
    }
    symmetry_breaking_decay = self.symmetry_breaking_decay**tf.cast(
        tf.range(self.latent_dim), self.dtype)[tf.newaxis, ...]

    distribution_dict = {
        'v': tfd.Independent(
            tfd.HalfNormal(scale=0.1 * tf.ones(
                (self.latent_dim, self.feature_dim), dtype=self.dtype)),
            reinterpreted_batch_ndims=2),
        'w': tfd.Independent(
            tfd.HalfNormal(scale=tf.ones((1, self.feature_dim),
                                         dtype=self.dtype)),
            reinterpreted_batch_ndims=2)
    }

    if self.horseshoe_plus:
        distribution_dict = {
            **distribution_dict,
            'u': lambda u_eta, u_tau: tfd.Independent(
                tfd.HalfNormal(scale=u_eta * u_tau * symmetry_breaking_decay),
                reinterpreted_batch_ndims=2),
            'u_eta': tfd.Independent(
                tfd.HalfCauchy(
                    loc=tf.zeros((self.feature_dim, self.latent_dim),
                                 dtype=self.dtype),
                    scale=tf.ones((self.feature_dim, self.latent_dim),
                                  dtype=self.dtype)),
                reinterpreted_batch_ndims=2),
            'u_tau': tfd.Independent(
                tfd.HalfCauchy(
                    loc=tf.zeros((1, self.latent_dim), dtype=self.dtype),
                    scale=tf.ones((1, self.latent_dim),
                                  dtype=self.dtype) * self.u_tau_scale),
                reinterpreted_batch_ndims=2),
        }
        distribution_dict['s'] = lambda s_eta, s_tau: tfd.Independent(
            tfd.HalfNormal(scale=s_eta * s_tau), reinterpreted_batch_ndims=2)
        distribution_dict['s_eta'] = tfd.Independent(
            tfd.HalfCauchy(
                loc=tf.zeros((2, self.feature_dim), dtype=self.dtype),
                scale=tf.ones((2, self.feature_dim), dtype=self.dtype)),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_tau'] = tfd.Independent(
            tfd.HalfCauchy(
                loc=tf.zeros((1, self.feature_dim), dtype=self.dtype),
                scale=tf.ones((1, self.feature_dim),
                              dtype=self.dtype) * self.s_tau_scale),
            reinterpreted_batch_ndims=2)

        self.bijectors['u_eta_a'] = tfb.Softplus()
        self.bijectors['u_tau_a'] = tfb.Softplus()
        self.bijectors['s_eta_a'] = tfb.Softplus()
        self.bijectors['s_tau_a'] = tfb.Softplus()

        distribution_dict['u_eta'] = lambda u_eta_a: tfd.Independent(
            SqrtInverseGamma(
                concentration=0.5 * tf.ones(
                    (self.feature_dim, self.latent_dim), dtype=self.dtype),
                scale=1.0 / u_eta_a),
            reinterpreted_batch_ndims=2)
        distribution_dict['u_eta_a'] = tfd.Independent(
            tfd.InverseGamma(
                concentration=0.5 * tf.ones(
                    (self.feature_dim, self.latent_dim), dtype=self.dtype),
                scale=tf.ones((self.feature_dim, self.latent_dim),
                              dtype=self.dtype)),
            reinterpreted_batch_ndims=2)
        distribution_dict['u_tau'] = lambda u_tau_a: tfd.Independent(
            SqrtInverseGamma(
                concentration=0.5 * tf.ones((1, self.latent_dim),
                                            dtype=self.dtype),
                scale=1.0 / u_tau_a),
            reinterpreted_batch_ndims=2)
        distribution_dict['u_tau_a'] = tfd.Independent(
            tfd.InverseGamma(
                concentration=0.5 * tf.ones((1, self.latent_dim),
                                            dtype=self.dtype),
                scale=tf.ones((1, self.latent_dim),
                              dtype=self.dtype) / self.u_tau_scale**2),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_eta'] = lambda s_eta_a: tfd.Independent(
            SqrtInverseGamma(
                concentration=0.5 * tf.ones((2, self.feature_dim),
                                            dtype=self.dtype),
                scale=1.0 / s_eta_a),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_eta_a'] = tfd.Independent(
            tfd.InverseGamma(
                concentration=0.5 * tf.ones((2, self.feature_dim),
                                            dtype=self.dtype),
                scale=tf.ones((2, self.feature_dim), dtype=self.dtype)),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_tau'] = lambda s_tau_a: tfd.Independent(
            SqrtInverseGamma(
                concentration=0.5 * tf.ones((1, self.feature_dim),
                                            dtype=self.dtype),
                scale=1.0 / s_tau_a),
            reinterpreted_batch_ndims=2)
        distribution_dict['s_tau_a'] = tfd.Independent(
            tfd.InverseGamma(
                concentration=0.5 * tf.ones((1, self.feature_dim),
                                            dtype=self.dtype),
                scale=tf.ones((1, self.feature_dim),
                              dtype=self.dtype) / self.s_tau_scale**2),
            reinterpreted_batch_ndims=2)
    else:
        distribution_dict = {
            **distribution_dict,
            # Note: reinterpreted_batch_ndims belongs to Independent, not to
            # AbsHorseshoe (a misplaced parenthesis in the original).
            'u': tfd.Independent(
                AbsHorseshoe(
                    scale=(self.u_tau_scale * symmetry_breaking_decay *
                           tf.ones((self.feature_dim, self.latent_dim),
                                   dtype=self.dtype))),
                reinterpreted_batch_ndims=2),
            's': tfd.Independent(
                AbsHorseshoe(scale=self.s_tau_scale * tf.ones(
                    (1, self.feature_dim), dtype=self.dtype)),
                reinterpreted_batch_ndims=2)
        }

    self.prior_distribution = tfd.JointDistributionNamed(distribution_dict)

    surrogate_dict = {
        'v': self.bijectors['v'](build_trainable_normal_dist(
            -6. * tf.ones((self.latent_dim, self.feature_dim),
                          dtype=self.dtype),
            5e-4 * tf.ones((self.latent_dim, self.feature_dim),
                           dtype=self.dtype),
            2, strategy=self.strategy)),
        'w': self.bijectors['w'](build_trainable_normal_dist(
            -6 * tf.ones((1, self.feature_dim), dtype=self.dtype),
            5e-4 * tf.ones((1, self.feature_dim), dtype=self.dtype),
            2, strategy=self.strategy))
    }

    if self.horseshoe_plus:
        surrogate_dict = {
            **surrogate_dict,
            'u': self.bijectors['u'](build_trainable_normal_dist(
                -6. * tf.ones((self.feature_dim, self.latent_dim),
                              dtype=self.dtype),
                5e-4 * tf.ones((self.feature_dim, self.latent_dim),
                               dtype=self.dtype),
                2, strategy=self.strategy)),
            'u_eta': self.bijectors['u_eta'](build_trainable_InverseGamma_dist(
                3 * tf.ones((self.feature_dim, self.latent_dim),
                            dtype=self.dtype),
                tf.ones((self.feature_dim, self.latent_dim), dtype=self.dtype),
                2, strategy=self.strategy)),
            'u_tau': self.bijectors['u_tau'](build_trainable_InverseGamma_dist(
                3 * tf.ones((1, self.latent_dim), dtype=self.dtype),
                tf.ones((1, self.latent_dim), dtype=self.dtype),
                2, strategy=self.strategy)),
        }
        surrogate_dict['s_eta'] = self.bijectors['s_eta'](
            build_trainable_InverseGamma_dist(
                tf.ones((2, self.feature_dim), dtype=self.dtype),
                tf.ones((2, self.feature_dim), dtype=self.dtype),
                2, strategy=self.strategy))
        surrogate_dict['s_tau'] = self.bijectors['s_tau'](
            build_trainable_InverseGamma_dist(
                1 * tf.ones((1, self.feature_dim), dtype=self.dtype),
                tf.ones((1, self.feature_dim), dtype=self.dtype),
                2, strategy=self.strategy))
        surrogate_dict['s'] = self.bijectors['s'](
            build_trainable_normal_dist(
                tf.ones((2, self.feature_dim), dtype=self.dtype) * tf.cast(
                    [[-2.], [-1.]], dtype=self.dtype),
                1e-3 * tf.ones((2, self.feature_dim), dtype=self.dtype),
                2, strategy=self.strategy))

        self.bijectors['u_eta_a'] = tfb.Softplus()
        self.bijectors['u_tau_a'] = tfb.Softplus()
        surrogate_dict['u_eta_a'] = self.bijectors['u_eta_a'](
            build_trainable_InverseGamma_dist(
                2. * tf.ones((self.feature_dim, self.latent_dim),
                             dtype=self.dtype),
                tf.ones((self.feature_dim, self.latent_dim), dtype=self.dtype),
                2, strategy=self.strategy))
        surrogate_dict['u_tau_a'] = self.bijectors['u_tau_a'](
            build_trainable_InverseGamma_dist(
                2. * tf.ones((1, self.latent_dim), dtype=self.dtype),
                tf.ones((1, self.latent_dim),
                        dtype=self.dtype) / self.u_tau_scale**2,
                2, strategy=self.strategy))

        self.bijectors['s_eta_a'] = tfb.Softplus()
        self.bijectors['s_tau_a'] = tfb.Softplus()
        surrogate_dict['s_eta_a'] = self.bijectors['s_eta_a'](
            build_trainable_InverseGamma_dist(
                2. * tf.ones((2, self.feature_dim), dtype=self.dtype),
                tf.ones((2, self.feature_dim), dtype=self.dtype),
                2, strategy=self.strategy))
        surrogate_dict['s_tau_a'] = self.bijectors['s_tau_a'](
            build_trainable_InverseGamma_dist(
                2. * tf.ones((1, self.feature_dim), dtype=self.dtype),
                (tf.ones((1, self.feature_dim),
                         dtype=self.dtype) / self.s_tau_scale**2),
                2, strategy=self.strategy))
    else:
        surrogate_dict = {
            **surrogate_dict,
            's': self.bijectors['s'](build_trainable_normal_dist(
                tf.ones((2, self.feature_dim), dtype=self.dtype) * tf.cast(
                    [[-2.], [-1.]], dtype=self.dtype),
                1e-3 * tf.ones((2, self.feature_dim), dtype=self.dtype),
                2, strategy=self.strategy)),
            'u': self.bijectors['u'](build_trainable_normal_dist(
                -9. * tf.ones((self.feature_dim, self.latent_dim),
                              dtype=self.dtype),
                5e-4 * tf.ones((self.feature_dim, self.latent_dim),
                               dtype=self.dtype),
                2, strategy=self.strategy))
        }

    self.surrogate_distribution = tfd.JointDistributionNamed(surrogate_dict)
    self.surrogate_vars = self.surrogate_distribution.variables
    self.var_list = list(surrogate_dict.keys())
    self.set_calibration_expectations()
def iterate(self):
    params = self.params
    # Compute likelihood for comparison
    old_m_likelihood, sq_diff_m = self.likelihood.genes(
        params, return_sq_diff=True)
    old_f_likelihood = 0
    if self.options.tf_mrna_present:
        old_f_likelihood, sq_diff_f = self.likelihood.tfs(
            params, params.fbar.value, return_sq_diff=True)

    # Untransformed TF mRNA vectors F (Step 1)
    fbar = params.fbar.value
    # Gibbs step
    z_i = tfd.MultivariateNormalDiag(fbar, self.h_f).sample()
    # MH
    m, K = self.fbar_prior_params(params.V.value, params.L.value)
    for r in range(self.num_replicates):
        for i in range(self.num_tfs):  # TODO does not really work for multiple TFs
            invKsigmaK = tf.matmul(
                tf.linalg.inv(K + tf.linalg.diag(self.h_f)), K)  # (C_i + hI)^-1 C_i
            L = jitter_cholesky(K - tf.matmul(K, invKsigmaK))
            c_mu = tf.linalg.matvec(invKsigmaK, z_i[r][i])
            fstar_i = tf.matmul(
                tf.random.normal((1, L.shape[0]), dtype='float64'), L) + c_mu
            mask = np.zeros((self.num_replicates, 1, 1), dtype='float64')
            mask[r] = 1
            fstar = (1 - mask) * fbar + mask * fstar_i
            new_m_likelihood = self.likelihood.genes(params, fbar=fstar)
            new_f_likelihood = 0
            if self.options.tf_mrna_present:
                new_f_likelihood = self.likelihood.tfs(params, fstar)
            new_prob = np.sum(new_m_likelihood) + np.sum(new_f_likelihood)
            old_prob = np.sum(old_m_likelihood) + np.sum(old_f_likelihood)
            if self.is_accepted(new_prob, old_prob):
                params.fbar.value[r] = fstar_i
                old_m_likelihood = new_m_likelihood
                old_f_likelihood = new_f_likelihood
                self.acceptance_rates['fbar'] += 1 / (
                    self.num_tfs * self.num_replicates)

    if self.options.tf_mrna_present:  # (Step 2)
        # Log of translation ODE degradation rates
        δbar = params.δbar.value
        for i in range(self.num_tfs):  # TODO make for self.num_tfs > 1
            # Proposal distribution; δstar is in log-space
            δstar = params.δbar.propose(δbar)
            new_prob = np.sum(self.likelihood.genes(params, δbar=δstar)) + \
                params.δbar.prior.log_prob(δstar)
            old_prob = np.sum(old_m_likelihood) + \
                params.δbar.prior.log_prob(δbar)
            if self.is_accepted(new_prob, old_prob):
                params.δbar.value = δstar
                self.acceptance_rates['δbar'] += 1 / self.num_tfs

    # Log of transcription ODE kinetic params (Step 3)
    kbar = params.kbar.value
    kstar = kbar.copy()
    for j in range(self.num_genes):
        sample = params.kbar.propose(kstar[j])
        sample = params.kbar.constrain(sample, j)
        kstar[j] = sample
        new_prob = self.likelihood.genes(params, kbar=kstar)[j] + sum(
            params.kbar.prior.log_prob(sample))
        old_prob = old_m_likelihood[j] + sum(
            params.kbar.prior.log_prob(kbar[j]))
        if self.is_accepted(new_prob, old_prob):
            test = params.kbar.value
            test[j] = sample
            params.kbar.value = test
            self.acceptance_rates['kbar'] += 1 / self.num_genes
        else:
            kstar[j] = params.kbar.value[j]

    # Interaction weights and biases (Step 4)
    # (note: should work for self.num_tfs > 1)
    if self.options.weights:
        w = params.w.value
        w_0 = params.w_0.value
        wstar = w.copy()
        w_0star = w_0.copy()
        for j in range(self.num_genes):
            sample_0 = params.w_0.propose(w_0[j], j)
            sample = params.w.propose(wstar[j], j)
            wstar[j] = sample
            w_0star[j] = sample_0
            new_prob = self.likelihood.genes(params, w=wstar, w_0=w_0star)[j] + \
                sum(params.w.prior.log_prob(sample)) + \
                params.w_0.prior.log_prob(sample_0)
            old_prob = old_m_likelihood[j] + \
                sum(params.w.prior.log_prob(w[j, :])) + \
                params.w_0.prior.log_prob(w_0[j])
            if self.is_accepted(new_prob, old_prob):
                params.w.value[j] = sample
                params.w_0.value[j] = sample_0
                self.acceptance_rates['w'] += 1 / self.num_genes
                self.acceptance_rates['w_0'] += 1 / self.num_genes
            else:
                wstar[j] = params.w.value[j]

    # Noise variances
    if self.options.preprocessing_variance:
        σ2_m = params.σ2_m.value
        σ2_mstar = σ2_m.copy()
        for j in range(self.num_genes):
            sample = params.σ2_m.propose(σ2_m[j])
            σ2_mstar[j] = sample
            old_q = params.σ2_m.proposal_dist(σ2_mstar[j]).log_prob(σ2_m[j])
            new_prob = self.likelihood.genes(params, σ2_m=σ2_mstar)[j] + \
                params.σ2_m.prior.log_prob(σ2_mstar[j])
            new_q = params.σ2_m.proposal_dist(σ2_m[j]).log_prob(σ2_mstar[j])
            old_prob = self.likelihood.genes(params, σ2_m=σ2_m)[j] + \
                params.σ2_m.prior.log_prob(σ2_m[j])
            if self.is_accepted(new_prob + old_q, old_prob + new_q):
                params.σ2_m.value[j] = sample
                self.acceptance_rates['σ2_m'] += 1 / self.num_genes
            else:
                σ2_mstar[j] = σ2_m[j]
    else:  # Use Gibbs sampling
        # Prior parameters
        α = params.σ2_m.prior.concentration
        β = params.σ2_m.prior.scale
        # Conditional posterior of inverse-gamma parameters:
        α_post = α + 0.5 * self.N_m
        β_post = β + 0.5 * np.sum(sq_diff_m)
        params.σ2_m.value = np.repeat(
            tfd.InverseGamma(α_post, β_post).sample(), self.num_genes)
        self.acceptance_rates['σ2_m'] += 1

    if self.options.tf_mrna_present:  # (Step 5)
        # Prior parameters
        α = params.σ2_f.prior.concentration
        β = params.σ2_f.prior.scale
        # Conditional posterior of inverse-gamma parameters:
        α_post = α + 0.5 * self.N_m
        β_post = β + 0.5 * np.sum(sq_diff_f)
        params.σ2_f.value = np.repeat(
            tfd.InverseGamma(α_post, β_post).sample(), self.num_tfs)
        self.acceptance_rates['σ2_f'] += 1

    # Length scales and variances of GP kernels
    l2 = params.L.value
    v = params.V.value
    for i in range(self.num_tfs):
        # Proposal distributions
        Q_v = params.V.proposal_dist
        Q_l = params.L.proposal_dist
        vstar = params.V.propose(v)
        l2star = params.L.propose(l2)
        # Acceptance probabilities
        new_fbar_prior = params.fbar.prior(params.fbar.value, vstar, l2star)
        old_q = Q_v(vstar).log_prob(v) + Q_l(l2star).log_prob(l2)  # Q(old|new)
        new_prob = new_fbar_prior + params.V.prior.log_prob(vstar) + \
            params.L.prior.log_prob(l2star)
        new_q = Q_v(v).log_prob(vstar) + Q_l(l2).log_prob(l2star)  # Q(new|old)
        old_prob = params.fbar.prior(params.fbar.value, v, l2) + \
            params.V.prior.log_prob(v) + params.L.prior.log_prob(l2)
        if self.is_accepted(new_prob + old_q, old_prob + new_q):
            params.V.value = vstar
            params.L.value = l2star
            self.acceptance_rates['V'] += 1 / self.num_tfs
            self.acceptance_rates['L'] += 1 / self.num_tfs
def __init__(self, data, options):
    self.data = data
    min_dist = min(data.t[1:] - data.t[:-1])
    self.N_p = data.τ.shape[0]
    self.N_m = data.t.shape[0]  # Number of observations
    self.num_replicates = data.f_obs.shape[0]
    self.num_tfs = data.f_obs.shape[1]
    self.num_genes = data.m_obs.shape[1]
    self.kernel_selector = GPKernelSelector(data, options)
    self.likelihood = TranscriptionLikelihood(data, options)
    self.options = options

    # Adaptable variances
    a = tf.constant(-0.5, dtype='float64')
    b2 = tf.constant(2., dtype='float64')
    self.h_f = 0.15 * tf.ones(self.N_p, dtype='float64')

    # Interaction weights
    w_0 = Parameter('w_0', tfd.Normal(0, 2), np.zeros(self.num_genes),
                    step_size=0.2 * tf.ones(self.num_genes, dtype='float64'))
    w_0.proposal_dist = lambda mu, j: tfd.Normal(mu, w_0.step_size[j])
    w = Parameter('w', tfd.Normal(0, 2),
                  1 * np.ones((self.num_genes, self.num_tfs)),
                  step_size=0.2 * tf.ones(self.num_genes, dtype='float64'))
    # At the moment this is the same as the w_0 proposal (see pg. 8)
    w.proposal_dist = lambda mu, j: tfd.Normal(mu, w.step_size[j])

    # Latent function
    fbar = Parameter(
        'fbar', self.fbar_prior,
        0.5 * np.ones((self.num_replicates, self.num_tfs, self.N_p)))

    # GP hyperparameters
    V = Parameter('V', tfd.InverseGamma(f64(0.01), f64(0.01)), f64(1),
                  step_size=0.05, fixed=not options.tf_mrna_present)
    # v_i: fixed to 1 if the translation model is not used (pg. 8)
    V.proposal_dist = lambda v: tfd.TruncatedNormal(
        v, V.step_size, low=0, high=100)
    L = Parameter('L',
                  tfd.Uniform(f64(min_dist**2 - 0.5), f64(data.t[-1]**2)),
                  f64(4), step_size=0.05)  # TODO auto set
    L.proposal_dist = lambda l2: tfd.TruncatedNormal(
        l2, L.step_size, low=0, high=100)  # l2_i

    # Translation kinetic parameters
    δbar = Parameter('δbar', tfd.Normal(a, b2), f64(-0.3), step_size=0.05)
    δbar.proposal_dist = lambda mu: tfd.Normal(mu, δbar.step_size)

    # White noise for genes
    σ2_m = Parameter('σ2_m', tfd.InverseGamma(f64(0.01), f64(0.01)),
                     1e-4 * np.ones(self.num_genes), step_size=0.01)
    σ2_m.proposal_dist = lambda mu: tfd.TruncatedNormal(
        mu, σ2_m.step_size, low=0, high=0.1)

    # Transcription kinetic parameters
    constraint_index = 2 if self.options.initial_conditions else 1

    def constrain_kbar(kbar, gene):
        '''Constrains a given row in kbar'''
        kbar[kbar < -10] = -10
        kbar[kbar > 3] = 3
        return kbar

    num_var = 4 if self.options.initial_conditions else 3
    kbar_initial = -0.1 * np.ones((self.num_genes, num_var), dtype='float64')
    for j, k in enumerate(kbar_initial):
        kbar_initial[j] = constrain_kbar(k, j)
    kbar = Parameter('kbar', tfd.Normal(a, b2), kbar_initial,
                     constraint=constrain_kbar,
                     step_size=0.05 * tf.ones(num_var, dtype='float64'))
    kbar.proposal_dist = lambda mu: tfd.MultivariateNormalDiag(
        mu, kbar.step_size)

    if not options.preprocessing_variance:
        σ2_f = Parameter('σ2_f', tfd.InverseGamma(f64(0.01), f64(0.01)),
                         1e-4 * np.ones(self.num_tfs),
                         step_size=tf.constant(0.5, dtype='float64'))
        super().__init__(
            TupleParams_pre(fbar, δbar, kbar, σ2_m, w, w_0, L, V, σ2_f))
    else:
        super().__init__(TupleParams(fbar, δbar, kbar, σ2_m, w, w_0, L, V))
def _base_dist(self, alpha: TensorLike, beta: TensorLike, *args, **kwargs):
    # TFP's `rate` keyword was a deprecated alias for `scale`.
    return tfd.InverseGamma(concentration=alpha, scale=beta, *args, **kwargs)
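# Sanity sketch for `_base_dist` above: with the shape/scale convention,
# InverseGamma(α, β) has mean β/(α-1) for α > 1, which is why the old `rate`
# keyword (a deprecated alias for `scale`) is written as `scale` here. Values
# are arbitrary.
import tensorflow_probability as tfp

tfd = tfp.distributions
print(tfd.InverseGamma(concentration=4., scale=6.).mean())  # 6/(4-1) = 2.0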
def std(self):
    """Variational posterior for the noise standard deviation"""
    return tfd.InverseGamma(tf.exp(self.s_alpha), tf.exp(self.s_beta))
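# The property above parameterizes the InverseGamma through exp() so that the
# underlying variational variables are unconstrained while concentration and
# scale stay positive during optimization. A standalone sketch, where s_alpha
# and s_beta are hypothetical unconstrained tf.Variables:
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

s_alpha = tf.Variable(0.0)  # exp(0.0) = 1.0
s_beta = tf.Variable(0.5)
q = tfd.InverseGamma(tf.exp(s_alpha), tf.exp(s_beta))
print(q.concentration.numpy(), q.scale.numpy())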
def estimate_splicing_code(
        qx_feature_loc, qx_feature_scale,
        donor_seqs, acceptor_seqs, alt_donor_seqs, alt_acceptor_seqs,
        donor_cons, acceptor_cons, alt_donor_cons, alt_acceptor_cons,
        tissues):
    num_samples = len(tissues)
    num_tissues = np.max(tissues)

    tissue_matrix = np.zeros((num_samples, num_tissues), dtype=np.float32)
    for (i, j) in enumerate(tissues):
        tissue_matrix[i, j - 1] = 1

    # [num_features, seq_length, 4]
    seqs = np.hstack(
        [donor_seqs, acceptor_seqs, alt_donor_seqs, alt_acceptor_seqs])
    cons = np.hstack(
        [donor_cons, acceptor_cons, alt_donor_cons, alt_acceptor_cons])
    seqs = np.concatenate((seqs, np.expand_dims(cons, 2)), axis=2)
    print(seqs.shape)

    num_features = seqs.shape[0]

    # Split into testing and training data
    shuffled_feature_idxs = np.arange(num_features)
    np.random.shuffle(shuffled_feature_idxs)
    seqs_train_len = int(np.floor(0.75 * num_features))
    seqs_test_len = num_features - seqs_train_len

    print(num_features)
    print(seqs_train_len)
    print(seqs_test_len)
    print(qx_feature_loc.shape)
    print(qx_feature_scale.shape)

    train_idxs = shuffled_feature_idxs[:seqs_train_len]
    test_idxs = shuffled_feature_idxs[seqs_train_len:]
    seqs_train = seqs[train_idxs]
    seqs_test = seqs[test_idxs]
    qx_feature_loc_train = qx_feature_loc[:, train_idxs]
    qx_feature_scale_train = qx_feature_scale[:, train_idxs]
    qx_feature_loc_test = qx_feature_loc[:, test_idxs]
    qx_feature_scale_test = qx_feature_scale[:, test_idxs]

    keep_prob = tf.placeholder(tf.float32)

    # Model
    lyr0_input = tf.placeholder(
        tf.float32, (None, seqs_train.shape[1], seqs_train.shape[2]))
    lyr0 = lyr0_input
    print(lyr0)
    training_flag = tf.placeholder(tf.bool)

    conv1 = tf.layers.conv1d(
        inputs=lyr0,
        filters=32,
        kernel_size=4,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="conv1")
    conv1_dropout = tf.layers.dropout(
        inputs=conv1, rate=0.5, training=training_flag, name="conv1_dropout")
    pool1 = tf.layers.max_pooling1d(
        inputs=conv1_dropout, pool_size=2, strides=2, name="pool1")
    conv2 = tf.layers.conv1d(
        inputs=pool1,
        filters=64,
        kernel_size=4,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="conv2")
    pool2 = tf.layers.max_pooling1d(
        inputs=conv2, pool_size=2, strides=2, name="pool2")
    pool2_flat = tf.layers.flatten(pool2, name="pool2_flat")
    dense1 = tf.layers.dense(
        inputs=pool2_flat,
        units=256,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="dense1")
    prediction_layer = tf.layers.dense(
        inputs=dense1,
        units=num_tissues,
        activation=tf.identity)  # [num_features, num_tissues]

    # TODO: eventually this should be a latent variable
    # (`rate` was a deprecated alias for `scale` in tfd.InverseGamma)
    x_scale_prior = tfd.InverseGamma(
        concentration=0.001, scale=0.001, name="x_scale_prior")
    x_scale = tf.nn.softplus(tf.Variable(tf.fill([seqs_train_len], -3.0)))

    print(tissue_matrix.shape)
    x_mu = tf.matmul(
        tf.constant(tissue_matrix),
        tf.transpose(prediction_layer))  # [num_samples, num_features]

    x_prior = tfd.Normal(loc=x_mu, scale=x_scale, name="x_prior")

    x_likelihood_loc = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood_scale = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood = ed.Normal(
        loc=x_likelihood_loc, scale=x_likelihood_scale, name="x_likelihood")

    x = tf.Variable(qx_feature_loc_train, name="x")
    print("X")
    print(x)

    log_prior = tf.reduce_sum(x_prior.log_prob(x)) + \
        tf.reduce_sum(x_scale_prior.log_prob(x_scale))
    log_likelihood = tf.reduce_sum(x_likelihood.distribution.log_prob(x))
    log_posterior = log_prior + log_likelihood

    sess = tf.Session()
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train = optimizer.minimize(-log_posterior)
    sess.run(tf.global_variables_initializer())
    # dropout doesn't seem to do much....

    train_feed_dict = {
        training_flag: True,
        lyr0_input: seqs_train,
        x_likelihood_loc: qx_feature_loc_train,
        x_likelihood_scale: qx_feature_scale_train
    }
    test_feed_dict = {
        training_flag: False,
        lyr0_input: seqs_test,
        x_likelihood_loc: qx_feature_loc_test,
        x_likelihood_scale: qx_feature_scale_test
    }

    n_iter = 1000
    mad_sample = median_absolute_deviance_sample(x_mu, x_likelihood)
    for iter in range(n_iter):
        sess.run([train], feed_dict=train_feed_dict)
        if iter % 100 == 0:
            print(sess.run(
                tf.reduce_sum(x_likelihood.distribution.log_prob(x_mu)),
                feed_dict=train_feed_dict))
            print(sess.run(
                tf.reduce_sum(x_likelihood.distribution.log_prob(x_mu)),
                feed_dict=test_feed_dict))
            print(sess.run(
                tfp.distributions.percentile(
                    x_likelihood.distribution.log_prob(x_mu), 50.0),
                feed_dict=train_feed_dict))
            print(sess.run(
                tfp.distributions.percentile(
                    x_likelihood.distribution.log_prob(x_mu), 50.0),
                feed_dict=test_feed_dict))
            print(est_expected_median_absolute_deviance(
                sess, mad_sample, train_feed_dict))
            print(est_expected_median_absolute_deviance(
                sess, mad_sample, test_feed_dict))

    print(est_expected_median_absolute_deviance(
        sess, mad_sample, train_feed_dict))
    print(est_expected_median_absolute_deviance(
        sess, mad_sample, test_feed_dict))
def estimate_splicing_code_from_kmers(
        qx_feature_loc, qx_feature_scale, kmer_usage_matrix, tissues):
    num_samples = len(tissues)
    num_tissues = np.max(tissues)

    tissue_matrix = np.zeros((num_samples, num_tissues), dtype=np.float32)
    for (i, j) in enumerate(tissues):
        tissue_matrix[i, j - 1] = 1

    num_features = kmer_usage_matrix.shape[0]
    num_kmers = kmer_usage_matrix.shape[1]

    # Split into testing and training data
    shuffled_feature_idxs = np.arange(num_features)
    np.random.shuffle(shuffled_feature_idxs)
    seqs_train_len = int(np.floor(0.75 * num_features))
    seqs_test_len = num_features - seqs_train_len

    train_idxs = shuffled_feature_idxs[:seqs_train_len]
    test_idxs = shuffled_feature_idxs[seqs_train_len:]
    kmer_usage_matrix_train = kmer_usage_matrix[train_idxs]
    kmer_usage_matrix_test = kmer_usage_matrix[test_idxs]
    qx_feature_loc_train = qx_feature_loc[:, train_idxs]
    qx_feature_scale_train = qx_feature_scale[:, train_idxs]
    qx_feature_loc_test = qx_feature_loc[:, test_idxs]
    qx_feature_scale_test = qx_feature_scale[:, test_idxs]

    W0 = tf.Variable(
        tf.random_normal([num_kmers, 1], mean=0.0, stddev=0.01), name="W0")
    W_prior = tfd.Normal(loc=0.0, scale=0.1, name="W_prior")
    W = tf.Variable(
        tf.random_normal([num_kmers, num_tissues], mean=0.0, stddev=0.01),
        name="W")

    X = tf.placeholder(tf.float32, shape=(None, num_kmers), name="X")
    Y = tf.matmul(X, W0 + W)
    print(Y)

    # (`rate` was a deprecated alias for `scale` in tfd.InverseGamma)
    x_scale_prior = tfd.InverseGamma(
        concentration=0.001, scale=0.001, name="x_scale_prior")
    x_scale = tf.nn.softplus(tf.Variable(tf.fill([seqs_train_len], -3.0)))

    x_mu = tf.matmul(
        tf.constant(tissue_matrix),
        tf.transpose(Y))  # [num_samples, num_features]
    print(x_mu)

    x_prior = tfd.Normal(loc=x_mu, scale=x_scale, name="x_prior")

    x_likelihood_loc = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood_scale = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood = ed.Normal(
        loc=x_likelihood_loc, scale=x_likelihood_scale, name="x_likelihood")

    # Using the likelihood
    x = tf.Variable(qx_feature_loc_train, name="x")

    log_prior = \
        tf.reduce_sum(x_prior.log_prob(x)) + \
        tf.reduce_sum(x_scale_prior.log_prob(x_scale)) + \
        tf.reduce_sum(W_prior.log_prob(W))
    log_likelihood = tf.reduce_sum(x_likelihood.distribution.log_prob(x))
    log_posterior = log_prior + log_likelihood

    sess = tf.Session()
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train = optimizer.minimize(-log_posterior)
    sess.run(tf.global_variables_initializer())

    train_feed_dict = {
        X: kmer_usage_matrix_train,
        x_likelihood_loc: qx_feature_loc_train,
        x_likelihood_scale: qx_feature_scale_train
    }
    test_feed_dict = {
        X: kmer_usage_matrix_test,
        x_likelihood_loc: qx_feature_loc_test,
        x_likelihood_scale: qx_feature_scale_test
    }

    n_iter = 1000
    mad_sample = median_absolute_deviance_sample(x_mu, x_likelihood)
    for iter in range(n_iter):
        sess.run([train], feed_dict=train_feed_dict)
        if iter % 100 == 0:
            print(iter)
            print(est_expected_median_absolute_deviance(
                sess, mad_sample, train_feed_dict))
            print(est_expected_median_absolute_deviance(
                sess, mad_sample, test_feed_dict))
            print(sess.run(tf.reduce_min(x_scale)))
            print(sess.run(tf.reduce_max(x_scale)))

    return sess.run(W0), sess.run(W)