def test_laue_StudentTLikelihood(dof, laue_inputs):
    """Check the Laue Student's t likelihood against a plain ``tfd.StudentT``.

    The likelihood's ``log_prob`` of the fake predictions is compared with
    the reference distribution's ``log_prob`` of the observed intensities,
    first for a single batch and then for the same batch repeated three
    times along axis 0.
    """
    likelihood = StudentTLikelihood(dof)(laue_inputs)
    iobs = BaseModel.get_intensities(laue_inputs)
    sigiobs = BaseModel.get_uncertainties(laue_inputs)
    ipred = fake_ipred(laue_inputs)

    l_true = tfd.StudentT(dof, iobs, sigiobs)

    # Smoke check only: the convolution must run on a single batch.
    likelihood.convolve(ipred)

    nobs = BaseModel.get_harmonic_id(laue_inputs).max() + 1

    test = likelihood.log_prob(ipred).numpy()
    expected = l_true.log_prob(iobs).numpy().T

    # The zero padded entries at the end of the input will disagree
    # with the expected values. This is fine, because they will not
    # contribute to the gradient
    test = test[:, :nobs]
    expected = expected[:, :nobs]

    assert np.array_equal(expected.shape, test.shape)
    assert np.allclose(expected, test)

    # Test batches larger than 1
    ipred = np.concatenate((ipred, ipred, ipred), axis=0)
    likelihood.convolve(ipred).numpy()
    test = likelihood.log_prob(ipred).numpy()
    test = test[:, :nobs]
    assert np.array_equiv(expected, test)
def test_invalid_model_spec_raises_error(self):
    """Each unsupported prior family must be rejected with a ``ValueError``.

    Three model specs are built, each with exactly one prior swapped for a
    family the Gibbs sampler does not accept: the weights prior, the level
    variance prior, and the observation noise variance prior.
    """
    observed_time_series = tf.ones([2])
    design_matrix = tf.eye(2)

    # `assertRaisesRegex` is the supported spelling; the `...Regexp` alias
    # is deprecated and no longer present on recent unittest.TestCase.
    with self.assertRaisesRegex(
            ValueError, 'Weights prior must be a univariate normal'):
        gibbs_sampler.build_model_for_gibbs_fitting(
            observed_time_series,
            design_matrix=design_matrix,
            weights_prior=tfd.StudentT(df=10, loc=0., scale=1.),
            level_variance_prior=tfd.InverseGamma(0.01, 0.01),
            observation_noise_variance_prior=tfd.InverseGamma(0.01, 0.01))

    with self.assertRaisesRegex(
            ValueError, 'Level variance prior must be an inverse gamma'):
        gibbs_sampler.build_model_for_gibbs_fitting(
            observed_time_series,
            design_matrix=design_matrix,
            weights_prior=tfd.Normal(loc=0., scale=1.),
            level_variance_prior=tfd.LogNormal(0., 3.),
            observation_noise_variance_prior=tfd.InverseGamma(0.01, 0.01))

    with self.assertRaisesRegex(
            ValueError, 'noise variance prior must be an inverse gamma'):
        gibbs_sampler.build_model_for_gibbs_fitting(
            observed_time_series,
            design_matrix=design_matrix,
            weights_prior=tfd.Normal(loc=0., scale=1.),
            level_variance_prior=tfd.InverseGamma(0.01, 0.01),
            observation_noise_variance_prior=tfd.LogNormal(0., 3.))
def __call__(self):
    """Build the Student's t distribution object for the active backend.

    Uses ``torch.distributions`` when ``get_backend()`` returns
    ``'pytorch'`` and TensorFlow Probability otherwise. Either way the
    distribution is constructed from ``self.df``, ``self.loc`` and
    ``self.scale``, in that order.
    """
    # Import lazily so only the selected backend has to be installed.
    if get_backend() == 'pytorch':
        import torch.distributions as tod
        dist_cls = tod.studentT.StudentT
    else:
        from tensorflow_probability import distributions as tfd
        dist_cls = tfd.StudentT
    return dist_cls(self.df, self.loc, self.scale)
def model_fn(self):
    """Generator defining the joint model, one yielded distribution per variable.

    Yields, in order: the regression weights ``w``, the latent scales
    ``z_scale``, one-hot factor rows for the testing samples ``F_test``,
    the latent positions ``z``, the per-feature bias ``x_bias``, the two
    kernel-regression coefficient vectors, the per-feature scale
    ``x_scale``, the log expression ``x`` and, unless
    ``self.use_point_estimates`` is set, the approximate RNA-seq
    likelihood built from ``self.vars``.
    """
    factor_shape = [self.num_factors, self.k]

    # regression in latent space
    w_prior = tfd.Normal(
        loc=tf.zeros(factor_shape),
        scale=tf.fill(factor_shape, 10.0))
    w = yield JDCRoot(Independent(w_prior))

    z_scale_prior = tfd.HalfCauchy(loc=tf.zeros(factor_shape), scale=1.0)
    z_scale = yield JDCRoot(Independent(z_scale_prior))

    test_shape = [
        self.num_testing_samples,
        self.num_factors - self.num_confounders,
    ]
    F_test_prior = tfd.OneHotCategorical(logits=tf.zeros(test_shape))
    F_test = yield JDCRoot(Independent(F_test_prior))

    # Give self.F a leading axis, then join it with the sampled test rows
    # along the second-to-last axis.
    F_full = tf.concat([tf.expand_dims(self.F, 0), F_test], axis=-2)

    z_dist = tfd.Normal(
        loc=tf.matmul(F_full, w),
        scale=tf.matmul(F_full, z_scale))
    z = yield Independent(z_dist)

    x_bias_prior = tfd.Normal(
        loc=tf.fill([self.num_features], np.float32(self.x_bias_loc0)),
        scale=np.float32(self.x_bias_scale0))
    x_bias = yield JDCRoot(Independent(x_bias_prior))

    # decoded log-expression space
    x_loc = x_bias + self.decoder(z) - self.sample_scales

    degree_shape = [self.kernel_regression_degree]

    concentration_prior = tfd.HalfCauchy(loc=tf.zeros(degree_shape), scale=1.0)
    x_scale_concentration_c = yield JDCRoot(Independent(concentration_prior))

    mode_prior = tfd.HalfCauchy(loc=tf.zeros(degree_shape), scale=1.0)
    x_scale_mode_c = yield JDCRoot(Independent(mode_prior))

    weights = kernel_regression_weights(
        self.kernel_regression_bandwidth, x_bias, self.x_scale_hinges)

    x_scale_dist = mean_variance_model(
        weights, x_scale_concentration_c, x_scale_mode_c)
    x_scale = yield Independent(x_scale_dist)

    # log expression distribution
    x = yield Independent(tfd.StudentT(df=1.0, loc=x_loc, scale=x_scale))

    if self.use_point_estimates:
        return

    # The read likelihood is the final variable; its sampled value is not
    # needed inside the generator.
    yield tfd.Independent(rnaseq_approx_likelihood_from_vars(self.vars, x))
def robit(x, df=1):
    """
    Robit activation: the Student t CDF used in place of a sigmoid.

    :param x: value(s) at which the CDF is evaluated
    :param df: degrees of freedom for the student T distribution
    :return: CDF at ``x`` of a StudentT with the given ``df``, loc 0 and scale 1
    """
    from tensorflow_probability import distributions as tfd

    standard_t = tfd.StudentT(df=df, loc=0, scale=1)
    return standard_t.cdf(x)
def _base_dist(self, *args, **kwargs):
    """
    Build the half Student's t base distribution.

    A HalfStudentT is the absolute value of a StudentT, so a ``tfd.StudentT``
    built from the given arguments is pushed through an ``AbsoluteValue``
    bijector. All positional and keyword arguments go to ``tfd.StudentT``.
    """
    student_t = tfd.StudentT(*args, **kwargs)
    absolute_value = tfp.bijectors.AbsoluteValue()
    return tfd.TransformedDistribution(
        distribution=student_t,
        bijector=absolute_value,
        name="HalfStudentT",
    )
def __call__(self):
    """Return the backend-specific Student's t distribution object.

    The parameters are read from this object by key (``"df"``, ``"loc"``,
    ``"scale"``). TensorFlow Probability is used unless ``get_backend()``
    returns ``"pytorch"``, in which case ``torch.distributions`` is used.
    """
    # Each backend is imported only on the branch that needs it.
    if get_backend() != "pytorch":
        from tensorflow_probability import distributions as tfd

        return tfd.StudentT(self["df"], self["loc"], self["scale"])

    import torch.distributions as tod

    return tod.studentT.StudentT(self["df"], self["loc"], self["scale"])
def test_mono_StudentTLikelihood(dof, mono_inputs):
    """Check the monochromatic Student's t likelihood against ``tfd.StudentT``.

    A sample drawn from the reference distribution must get the same
    log-probability from the likelihood under test as from the reference.
    """
    likelihood = StudentTLikelihood(dof)(mono_inputs)

    # The reference is parameterized by the squeezed intensities/uncertainties.
    loc = tf.squeeze(BaseModel.get_intensities(mono_inputs))
    scale = tf.squeeze(BaseModel.get_uncertainties(mono_inputs))
    reference = tfd.StudentT(dof, loc, scale)

    draw = reference.sample()
    actual = likelihood.log_prob(draw)
    wanted = reference.log_prob(draw)
    assert np.allclose(actual, wanted)
def _base_dist(self, nu: IntTensorLike, sigma: TensorLike, *args, **kwargs):
    """
    Half student-T base distribution.

    A HalfStudentT is the absolute value of a StudentT.

    Parameters
    ----------
    nu
        Degrees of freedom, passed as ``df``.
    sigma
        Scale of the underlying StudentT; its location is fixed at 0.
    *args, **kwargs
        Forwarded to ``tfd.StudentT`` after ``df``, ``loc`` and ``scale``.
    """
    # df/loc/scale go in positionally so that extra positional arguments
    # land on the parameters that follow them. With keywords, any extra
    # positional would bind to `df` first and raise a TypeError.
    student_t = tfd.StudentT(nu, 0, sigma, *args, **kwargs)
    return tfd.TransformedDistribution(
        distribution=student_t,
        bijector=tfp.bijectors.AbsoluteValue(),
        name="HalfStudentT",
    )
def __init__(self, Fobs, SigFobs, dof, observed=None):
    """
    Set up a Student's t base distribution centered on reference amplitudes.

    Parameters
    ----------
    Fobs : array
        numpy array or tf.Tensor containing observed structure factor
        amplitudes from a reference structure. Used as the location.
    SigFobs : array
        numpy array or tf.Tensor containing error estimates for the
        reference structure factor amplitudes. Used as the scale.
    dof : float
        degrees of freedom for the student's t distribution.
    observed : array (optional)
        boolean numpy array or tf.Tensor which has True for all observed
        miller indices. Passed to the parent initializer.
    """
    super().__init__(observed)
    # Both inputs are copied into float32 numpy arrays before use.
    self.base_dist = tfd.StudentT(
        df=dof,
        loc=np.array(Fobs, dtype=np.float32),
        scale=np.array(SigFobs, dtype=np.float32),
    )
def estimate_gmm_precision(qx_loc, qx_scale, fixed_expression=False, profile_trace=False, tensorboard_summaries=False, batch_size=100, err_scale=0.2, edge_cutoff=0.7):
    """Fit per-batch weights between features and collect the strong ones as edges.

    For each batch of ``batch_size`` target columns, a weight matrix ``qw`` of
    shape ``(batch_size, n)`` is optimized with Adam for ``niter`` steps to
    maximize a log posterior. The posterior combines a StudentT (df=10)
    likelihood on the target columns, a Normal prior on the masked weights and
    a HalfCauchy prior on the weight scales. Weights above ``edge_cutoff`` or
    below ``-edge_cutoff`` are then recorded.

    Parameters
    ----------
    qx_loc : 2-D array
        ``shape[0]`` is taken as ``num_samples`` and ``shape[1]`` as ``n``.
        Its column means are used as the offset ``b``.
    qx_scale : array
        Scale of the ``ed.Normal`` built around ``qx_loc``; only used when
        ``fixed_expression`` is false.
    fixed_expression : bool
        Use ``qx_loc`` directly instead of an ``ed.Normal`` variable.
    profile_trace : bool
        Run one fully traced training step, write ``log/timeline.json`` and
        stop iterating over batches.
    tensorboard_summaries : bool
        Register scalar summaries and write them under ``log/batch-<batch_num>``.
    batch_size : int
        Number of target columns per batch; capped at ``n``.
    err_scale : float
        Scale of the StudentT likelihood.
    edge_cutoff : float
        Threshold on the fitted weights for reporting a neighbor.

    Returns
    -------
    dict
        Maps column index ``start_j + k`` to a list of ``(j, weight, weight)``
        tuples. The weight appears twice because the lower and upper
        "credible" values are both the same point estimate.
    """
    # NOTE(review): the block nesting below was reconstructed from a source
    # whose line breaks were lost. In particular, confirm the placement of
    # the `count` check and of the per-step summary write.
    num_samples = qx_loc.shape[0]
    n = qx_loc.shape[1]
    batch_size = min(batch_size, n)

    # [num_samples, n]
    if fixed_expression:
        qx = qx_loc
    else:
        qx = ed.Normal(loc=qx_loc, scale=qx_scale, name="qx")

    b = np.mean(qx_loc, axis=0)

    # variational estimate of w
    # -------------------------
    # Variables are initialized from placeholders so that they can be reset
    # to fresh values for every batch via feed_dict.
    qw_loc_init = tf.placeholder(tf.float32, (batch_size, n), name="qw_loc_init")
    qw_loc_init_value = np.zeros((batch_size, n), dtype=np.float32)
    qw_loc = tf.Variable(qw_loc_init, name="qw_loc")
    qw = qw_loc  # point estimate: no distribution is placed around qw_loc

    # variational estimate of w_scale
    # -------------------------------
    qw_scale_loc_init_value = np.full((batch_size, n), -3.0, dtype=np.float32)
    qw_scale_loc_init = tf.placeholder(tf.float32, (batch_size, n), name="qw_scale_loc_init")
    qw_scale_loc = tf.Variable(qw_scale_loc_init, name="qw_scale_loc")
    qw_scale = tf.nn.softplus(qw_scale_loc)

    # estimate of b
    # -------------
    by_init_value = np.zeros((batch_size, ), dtype=np.float32)
    by_init = tf.placeholder(tf.float32, (batch_size, ), name="by_init")
    by = tf.Variable(by_init, name="by", trainable=False)  # [batch_size]

    # w
    # -
    w_scale_prior = tfd.HalfCauchy(loc=0.0, scale=1.0, name="w_scale_prior")

    # qw_scale can be shrunk all the way to zero, producing NaNs
    qw_scale = tf.clip_by_value(qw_scale, 1e-4, 10000.0)

    scale_tau = 0.1
    w_prior = tfd.Normal(loc=0.0, scale=qw_scale * scale_tau, name="w_prior")

    # [batch_size, n]
    mask_init = tf.placeholder(tf.float32, (batch_size, n), name="mask_init")
    # Allocated uninitialized; presumably fillmask() writes every entry
    # before it is fed — confirm against fillmask.
    mask_init_value = np.empty([batch_size, n], dtype=np.float32)
    mask = tf.Variable(mask_init, name="mask", trainable=False)

    qw_masked = qw * mask  # [batch_size, n]

    qx_std = qx - b  # [num_samples, n]

    # CONDITIONAL CORRELATION
    # qxqw = tf.matmul(qx_std, qw_masked, transpose_b=True) # [num_samples, batch_size]
    # y_dist_loc = qxqw + by

    # UNCONDITIONAL CORRELATION
    qxqw = tf.expand_dims(qx_std, 1) * tf.expand_dims(qw_masked, 0)  # [num_samples, batch_size, n]
    y_dist_loc = tf.expand_dims(tf.expand_dims(by, 0), -1) + qxqw  # [num_samples, batch_size, n]

    y_dist = tfd.StudentT(loc=y_dist_loc, scale=err_scale, df=10.0)

    y_slice_start_init = tf.placeholder(tf.int32, 2, name="y_slice_start_init")  # set to [0, j]
    y_slice_start = tf.Variable(y_slice_start_init, name="y_slice_start", trainable=False)
    y = tf.slice(qx, y_slice_start, [num_samples, batch_size])  # [num_samples, batch_size]

    # y = tf.Print(y, [tf.square(y_dist_loc - tf.expand_dims(y, -1))], "y", summarize=16)

    # objective function
    # ------------------
    y = tf.expand_dims(y, -1)  # trailing axis broadcasts against y_dist_loc
    y_log_prob = tf.reduce_sum(y_dist.log_prob(y))
    w_log_prob = tf.reduce_sum(w_prior.log_prob(qw_masked))
    w_scale_log_prob = tf.reduce_sum(w_scale_prior.log_prob(qw_scale))
    log_posterior = y_log_prob + w_log_prob + w_scale_log_prob

    # No entropy term is added, so the objective is the log posterior alone.
    elbo = log_posterior

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-2)
    train = optimizer.minimize(-elbo)

    # NOTE(review): the session is never closed within this function.
    sess = tf.Session()
    niter = 1000
    feed_dict = dict()
    # The *_init_value arrays are mutated in place per batch, so the same
    # feed_dict entries carry the updated contents.
    feed_dict[qw_scale_loc_init] = qw_scale_loc_init_value
    feed_dict[qw_loc_init] = qw_loc_init_value
    feed_dict[mask_init] = mask_init_value
    feed_dict[by_init] = by_init_value
    qx_loc_means = np.mean(qx_loc, axis=0)  # not read again in this function

    # check_ops = tf.add_check_numerics_ops()

    if tensorboard_summaries:
        # tf.summary.histogram("qw_loc_param", qw_loc)
        # tf.summary.histogram("qw_scale_param", qw_scale_param)
        tf.summary.scalar("y_log_prob", y_log_prob)
        tf.summary.scalar("w_log_prob", w_log_prob)
        tf.summary.scalar("w_scale_log_prob", w_scale_log_prob)
        tf.summary.scalar("qw min", tf.reduce_min(qw))
        tf.summary.scalar("qw max", tf.reduce_max(qw))
        tf.summary.scalar("qw_scale min", tf.reduce_min(qw_scale))
        tf.summary.scalar("qw_scale max", tf.reduce_max(qw_scale))
        # tf.summary.histogram("qw_scale_loc_param", qw_scale_loc)
        # tf.summary.histogram("qw_scale_scale_param", qw_scale_scale)

    edges = dict()

    count = 0
    num_batches = math.ceil(n / batch_size)
    for batch_num in range(num_batches):
        # deal with n not necessarily being divisible by batch_size:
        # the last batch is shifted back so it still spans batch_size columns
        if batch_num == num_batches - 1:
            start_j = n - batch_size
        else:
            start_j = batch_num * batch_size

        fillmask(mask_init_value, start_j, batch_size)
        feed_dict[y_slice_start_init] = np.array([0, start_j], dtype=np.int32)

        # offsets for this batch's target columns
        for k in range(batch_size):
            by_init_value[k] = b[start_j + k]

        # re-initialize every variable from the per-batch placeholder values
        sess.run(tf.global_variables_initializer(), feed_dict=feed_dict)

        # if requested, just benchmark one run of the training operation and
        # leave the loop (edges is still empty when this happens on batch 0)
        if profile_trace:
            print("WRITING PROFILING DATA")
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            sess.run(train, options=options, run_metadata=run_metadata)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open('log/timeline.json', 'w') as f:
                f.write(chrome_trace)
            break

        if tensorboard_summaries:
            train_writer = tf.summary.FileWriter(
                "log/" + "batch-" + str(batch_num), sess.graph)
            # NOTE(review): this registers another "elbo" summary op on
            # every batch — confirm that is intended.
            tf.summary.scalar("elbo", elbo)
            merged_summary = tf.summary.merge_all()

        for t in range(niter):
            # _, elbo_val = sess.run([train, elbo])
            # _, entropy_val, log_posterior_val, elbo_val = sess.run([train, entropy, log_posterior, elbo])
            _, y_log_prob_value, w_log_prob_value, w_scale_log_prob_value = sess.run(
                [train, y_log_prob, w_log_prob, w_scale_log_prob])

            if t % 100 == 0:
                # print((t, elbo_val, log_posterior_val, entropy_val))
                print((y_log_prob_value, w_log_prob_value, w_scale_log_prob_value))
                # print((t, elbo_val))

            if tensorboard_summaries:
                train_writer.add_summary(sess.run(merged_summary), t)

        print("")
        print("batch")
        print(start_j)

        # qw_scale_min, qw_scale_mean, qw_scale_max = sess.run(
        #     [tf.reduce_min(qw_scale), tf.reduce_mean(qw_scale), tf.reduce_max(qw_scale)])
        # print(("qw_scale span", qw_scale_min, qw_scale_mean, qw_scale_max))

        # lower_credible = sess.run(qw.distribution.quantile(0.01))
        # upper_credible = sess.run(qw.distribution.quantile(0.99))
        # With a point estimate, both bounds are the same fitted weights.
        lower_credible = upper_credible = sess.run(qw)
        print("credible span")
        print(np.max(lower_credible))
        print(np.min(upper_credible))
        print("nonzeros")
        print(np.sum((lower_credible > edge_cutoff)))
        print(np.sum((upper_credible < -edge_cutoff)))

        # keep, for each target column, the columns whose weight clears the cutoff
        for k in range(batch_size):
            neighbors = []
            for j in range(n):
                if lower_credible[k, j] > edge_cutoff or upper_credible[
                        k, j] < -edge_cutoff:
                    neighbors.append(
                        (j, lower_credible[k, j], upper_credible[k, j]))
            edges[start_j + k] = neighbors

        # NOTE(review): as reconstructed, this stops after five batches
        # regardless of num_batches — confirm this cap is intended.
        count += 1
        if count > 4:
            break

    return edges
def _base_dist(self, mu: TensorLike, sigma: TensorLike, nu: IntTensorLike, *args, **kwargs):
    """
    Student-T base distribution.

    Parameters
    ----------
    mu
        Location, passed as ``loc``.
    sigma
        Scale, passed as ``scale``.
    nu
        Degrees of freedom, passed as ``df``.
    *args, **kwargs
        Forwarded to ``tfd.StudentT`` after ``df``, ``loc`` and ``scale``.
    """
    # Extra arguments used to be accepted and silently discarded; they are
    # now forwarded. df/loc/scale are positional so extra positionals bind
    # to the parameters that follow them.
    return tfd.StudentT(nu, mu, sigma, *args, **kwargs)
def _create_dist(self):
    """Build the Student's t distribution from this object's variables.

    Chooses ``tfd.StudentTWithAbsDfSoftplusScale`` when
    ``self._softplus_scale`` is truthy and plain ``tfd.StudentT`` otherwise.
    Both receive the df, loc and scale variables in that order.
    """
    # NOTE(review): StudentTWithAbsDfSoftplusScale appears to be deprecated
    # in later TensorFlow Probability releases — confirm the pinned version
    # still provides it. The attribute is only looked up on that branch.
    dist_cls = (
        tfd.StudentTWithAbsDfSoftplusScale
        if self._softplus_scale
        else tfd.StudentT
    )
    return dist_cls(
        self._df_variable, self._loc_variable, self._scale_variable)
def call(self, inputs):
    """Return a ``tfd.StudentT`` for ``inputs``.

    The degrees of freedom are ``self.dof``. The values returned by
    ``self.get_loc_and_scale(inputs)`` are unpacked as the positional
    arguments that follow.
    """
    loc_and_scale = self.get_loc_and_scale(inputs)
    return tfd.StudentT(self.dof, *loc_and_scale)
def test_StudentTReferencePrior(mc_samples):
    """Run the shared reference-prior check for the Student's t prior.

    The prior under test is built from the observed subset of the
    module-level ``Fobs`` / ``SigFobs``. The reference ``tfd.StudentT``
    uses the full arrays, with 4 degrees of freedom in both cases.
    """
    prior = StudentTReferencePrior(
        Fobs[observed], SigFobs[observed], 4., observed)
    reference = tfd.StudentT(df=4, loc=Fobs, scale=SigFobs)
    ReferencePrior_test(prior, reference, mc_samples)
def skew_t_lpdf(x, nu, loc, scale, skew, clip_min=-100):
    """Log-density of a skewed Student's t distribution.

    With ``z = (x - loc) / scale`` and
    ``u = skew * z * sqrt((nu + 1) / (nu + z**2))`` the result is
    ``log t_nu(z) + log T_{nu+1}(u) + log(2 / scale)``. Here ``t_nu`` is the
    standard Student's t density and ``T_{nu+1}`` is the standard Student's t
    CDF with one more degree of freedom.

    ``x`` is clipped from below at ``clip_min`` before standardizing.
    """
    x_clipped = tf.clip_by_value(x, clip_min, np.inf)
    z = (x_clipped - loc) / scale
    u = skew * z * tf.sqrt((nu + 1) / (nu + z * z))

    log_density_term = tfd.StudentT(nu, 0, 1).log_prob(z)
    log_skew_term = tfd.StudentT(nu + 1, 0, 1).log_cdf(u)

    return log_density_term + log_skew_term + tf.math.log(2 / scale)
def _init_distribution(conditions, **kwargs):
    """Build a ``tfd.StudentT`` from a conditions mapping.

    ``conditions`` must provide the keys ``"df"``, ``"loc"`` and
    ``"scale"``. Any extra keyword arguments are forwarded to
    ``tfd.StudentT``.
    """
    return tfd.StudentT(
        df=conditions["df"],
        loc=conditions["loc"],
        scale=conditions["scale"],
        **kwargs,
    )
def dist(self, loc, scale):
    """Return a ``tfd.StudentT`` with ``self.dof`` degrees of freedom.

    Size-1 dimensions are squeezed out of ``loc`` and ``scale`` before the
    distribution is built.
    """
    squeezed_loc = tf.squeeze(loc)
    squeezed_scale = tf.squeeze(scale)
    return tfd.StudentT(self.dof, squeezed_loc, squeezed_scale)