def model(X, base_pred, add_resid=True, log_ls_resid=None):
    r"""Defines the sparse adaptive ensemble model.

        y           ~   sum{ f_k(x) * w_k } + delta(x) + epsilon
        w_k         ~   LogisticNormal  ( 0, sigma_k )
        delta(x)    ~   GaussianProcess ( 0, k(x) )
        epsilon     ~   Normal          ( 0, sigma_e )

    where the LogisticNormal is sparse_softmax transformed Normals.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample predictions
            from base models. For each item in the dictionary, the key is the
            model name, and the value is the model prediction with
            dimension (N, ).
        add_resid: (bool) Whether to add residual process to model.
        log_ls_resid: (float32) length-scale parameter for residual GP.
            If None then will estimate with normal prior.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # convert data type
    F = np.asarray(list(base_pred.values())).T
    F = tf.convert_to_tensor(F, dtype=tf.float32)
    X = tf.convert_to_tensor(X, dtype=tf.float32)

    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N,):
            raise ValueError(
                "All base-model predictions should have shape ({},), but "
                "observed {} for '{}'".format(N, value.shape, key))

    # specify prior for lengthscale and observation noise
    if log_ls_resid is None:
        log_ls_resid = ed.Normal(loc=_LS_PRIOR_MEAN,
                                 scale=_LS_PRIOR_SDEV, name="ls_resid")

    # specify logistic normal priors for ensemble weight
    temp = ed.Normal(loc=_TEMP_PRIOR_MEAN,
                     scale=_TEMP_PRIOR_SDEV, name='temp')

    W = sparse_logistic_weight(base_pred, temp,
                               name="ensemble_weight")

    # specify ensemble prediction
    FW = tf.matmul(F, W)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    if add_resid:
        ensemble_resid = gp.prior(X, ls=tf.exp(log_ls_resid),
                                  kernel_func=gp.rbf,
                                  name="ensemble_resid")
    else:
        ensemble_resid = 0.

    # specify observation noise
    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV, name="sigma")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y
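# Hedged usage sketch (not from the source): an Edward2 program like `model`
# can be turned into a joint log-density over its named random variables with
# ed.make_log_joint_fn. The arrays `X_train`, `base_pred_train`, and `y_obs`
# are hypothetical, and in practice a keyword value must be supplied for every
# latent the model declares ("temp", "ls_resid", "sigma", "ensemble_resid",
# the weight variables, ...) in addition to the observed "y".
log_joint = ed.make_log_joint_fn(model)
lp = log_joint(X_train, base_pred_train,
               temp=0., ls_resid=0., sigma=-1., y=y_obs)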
def model(cfg):
    ed.Normal(0., 1., name='normal', sample_shape=cfg.shape_for_normal)
def __call__(self, x):
    """Computes regularization given an ed.Normal random variable as input."""
    if not isinstance(x, ed.RandomVariable):
        raise ValueError('Input must be an ed.RandomVariable.')
    random_variable = ed.Normal(loc=self.mean, scale=self.stddev)
    return random_variable.distribution.kl_divergence(x.distribution)
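# Hedged usage sketch (not from the source): assuming this __call__ belongs to
# a regularizer object exposing `mean` and `stddev` attributes, it returns the
# KL divergence KL(Normal(mean, stddev) || q) for the ed.Normal `q` passed in.
# `NormalKLRegularizer` and its constructor arguments are hypothetical names.
regularizer = NormalKLRegularizer(mean=0., stddev=1.)
q = ed.Normal(loc=0.5, scale=1.2, name="q")
kl_penalty = regularizer(q)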
def variational_model(qw_mean, qw_stddv, qz_mean, qz_stddv):
    qw = ed.Normal(loc=qw_mean, scale=qw_stddv, name="qw")
    qz = ed.Normal(loc=qz_mean, scale=qz_stddv, name="qz")
    return qw, qz
def variational():
    loc = tf1.get_variable("loc", [])
    qz = ed.Normal(loc=loc, scale=0.5, name="qz")
    return qz
def model():
    x = ed.Normal(loc=0., scale=1., name="x")
    y = ed.Normal(loc=x, scale=1., name="y")
    return x + y
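# Hedged usage sketch (not from the source): Edward2 random variables behave
# like tensors in TF1 graph mode, so the `x + y` returned by `model` can be
# evaluated directly in a session (assumes TF1, or tensorflow.compat.v1 as tf).
sample_op = model()  # a tf.Tensor: one forward draw of x + y under the prior
with tf.Session() as sess:
    print(sess.run(sample_op))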
def model():
    loc = ed.Normal(loc=0., scale=1., name="loc")
    x = ed.Normal(loc=loc, scale=0.5, sample_shape=5, name="x")
    return x
def model_flat(X, base_pred, family_tree=None,
               ls_weight=1., ls_resid=1., **kwargs):
    r"""Defines the sparse adaptive ensemble model.

        y           ~   N(f, sigma^2)
        f(x)        ~   gaussian_process(sum{ f_model(x) * w_model(x) }, k_resid(x))
        w_model     =   tail_free_process(w0_model)
        w0_model(x) ~   gaussian_process(0, k_w(x))

    where the tail_free_process is defined by sparse_ensemble_weight.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample predictions
            from base models. For each item in the dictionary, the key is the
            model name, and the value is the model prediction with
            dimension (N, ).
        ls_weight: (float32) lengthscale for the kernel of ensemble weight GPs.
        ls_resid: (float32) lengthscale for the kernel of residual process GP.
        family_tree: (dict of list or None) A dictionary of lists of strings to
            specify the family tree between models; if None then assume there's
            no structure (i.e. flat).
        **kwargs: Additional parameters to pass to sparse_ensemble_weight.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N,):
            raise ValueError(
                "All base-model predictions should have shape ({},), but "
                "observed {} for '{}'".format(N, value.shape, key))

    # specify tail-free priors for ensemble weight
    if not family_tree:
        temp = ed.Normal(loc=tail_free._TEMP_PRIOR_MEAN,
                         scale=tail_free._TEMP_PRIOR_SDEV, name='temp')
    else:
        # specify a list of temp parameters for each node in the tree
        temp = ed.Normal(loc=[tail_free._TEMP_PRIOR_MEAN] * len(family_tree),
                         scale=tail_free._TEMP_PRIOR_SDEV, name='temp')

    # specify ensemble weight
    W = sparse_conditional_weight(X, base_pred, temp,
                                  family_tree=family_tree,
                                  ls=ls_weight,
                                  name="ensemble_weight", **kwargs)

    # specify ensemble prediction
    F = np.asarray(list(base_pred.values())).T
    FW = tf.multiply(F, W)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    ensemble_resid = gp.prior(X, ls_resid,
                              kernel_func=gp.rbf,
                              name="ensemble_resid")

    # specify observation noise
    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV, name="sigma")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y
def sample(cfg):
    mu = ed.Normal(0., 1., name="mu")
def variational_dgpr(X, Zm, Zs, ls=1., kern_func=rbf, ridge_factor=1e-3,
                     mfvi_mixture=False, n_mixture=1):
    """Defines the mean-field variational family for GPR.

    Args:
        X: (np.ndarray of float32) input training features, with dimension (Nx, D).
        Zm: (np.ndarray of float32) inducing points for mean, shape (Nm, D).
        Zs: (np.ndarray of float32) inducing points for covar, shape (Ns, D).
        ls: (float32) length scale parameter.
        kern_func: (function) kernel function.
        ridge_factor: (float32) small ridge factor to stabilize Cholesky decomposition.
        mfvi_mixture: (bool) Whether to output variational family with a
            mixture of MFVI.
        n_mixture: (int) Number of MFVI mixture components to add.

    Returns:
        q_f, q_sig: (ed.RandomVariable) variational family.
        q_f_mean, q_f_sdev: (tf.Variable) variational parameters for q_f.
    """
    X = tf.convert_to_tensor(X)
    Zm = tf.convert_to_tensor(Zm)
    Zs = tf.convert_to_tensor(Zs)

    Nx, Nm, Ns = (X.shape.as_list()[0],
                  Zm.shape.as_list()[0],
                  Zs.shape.as_list()[0])

    # 1. Prepare constants
    # compute matrix constants
    Kxx = kern_func(X, ls=ls)
    Kmm = kern_func(Zm, ls=ls)
    Kxm = kern_func(X, Zm, ls=ls)
    Kxs = kern_func(X, Zs, ls=ls)
    Kss = kern_func(Zs, ls=ls, ridge_factor=ridge_factor)

    # 2. Define variational parameters
    # define mean and variance for sigma
    q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
    q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

    # define free parameters (i.e. mean and full covariance of f_latent)
    m = tf.get_variable(shape=[Nm, 1], name='qf_m')
    # integer division so the triangular-fill size stays an int
    s = tf.get_variable(shape=[Ns * (Ns + 1) // 2], name='qf_s')
    L = fill_triangular(s, name='qf_chol')

    # components for KL objective
    H = tf.eye(Ns) + tf.matmul(L, tf.matmul(Kss, L), transpose_a=True)
    cond_cov_inv = tf.matmul(L, tf.matrix_solve(H, tf.transpose(L)))

    func_norm_mm = tf.matmul(m, tf.matmul(Kmm, m), transpose_a=True)
    log_det_ss = tf.log(tf.matrix_determinant(H))
    cond_norm_ss = tf.reduce_sum(tf.multiply(Kss, cond_cov_inv))

    # compute sparse gp variational parameter
    # (i.e. mean and covariance of P(f_obs | f_latent))
    qf_mean = tf.squeeze(tf.tensordot(Kxm, m, [[1], [0]]), name='qf_mean')
    qf_cov = (Kxx -
              tf.matmul(Kxs, tf.matmul(cond_cov_inv, Kxs, transpose_b=True)) +
              ridge_factor * tf.eye(Nx, dtype=tf.float32))

    # define variational family
    mixture_par_list = []
    if mfvi_mixture:
        gp_dist = dist_util.VariationalGaussianProcessDecoupledDistribution(
            loc=qf_mean,
            covariance_matrix=qf_cov,
            func_norm_mm=func_norm_mm,
            log_det_ss=log_det_ss,
            cond_norm_ss=cond_norm_ss)

        q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
            n_mixture=n_mixture, N=Nx, gp_dist=gp_dist, name='q_f')
    else:
        q_f = dist_util.VariationalGaussianProcessDecoupled(
            loc=qf_mean,
            covariance_matrix=qf_cov,
            func_norm_mm=func_norm_mm,
            log_det_ss=log_det_ss,
            cond_norm_ss=cond_norm_ss,
            name='q_f')
    q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

    return q_f, q_sig, qf_mean, qf_cov, mixture_par_list
def model_tailfree(X, base_pred, family_tree=None,
                   log_ls_weight=None, log_ls_resid=None, **kwargs):
    r"""Defines the sparse adaptive ensemble model.

        y           ~   N(f, sigma^2)
        f(x)        ~   gaussian_process(sum{ f_model(x) * w_model(x) }, k_resid(x))
        w_model     =   tail_free_process(w0_model)
        w0_model(x) ~   gaussian_process(0, k_w(x))

    where the tail_free_process is defined by sparse_ensemble_weight.

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        base_pred: (dict of np.ndarray) A dictionary of out-of-sample predictions
            from base models. For each item in the dictionary, the key is the
            model name, and the value is the model prediction with
            dimension (N, ).
        family_tree: (dict of list or None) A dictionary of lists of strings to
            specify the family tree between models; if None then assume there's
            no structure (i.e. flat).
        log_ls_weight: (float32) length-scale parameter for weight GP.
            If None then will estimate with normal prior.
        log_ls_resid: (float32) length-scale parameter for residual GP.
            If None then will estimate with normal prior.
        **kwargs: Additional parameters to pass to tail_free.prior.

    Returns:
        (tf.Tensors of float32) model parameters.
    """
    # check dimension
    N, D = X.shape
    for key, value in base_pred.items():
        if not value.shape == (N,):
            raise ValueError(
                "All base-model predictions should have shape ({},), but "
                "observed {} for '{}'".format(N, value.shape, key))

    # specify prior for lengthscale and observation noise
    if log_ls_weight is None:
        log_ls_weight = ed.Normal(loc=_LS_PRIOR_MEAN,
                                  scale=_LS_PRIOR_SDEV, name="ls_weight")
    if log_ls_resid is None:
        log_ls_resid = ed.Normal(loc=_LS_PRIOR_MEAN,
                                 scale=_LS_PRIOR_SDEV, name="ls_resid")

    sigma = ed.Normal(loc=_NOISE_PRIOR_MEAN,
                      scale=_NOISE_PRIOR_SDEV, name="sigma")

    # specify tail-free priors for ensemble weight
    ensemble_weights, model_names = tail_free.prior(X, base_pred,
                                                    family_tree=family_tree,
                                                    ls=tf.exp(log_ls_weight),
                                                    name="ensemble_weight",
                                                    **kwargs)

    # specify ensemble prediction
    base_models = np.asarray([base_pred[name] for name in model_names]).T
    FW = tf.multiply(base_models, ensemble_weights)
    ensemble_mean = tf.reduce_sum(FW, axis=1, name="ensemble_mean")

    # specify residual process
    ensemble_resid = gp.prior(X, ls=tf.exp(log_ls_resid),
                              kernel_func=gp.rbf,
                              name="ensemble_resid")

    # specify observation
    y = ed.MultivariateNormalDiag(loc=ensemble_mean + ensemble_resid,
                                  scale_identity_multiplier=tf.exp(sigma),
                                  name="y")
    return y
def variational_sgpr(X, Z, ls=1., kern_func=rbf, ridge_factor=1e-3,
                     mfvi_mixture=False, n_mixture=1):
    """Defines the mean-field variational family for GPR.

    Args:
        X: (np.ndarray of float32) input training features, with dimension (Nx, D).
        Z: (np.ndarray of float32) inducing points, with dimension (Nz, D).
        ls: (float32) length scale parameter.
        kern_func: (function) kernel function.
        ridge_factor: (float32) small ridge factor to stabilize Cholesky decomposition.
        mfvi_mixture: (bool) Whether to output variational family with a
            mixture of MFVI.
        n_mixture: (int) Number of MFVI mixture components to add.

    Returns:
        q_f, q_sig: (ed.RandomVariable) variational family.
        q_f_mean, q_f_sdev: (tf.Variable) variational parameters for q_f.
        mixture_par_list: (list of tf.Variable) variational parameters for
            MFVI mixture ('mixture_logits', 'mixture_logits_mfvi_mix',
            'mean_mfvi', 'sdev_mfvi') if mfvi_mixture=True, else [].
    """
    X = tf.convert_to_tensor(X)
    Z = tf.convert_to_tensor(Z)

    Nx, Nz = X.shape.as_list()[0], Z.shape.as_list()[0]

    # 1. Prepare constants
    # compute matrix constants
    Kxx = kern_func(X, ls=ls)
    Kxz = kern_func(X, Z, ls=ls)
    Kzz = kern_func(Z, ls=ls, ridge_factor=ridge_factor)

    # compute null covariance matrix using Cholesky decomposition
    Kzz_chol_inv = tf.matrix_inverse(tf.cholesky(Kzz))
    Kzz_inv = tf.matmul(Kzz_chol_inv, Kzz_chol_inv, transpose_a=True)

    Kxz_Kzz_chol_inv = tf.matmul(Kxz, Kzz_chol_inv, transpose_b=True)
    Kxz_Kzz_inv = tf.matmul(Kxz, Kzz_inv)
    Sigma_pre = Kxx - tf.matmul(Kxz_Kzz_chol_inv, Kxz_Kzz_chol_inv,
                                transpose_b=True)

    # 2. Define variational parameters
    # define mean and variance for sigma
    q_sig_mean = tf.get_variable(shape=[], name='q_sig_mean')
    q_sig_sdev = tf.exp(tf.get_variable(shape=[], name='q_sig_sdev'))

    # define free parameters (i.e. mean and full covariance of f_latent)
    m = tf.get_variable(shape=[Nz], name='qf_m')
    s = tf.get_variable(
        shape=[Nz * (Nz + 1) // 2],  # integer division so the shape stays an int
        # initializer=tf.zeros_initializer(),
        name='qf_s')
    L = fill_triangular(s, name='qf_chol')
    S = tf.matmul(L, L, transpose_b=True)

    # compute sparse gp variational parameter
    # (i.e. mean and covariance of P(f_obs | f_latent))
    qf_mean = tf.tensordot(Kxz_Kzz_inv, m, [[1], [0]], name='qf_mean')
    qf_cov = (Sigma_pre +
              tf.matmul(Kxz_Kzz_inv,
                        tf.matmul(S, Kxz_Kzz_inv, transpose_b=True)) +
              ridge_factor * tf.eye(Nx, dtype=tf.float32))

    # define variational family
    mixture_par_list = []
    if mfvi_mixture:
        gp_dist = tfd.MultivariateNormalFullCovariance(
            loc=qf_mean, covariance_matrix=qf_cov)
        q_f, mixture_par_list = inference_util.make_mfvi_sgp_mixture_family(
            n_mixture=n_mixture, N=Nx, gp_dist=gp_dist, name='q_f')
    else:
        q_f = ed.MultivariateNormalFullCovariance(loc=qf_mean,
                                                  covariance_matrix=qf_cov,
                                                  name='q_f')
    q_sig = ed.Normal(loc=q_sig_mean, scale=q_sig_sdev, name='q_sig')

    return q_f, q_sig, qf_mean, qf_cov, mixture_par_list
def simple(cfg):
    ed.Normal(0., 1., name='normal')
def model(cfg):
    ed.Normal(0., 1., name='normal')
def estimate_gmm_precision(qx_loc, qx_scale,
                           fixed_expression=False,
                           profile_trace=False,
                           tensorboard_summaries=False,
                           batch_size=100,
                           err_scale=0.2,
                           edge_cutoff=0.7):
    num_samples = qx_loc.shape[0]
    n = qx_loc.shape[1]

    batch_size = min(batch_size, n)

    # [num_samples, n]
    if fixed_expression:
        qx = qx_loc
    else:
        qx = ed.Normal(loc=qx_loc, scale=qx_scale, name="qx")

    b = np.mean(qx_loc, axis=0)

    # variational estimate of w
    # -------------------------
    qw_loc_init = tf.placeholder(tf.float32, (batch_size, n), name="qw_loc_init")
    qw_loc_init_value = np.zeros((batch_size, n), dtype=np.float32)
    qw_loc = tf.Variable(qw_loc_init, name="qw_loc")
    qw = qw_loc

    # variational estimate of w_scale
    # -------------------------------
    qw_scale_loc_init_value = np.full((batch_size, n), -3.0, dtype=np.float32)
    qw_scale_loc_init = tf.placeholder(tf.float32, (batch_size, n), name="qw_scale_loc_init")
    qw_scale_loc = tf.Variable(qw_scale_loc_init, name="qw_scale_loc")
    qw_scale = tf.nn.softplus(qw_scale_loc)

    # estimate of b
    # -------------
    by_init_value = np.zeros((batch_size,), dtype=np.float32)
    by_init = tf.placeholder(tf.float32, (batch_size,), name="by_init")
    by = tf.Variable(by_init, name="by", trainable=False)  # [batch_size]

    # w
    # -
    w_scale_prior = tfd.HalfCauchy(loc=0.0, scale=1.0, name="w_scale_prior")

    # qw_scale can be shrunk all the way to zero, producing NaNs
    qw_scale = tf.clip_by_value(qw_scale, 1e-4, 10000.0)

    scale_tau = 0.1
    w_prior = tfd.Normal(
        loc=0.0,
        scale=qw_scale * scale_tau,
        name="w_prior")

    # [n, batch_size]
    mask_init = tf.placeholder(tf.float32, (batch_size, n), name="mask_init")
    mask_init_value = np.empty([batch_size, n], dtype=np.float32)
    mask = tf.Variable(mask_init, name="mask", trainable=False)

    qw_masked = qw * mask  # [batch_size, n]

    qx_std = qx - b  # [num_samples, n]

    # CONDITIONAL CORRELATION
    # qxqw = tf.matmul(qx_std, qw_masked, transpose_b=True)  # [num_samples, batch_size]
    # y_dist_loc = qxqw + by

    # UNCONDITIONAL CORRELATION
    qxqw = tf.expand_dims(qx_std, 1) * tf.expand_dims(qw_masked, 0)  # [num_samples, num_batches, n]
    y_dist_loc = tf.expand_dims(tf.expand_dims(by, 0), -1) + qxqw  # [num_samples, num_batches, n]

    y_dist = tfd.StudentT(
        loc=y_dist_loc,
        scale=err_scale,
        df=10.0)

    y_slice_start_init = tf.placeholder(tf.int32, 2, name="y_slice_start_init")  # set to [0, j]
    y_slice_start = tf.Variable(y_slice_start_init, name="y_slice_start", trainable=False)
    y = tf.slice(qx, y_slice_start, [num_samples, batch_size])  # [num_samples, batch_size]

    # y = tf.Print(y, [tf.square(y_dist_loc - tf.expand_dims(y, -1))], "y", summarize=16)

    # objective function
    # ------------------
    y = tf.expand_dims(y, -1)
    y_log_prob = tf.reduce_sum(y_dist.log_prob(y))
    w_log_prob = tf.reduce_sum(w_prior.log_prob(qw_masked))
    w_scale_log_prob = tf.reduce_sum(w_scale_prior.log_prob(qw_scale))
    log_posterior = y_log_prob + w_log_prob + w_scale_log_prob

    elbo = log_posterior

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-2)
    train = optimizer.minimize(-elbo)

    sess = tf.Session()

    niter = 1000
    feed_dict = dict()
    feed_dict[qw_scale_loc_init] = qw_scale_loc_init_value
    feed_dict[qw_loc_init] = qw_loc_init_value
    feed_dict[mask_init] = mask_init_value
    feed_dict[by_init] = by_init_value

    qx_loc_means = np.mean(qx_loc, axis=0)

    # check_ops = tf.add_check_numerics_ops()

    if tensorboard_summaries:
        # tf.summary.histogram("qw_loc_param", qw_loc)
        # tf.summary.histogram("qw_scale_param", qw_scale_param)
        tf.summary.scalar("y_log_prob", y_log_prob)
        tf.summary.scalar("w_log_prob", w_log_prob)
        tf.summary.scalar("w_scale_log_prob", w_scale_log_prob)

        tf.summary.scalar("qw min", tf.reduce_min(qw))
        tf.summary.scalar("qw max", tf.reduce_max(qw))
        tf.summary.scalar("qw_scale min", tf.reduce_min(qw_scale))
        tf.summary.scalar("qw_scale max", tf.reduce_max(qw_scale))

        # tf.summary.histogram("qw_scale_loc_param", qw_scale_loc)
        # tf.summary.histogram("qw_scale_scale_param", qw_scale_scale)

    edges = dict()
    count = 0
    num_batches = math.ceil(n / batch_size)
    for batch_num in range(num_batches):
        # deal with n not necessarily being divisible by batch_size
        if batch_num == num_batches - 1:
            start_j = n - batch_size
        else:
            start_j = batch_num * batch_size

        fillmask(mask_init_value, start_j, batch_size)
        feed_dict[y_slice_start_init] = np.array([0, start_j], dtype=np.int32)

        for k in range(batch_size):
            by_init_value[k] = b[start_j + k]

        sess.run(tf.global_variables_initializer(), feed_dict=feed_dict)

        # if requested, just benchmark one run of the training operation and return
        if profile_trace:
            print("WRITING PROFILING DATA")
            options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            sess.run(train, options=options, run_metadata=run_metadata)
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            with open('log/timeline.json', 'w') as f:
                f.write(chrome_trace)
            break

        if tensorboard_summaries:
            train_writer = tf.summary.FileWriter(
                "log/" + "batch-" + str(batch_num), sess.graph)
            tf.summary.scalar("elbo", elbo)
            merged_summary = tf.summary.merge_all()

        for t in range(niter):
            # _, elbo_val = sess.run([train, elbo])
            # _, entropy_val, log_posterior_val, elbo_val = sess.run([train, entropy, log_posterior, elbo])
            _, y_log_prob_value, w_log_prob_value, w_scale_log_prob_value = sess.run(
                [train, y_log_prob, w_log_prob, w_scale_log_prob])
            if t % 100 == 0:
                # print((t, elbo_val, log_posterior_val, entropy_val))
                print((y_log_prob_value, w_log_prob_value, w_scale_log_prob_value))
                # print((t, elbo_val))
            if tensorboard_summaries:
                train_writer.add_summary(sess.run(merged_summary), t)

        print("")
        print("batch")
        print(start_j)

        # qw_scale_min, qw_scale_mean, qw_scale_max = sess.run(
        #     [tf.reduce_min(qw_scale), tf.reduce_mean(qw_scale), tf.reduce_max(qw_scale)])
        # print(("qw_scale span", qw_scale_min, qw_scale_mean, qw_scale_max))

        # lower_credible = sess.run(qw.distribution.quantile(0.01))
        # upper_credible = sess.run(qw.distribution.quantile(0.99))
        lower_credible = upper_credible = sess.run(qw)

        print("credible span")
        print(np.max(lower_credible))
        print(np.min(upper_credible))

        print("nonzeros")
        print(np.sum((lower_credible > edge_cutoff)))
        print(np.sum((upper_credible < -edge_cutoff)))

        for k in range(batch_size):
            neighbors = []
            for j in range(n):
                if lower_credible[k, j] > edge_cutoff or upper_credible[k, j] < -edge_cutoff:
                    neighbors.append((j, lower_credible[k, j], upper_credible[k, j]))
            edges[start_j + k] = neighbors

        count += 1
        if count > 4:
            break

    return edges
def estimate_splicing_code(
        qx_feature_loc, qx_feature_scale,
        donor_seqs, acceptor_seqs, alt_donor_seqs, alt_acceptor_seqs,
        donor_cons, acceptor_cons, alt_donor_cons, alt_acceptor_cons,
        tissues):
    num_samples = len(tissues)
    num_tissues = np.max(tissues)

    tissue_matrix = np.zeros((num_samples, num_tissues), dtype=np.float32)
    for (i, j) in enumerate(tissues):
        tissue_matrix[i, j-1] = 1

    seqs = np.hstack(
        [donor_seqs, acceptor_seqs, alt_donor_seqs, alt_acceptor_seqs])

    # [ num_features, seq_length, 4 ]
    cons = np.hstack(
        [donor_cons, acceptor_cons, alt_donor_cons, alt_acceptor_cons])

    seqs = np.concatenate((seqs, np.expand_dims(cons, 2)), axis=2)

    print(seqs.shape)
    # sys.exit()

    num_features = seqs.shape[0]

    # split into testing and training data
    shuffled_feature_idxs = np.arange(num_features)
    np.random.shuffle(shuffled_feature_idxs)

    seqs_train_len = int(np.floor(0.75 * num_features))
    seqs_test_len = num_features - seqs_train_len

    print(num_features)
    print(seqs_train_len)
    print(seqs_test_len)
    print(qx_feature_loc.shape)
    print(qx_feature_scale.shape)

    train_idxs = shuffled_feature_idxs[:seqs_train_len]
    test_idxs = shuffled_feature_idxs[seqs_train_len:]

    seqs_train = seqs[train_idxs]
    seqs_test = seqs[test_idxs]

    qx_feature_loc_train = qx_feature_loc[:, train_idxs]
    qx_feature_scale_train = qx_feature_scale[:, train_idxs]

    qx_feature_loc_test = qx_feature_loc[:, test_idxs]
    qx_feature_scale_test = qx_feature_scale[:, test_idxs]

    # invented data to test my intuition
    # seqs_train = np.array(
    #     [[[1.0, 0.0],
    #       [1.0, 0.0],
    #       [1.0, 0.0],
    #       [1.0, 0.0]],
    #      [[0.0, 1.0],
    #       [0.0, 1.0],
    #       [0.0, 1.0],
    #       [0.0, 1.0]]],
    #     dtype=np.float32)
    # seqs_test = np.copy(seqs_train)

    # tissue_matrix = np.array(
    #     [[1],
    #      [1],
    #      [1]],
    #     dtype=np.float32)

    # qx_feature_loc_train = np.array(
    #     [[-1.0, 1.0],
    #      [-1.1, 1.1],
    #      # [-0.5, 0.5]],
    #      [0.9, -0.9]],
    #     dtype=np.float32)

    # qx_feature_scale_train = np.array(
    #     [[0.1, 0.1],
    #      [0.1, 0.1],
    #      # [0.1, 0.1]],
    #      [1.0, 1.0]],
    #     dtype=np.float32)

    # qx_feature_loc_test = np.copy(qx_feature_loc_train)
    # qx_feature_scale_test = np.copy(qx_feature_scale_train)

    # num_tissues = 1
    # num_samples = qx_feature_loc_train.shape[0]
    # seqs_train_len = 2

    # print(qx_feature_loc_train)
    # print(qx_feature_scale_train)
    # sys.exit()

    keep_prob = tf.placeholder(tf.float32)

    # model
    lyr0_input = tf.placeholder(
        tf.float32, (None, seqs_train.shape[1], seqs_train.shape[2]))
    # lyr0 = tf.layers.flatten(lyr0_input)
    lyr0 = lyr0_input

    print(lyr0)

    training_flag = tf.placeholder(tf.bool)

    conv1 = tf.layers.conv1d(
        inputs=lyr0,
        filters=32,
        kernel_size=4,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="conv1")

    conv1_dropout = tf.layers.dropout(
        inputs=conv1,
        rate=0.5,
        training=training_flag,
        name="conv1_dropout")

    pool1 = tf.layers.max_pooling1d(
        inputs=conv1_dropout,
        pool_size=2,
        strides=2,
        name="pool1")

    conv2 = tf.layers.conv1d(
        inputs=pool1,
        filters=64,
        kernel_size=4,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="conv2")

    pool2 = tf.layers.max_pooling1d(
        inputs=conv2,
        pool_size=2,
        strides=2,
        name="pool2")

    pool2_flat = tf.layers.flatten(
        pool2, name="pool2_flat")

    # pool2_flat = tf.layers.flatten(conv1_dropout)

    dense1 = tf.layers.dense(
        inputs=pool2_flat,
        units=256,
        activation=tf.nn.leaky_relu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-1),
        name="dense1")

    # dropout1 = tf.layers.dropout(
    #     inputs=dense1,
    #     rate=0.5,
    #     training=training_flag,
    #     name="dropout1")

    prediction_layer = tf.layers.dense(
        # inputs=dropout1,
        inputs=dense1,
        units=num_tissues,
        activation=tf.identity)

    # [num_features, num_tissues]

    # TODO: eventually this should be a latent variable
    # x_scale = 0.2
    x_scale_prior = tfd.InverseGamma(
        concentration=0.001,
        rate=0.001,
        name="x_scale_prior")

    x_scale = tf.nn.softplus(tf.Variable(tf.fill([seqs_train_len], -3.0)))
    # x_scale = tf.constant(0.1)

    print(tissue_matrix.shape)

    x_mu = tf.matmul(
        tf.constant(tissue_matrix),
        tf.transpose(prediction_layer))

    # [num_samples, num_features]
    x_prior = tfd.Normal(
        loc=x_mu,
        # loc=0.0,
        scale=x_scale,
        name="x_prior")

    # x_prior = tfd.StudentT(
    #     loc=x_mu,
    #     scale=x_scale,
    #     df=2.0,
    #     name="x_prior")

    x_likelihood_loc = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood_scale = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood = ed.Normal(
        loc=x_likelihood_loc,
        scale=x_likelihood_scale,
        name="x_likelihood")

    # x = x_likelihood

    x = tf.Variable(
        qx_feature_loc_train,
        # tf.random_normal(qx_feature_loc_train.shape),
        # tf.zeros(qx_feature_loc_train.shape),
        # qx_feature_loc_train + qx_feature_scale_train * np.float32(np.random.randn(*qx_feature_loc_train.shape)),
        # trainable=False,
        name="x")

    print("X")
    print(x)

    # x_delta = tf.Variable(
    #     # qx_feature_loc_train,
    #     # tf.random_normal(qx_feature_loc_train.shape),
    #     tf.zeros(qx_feature_loc_train.shape),
    #     # trainable=False,
    #     name="x")

    # x_delta = tf.Print(x_delta,
    #     [tf.reduce_min(x_delta), tf.reduce_max(x_delta)], "x_delta span")

    # x = tf.Print(x,
    #     [tf.reduce_min(x - qx_feature_loc_train), tf.reduce_max(x - qx_feature_loc_train)],
    #     "x deviance from init")

    # print(x_prior.log_prob(x))
    # print(x_likelihood.log_prob(x))
    # sys.exit()

    # log_prior = tf.reduce_sum(x_prior.log_prob(x_delta))
    # log_likelihood = tf.reduce_sum(x_likelihood.distribution.log_prob(x_mu + x_delta))

    log_prior = tf.reduce_sum(x_prior.log_prob(x)) + tf.reduce_sum(x_scale_prior.log_prob(x_scale))
    log_likelihood = tf.reduce_sum(x_likelihood.distribution.log_prob(x))
    log_posterior = log_prior + log_likelihood

    # log_posterior = x_likelihood.distribution.log_prob(x_mu)

    sess = tf.Session()

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train = optimizer.minimize(-log_posterior)

    sess.run(tf.global_variables_initializer())

    # dropout doesn't seem to do much....

    train_feed_dict = {
        training_flag: True,
        # training_flag: False,
        lyr0_input: seqs_train,
        x_likelihood_loc: qx_feature_loc_train,
        x_likelihood_scale: qx_feature_scale_train
    }

    test_feed_dict = {
        training_flag: False,
        lyr0_input: seqs_test,
        x_likelihood_loc: qx_feature_loc_test,
        x_likelihood_scale: qx_feature_scale_test
    }

    n_iter = 1000
    mad_sample = median_absolute_deviance_sample(x_mu, x_likelihood)
    for iter in range(n_iter):
        # _, log_prior_value, log_likelihood_value = sess.run(
        #     [train, log_prior, log_likelihood],
        #     feed_dict=train_feed_dict)

        sess.run(
            [train],
            feed_dict=train_feed_dict)

        # print((log_prior_value, log_likelihood_value))

        if iter % 100 == 0:
            # print(iter)

            # print("x")
            # print(sess.run(x))

            # print("x likelihood")
            # print(sess.run(x_likelihood.distribution.log_prob(x), feed_dict=train_feed_dict))

            # print("x_mu")
            # print(sess.run(x_mu, feed_dict=train_feed_dict))
            # print(sess.run(x_mu, feed_dict=test_feed_dict))

            # print("x_mu likelihood")
            # print(sess.run(x_likelihood.distribution.log_prob(x_mu), feed_dict=train_feed_dict))
            # print(sess.run(x_likelihood.distribution.log_prob(x_mu), feed_dict=test_feed_dict))

            print(sess.run(tf.reduce_sum(x_likelihood.distribution.log_prob(x_mu)), feed_dict=train_feed_dict))
            print(sess.run(tf.reduce_sum(x_likelihood.distribution.log_prob(x_mu)), feed_dict=test_feed_dict))

            print(sess.run(tfp.distributions.percentile(x_likelihood.distribution.log_prob(x_mu), 50.0), feed_dict=train_feed_dict))
            print(sess.run(tfp.distributions.percentile(x_likelihood.distribution.log_prob(x_mu), 50.0), feed_dict=test_feed_dict))

            print(est_expected_median_absolute_deviance(sess, mad_sample, train_feed_dict))
            print(est_expected_median_absolute_deviance(sess, mad_sample, test_feed_dict))

    print(est_expected_median_absolute_deviance(sess, mad_sample, train_feed_dict))
    print(est_expected_median_absolute_deviance(sess, mad_sample, test_feed_dict))
def model():
    return ed.Normal(0., 1., name="x")
def estimate_splicing_code_from_kmers(
        qx_feature_loc, qx_feature_scale, kmer_usage_matrix, tissues):
    num_samples = len(tissues)
    num_tissues = np.max(tissues)

    tissue_matrix = np.zeros((num_samples, num_tissues), dtype=np.float32)
    for (i, j) in enumerate(tissues):
        tissue_matrix[i, j-1] = 1

    num_features = kmer_usage_matrix.shape[0]
    num_kmers = kmer_usage_matrix.shape[1]

    # split into testing and training data
    shuffled_feature_idxs = np.arange(num_features)
    np.random.shuffle(shuffled_feature_idxs)

    seqs_train_len = int(np.floor(0.75 * num_features))
    seqs_test_len = num_features - seqs_train_len

    train_idxs = shuffled_feature_idxs[:seqs_train_len]
    test_idxs = shuffled_feature_idxs[seqs_train_len:]

    kmer_usage_matrix_train = kmer_usage_matrix[train_idxs]
    kmer_usage_matrix_test = kmer_usage_matrix[test_idxs]

    qx_feature_loc_train = qx_feature_loc[:, train_idxs]
    qx_feature_scale_train = qx_feature_scale[:, train_idxs]

    qx_feature_loc_test = qx_feature_loc[:, test_idxs]
    qx_feature_scale_test = qx_feature_scale[:, test_idxs]

    W0 = tf.Variable(
        tf.random_normal([num_kmers, 1], mean=0.0, stddev=0.01),
        name="W0")

    # B = tf.Variable(
    #     tf.random_normal([1, num_tissues], mean=0.0, stddev=0.01),
    #     name="B")

    W_prior = tfd.Normal(
        loc=0.0,
        scale=0.1,
        name="W_prior")

    W = tf.Variable(
        tf.random_normal([num_kmers, num_tissues], mean=0.0, stddev=0.01),
        name="W")

    X = tf.placeholder(tf.float32, shape=(None, num_kmers), name="X")

    # Y = B + tf.matmul(X, W0 + W)
    Y = tf.matmul(X, W0 + W)
    print(Y)

    x_scale_prior = tfd.InverseGamma(
        concentration=0.001,
        rate=0.001,
        name="x_scale_prior")

    x_scale = tf.nn.softplus(tf.Variable(tf.fill([seqs_train_len], -3.0)))

    x_mu = tf.matmul(
        tf.constant(tissue_matrix),
        tf.transpose(Y))

    # [num_samples, num_features]
    print(x_mu)

    x_prior = tfd.Normal(
        loc=x_mu,
        scale=x_scale,
        name="x_prior")

    x_likelihood_loc = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood_scale = tf.placeholder(tf.float32, [num_samples, None])
    x_likelihood = ed.Normal(
        loc=x_likelihood_loc,
        scale=x_likelihood_scale,
        name="x_likelihood")

    # Using likelihood
    x = tf.Variable(
        qx_feature_loc_train,
        name="x")
    # x = x_likelihood_loc
    # x = x_mu

    log_prior = \
        tf.reduce_sum(x_prior.log_prob(x)) + \
        tf.reduce_sum(x_scale_prior.log_prob(x_scale)) + \
        tf.reduce_sum(W_prior.log_prob(W))

    log_likelihood = tf.reduce_sum(x_likelihood.distribution.log_prob(x))
    log_posterior = log_prior + log_likelihood

    # Using point estimates
    # x = qx_feature_loc_train
    # log_prior = \
    #     tf.reduce_sum(x_prior.log_prob(x)) + \
    #     tf.reduce_sum(x_scale_prior.log_prob(x_scale)) + \
    #     tf.reduce_sum(W_prior.log_prob(W))
    # log_posterior = log_prior

    sess = tf.Session()

    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
    train = optimizer.minimize(-log_posterior)

    sess.run(tf.global_variables_initializer())

    train_feed_dict = {
        X: kmer_usage_matrix_train,
        x_likelihood_loc: qx_feature_loc_train,
        x_likelihood_scale: qx_feature_scale_train
    }

    test_feed_dict = {
        X: kmer_usage_matrix_test,
        x_likelihood_loc: qx_feature_loc_test,
        x_likelihood_scale: qx_feature_scale_test
    }

    n_iter = 1000
    mad_sample = median_absolute_deviance_sample(x_mu, x_likelihood)
    for iter in range(n_iter):
        # _, log_prior_value, log_likelihood_value = sess.run(
        #     [train, log_prior, log_likelihood],
        #     feed_dict=train_feed_dict)

        sess.run(
            [train],
            feed_dict=train_feed_dict)

        # print((log_prior_value, log_likelihood_value))

        if iter % 100 == 0:
            print(iter)
            print(est_expected_median_absolute_deviance(sess, mad_sample, train_feed_dict))
            print(est_expected_median_absolute_deviance(sess, mad_sample, test_feed_dict))

            print(sess.run(tf.reduce_min(x_scale)))
            print(sess.run(tf.reduce_max(x_scale)))

            # print(sess.run(log_prior, feed_dict=train_feed_dict))
            # print(sess.run(log_likelihood, feed_dict=train_feed_dict))

    return sess.run(W0), sess.run(W)
def model_builtin():
    return ed.Normal(1., 0.1, name="x")
def neals_funnel():
    x1 = ed.Normal(loc=0., scale=3., name='x1')
    x2 = ed.Normal(loc=0., scale=tf.exp(x1 / 2.), name='x2')
    return x1, x2
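# Hedged usage sketch (not from the source): the funnel's joint density can be
# evaluated through ed.make_log_joint_fn, keyed on the variable names 'x1'
# and 'x2' declared above.
funnel_log_joint = ed.make_log_joint_fn(neals_funnel)
lp = funnel_log_joint(x1=0., x2=1.)  # log p(x1 = 0, x2 = 1)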
def normal_with_unknown_mean():
    loc = ed.Normal(loc=0., scale=1., name="loc")
    x = ed.Normal(loc=loc, scale=0.5, name="x")
    return x
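# Hedged usage sketch (not from the source): ed.interception can pin named
# random variables to fixed values, e.g. to inspect the model conditional on
# a particular `loc`. `set_values` is an illustrative helper, not part of the
# edward2 API.
def set_values(**fixed):
    def interceptor(rv_constructor, *args, **kwargs):
        name = kwargs.get("name")
        if name in fixed:
            kwargs["value"] = fixed[name]
        return rv_constructor(*args, **kwargs)
    return interceptor

with ed.interception(set_values(loc=0.3)):
    x_given_loc = normal_with_unknown_mean()  # x ~ Normal(0.3, 0.5)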
def latent_normal(shape, mean, stdev):
    if not isinstance(mean, list):
        mean = tf.constant(mean, shape=shape)
        stdev = tf.constant(stdev, shape=shape)
    prior = ed.Normal(loc=mean, scale=stdev)
    return prior
def variational_model(qw_mean, qw_stddv, qb_mean, qb_stddv):
    qw = ed.Normal(loc=qw_mean, scale=qw_stddv, name="qw")
    qb = ed.Normal(loc=qb_mean, scale=qb_stddv, name="qb")
    return qw, qb
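# Hedged usage sketch (not from the source): the variational parameters passed
# to variational_model are typically free TF1 variables, with a softplus to
# keep the scale parameters positive. Variable names here are illustrative.
qw_mean = tf.get_variable("qw_mean", [])
qw_stddv = tf.nn.softplus(tf.get_variable("qw_stddv_raw", []))
qb_mean = tf.get_variable("qb_mean", [])
qb_stddv = tf.nn.softplus(tf.get_variable("qb_stddv_raw", []))
qw, qb = variational_model(qw_mean, qw_stddv, qb_mean, qb_stddv)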
def sparse_conditional_weight(X, parent_name, child_names,
                              base_weights=None, temp=None,
                              kernel_func=gp.rbf,
                              link_func=sparse_softmax,
                              ridge_factor=1e-3,
                              **kernel_kwargs):
    """Defines the conditional distribution of model given parent in the tail-free tree.

    Defines the feature-dependent conditional distribution of model as:

        w( model | x ) = link_func( w_model(x) )
        w_model(x)     ~ gaussian_process[0, k_w(x)]

    Args:
        X: (np.ndarray) Input features of dimension (N, D)
        parent_name: (str) The name of the mother node.
        child_names: (list of str) A list of model names for each child in the family.
        base_weights: (tf.Tensor of float32 or None) base logits to be passed to
            link_func corresponding to each child. It has dimension
            (batch_size, num_obs, num_model).
        temp: (tf.Tensor of float32 or None) temperature parameter corresponding
            to the parent node to be passed to link_func, it has dimension
            (batch_size, ).
        kernel_func: (function) kernel function for base ensemble,
            with args (X, **kwargs).
        link_func: (function) a link function that transforms the unnormalized
            base ensemble weights to a K-dimension simplex.
            This function has args (logits, temp).
        ridge_factor: (float32) ridge factor to stabilize Cholesky decomposition.
        **kernel_kwargs: Additional parameters to pass to kernel_func through gp.prior.

    Returns:
        (list of tf.Tensor) List of normalized ensemble weights, dimension (N, M)
            with dtype float32.
    """
    num_model = len(child_names)

    # define random variables: temperature and raw GP weights
    if not isinstance(temp, tf.Tensor):
        temp = ed.Normal(loc=_TEMP_PRIOR_MEAN,
                         scale=_TEMP_PRIOR_SDEV,
                         name='{}_{}'.format(TEMP_NAME_PREFIX, parent_name))

    if not isinstance(base_weights, tf.Tensor):
        base_weights = tf.stack([
            gp.prior(X, kernel_func=kernel_func,
                     ridge_factor=ridge_factor,
                     name='{}_{}'.format(BASE_WEIGHT_NAME_PREFIX, model_name),
                     **kernel_kwargs)
            for model_name in child_names], axis=-1)

    # define transformed random variables
    weight_transformed = link_func(base_weights, tf.exp(temp),
                                   name='{}_{}'.format(COND_WEIGHT_NAME_PREFIX,
                                                       parent_name))

    # split into list then return
    # TODO(jereliu): Ugly code.
    weight_transformed = tf.split(weight_transformed, num_model, axis=-1)
    weight_transformed = [
        tf.squeeze(weight, axis=-1) for weight in weight_transformed]

    return weight_transformed
def normal_with_unknown_mean():
    loc = ed.Normal(loc=0., scale=1., name="loc")
    x = ed.Normal(loc=loc, scale=0.5, sample_shape=5)
    return x
""" import numpy as np x_train = np.linspace(-3, 3, num=50) y_train = np.cos(x_train) + np.random.normal(0, 0.1, size=50) x_train = x_train.astype(np.float32).reshape((50, 1)) y_train = y_train.astype(np.float32).reshape((50, 1)) import tensorflow as tf import tensorflow_probability as tfp #from edward.models import Normal from tensorflow_probability import edward2 as ed W_0 = ed.Normal(loc=tf.zeros([1, 2]), scale=tf.ones([1, 2])) W_1 = ed.Normal(loc=tf.zeros([2, 1]), scale=tf.ones([2, 1])) b_0 = ed.Normal(loc=tf.zeros(2), scale=tf.ones(2)) b_1 = ed.Normal(loc=tf.zeros(1), scale=tf.ones(1)) x = x_train y = ed.Normal(loc=tf.matmul(tf.tanh(tf.matmul(x, W_0) + b_0), W_1) + b_1, scale=0.1) # #qW_0 = ed.Normal(loc=tf.get_variable("qW_0/loc", [1, 2]), # scale=tf.nn.softplus(tf.get_variable("qW_0/scale", [1, 2]))) #qW_1 = ed.Normal(loc=tf.get_variable("qW_1/loc", [2, 1]), # scale=tf.nn.softplus(tf.get_variable("qW_1/scale", [2, 1]))) #qb_0 = ed.Normal(loc=tf.get_variable("qb_0/loc", [2]), # scale=tf.nn.softplus(tf.get_variable("qb_0/scale", [2])))