def test_missing_data(self):
    """Gradient-check and prediction-consistency test for
    BayesianGPLVMMiniBatch under ~90% missing data.

    Builds a linear-kernel model ``m``, then an RBF-kernel model ``m2``
    whose lengthscales are pushed into the linear regime and whose
    remaining parameters are copied from ``m``; the two models'
    predictions should then agree.
    """
    from GPy import kern
    from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
    from GPy.examples.dimensionality_reduction import _simulate_matern

    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
    _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
    Y = Ylist[0]

    # p=.9 marks ~90% of the entries missing (the original comment said 80%).
    inan = np.random.binomial(1, .9, size=Y.shape).astype(bool)
    Ymissing = Y.copy()
    Ymissing[inan] = np.nan

    k = kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2))
    m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random",
                               num_inducing=num_inducing,
                               kernel=k, missing_data=True)
    assert m.checkgrad()
    mul, varl = m.predict(m.X)

    k = kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2))
    m2 = BayesianGPLVMMiniBatch(Ymissing, Q, init="random",
                                num_inducing=num_inducing,
                                kernel=k, missing_data=True)
    # BUGFIX: check the gradients of m2, not m (m was already checked above).
    assert m2.checkgrad()

    # An RBF with a huge lengthscale behaves like a linear kernel; copy the
    # remaining parameters from m so that the predictions should coincide.
    m2.kern.rbf.lengthscale[:] = 1e6
    m2.X[:] = m.X.param_array
    m2.likelihood[:] = m.likelihood[:]
    m2.kern.white[:] = m.kern.white[:]

    # BUGFIX: predict with m2 — comparing m's predictions against itself
    # was vacuous and never exercised the m2 setup above.
    mu, var = m2.predict(m.X)
    np.testing.assert_allclose(mul, mu)
    np.testing.assert_allclose(varl, var)

    # The 50% quantile of a Gaussian predictive density equals its mean.
    q50 = m.predict_quantiles(m.X, (50,))
    np.testing.assert_allclose(mul, q50[0])
def test_missing_data(self):
    """Gradient checks for BayesianGPLVMMiniBatch with ~90% of the data
    masked out, run once with a Linear and once with an RBF kernel."""
    from GPy import kern
    from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
    from GPy.examples.dimensionality_reduction import _simulate_matern

    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
    _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, False)
    Y = Ylist[0]

    # Mask ~90% of the entries as NaN.
    missing_mask = np.random.binomial(1, .9, size=Y.shape).astype(bool)
    Ymissing = Y.copy()
    Ymissing[missing_mask] = np.nan

    # Same gradient check for both kernel choices.
    kernel_builders = (
        lambda: kern.Linear(Q, ARD=True) + kern.White(Q, np.exp(-2)),
        lambda: kern.RBF(Q, ARD=True) + kern.White(Q, np.exp(-2)),
    )
    for build_kernel in kernel_builders:
        model = BayesianGPLVMMiniBatch(Ymissing, Q, init="random",
                                       num_inducing=num_inducing,
                                       kernel=build_kernel(),
                                       missing_data=True)
        assert model.checkgrad()
def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
    """MRD demo on Matern-simulated views with ~60% of each view's
    entries marked missing. Returns the (optionally optimized) model."""
    from GPy import kern
    from GPy.models import MRD

    D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
    _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)

    k = kern.Linear(Q, ARD=True) + kern.White(Q, variance=1e-4)

    # Knock out ~60% of each view's entries; keep the masks around.
    inanlist = []
    for view in Ylist:
        mask = _np.random.binomial(1, .6, size=view.shape).astype(bool)
        inanlist.append(mask)
        view[mask] = _np.nan

    m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing,
            kernel=k, inference_method=None,
            initx="random", initz='permute', **kw)

    if optimize:
        print("Optimizing Model:")
        m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
    if plot:
        m.X.plot("MRD Latent Space 1D")
        m.plot_scales()
    return m
def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
    """MRD demo on sin/cos-simulated views. Returns the (optionally
    optimized) model."""
    from GPy import kern
    from GPy.models import MRD

    D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
    _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim)

    kernel = kern.Linear(Q, ARD=True) + kern.White(Q, variance=1e-4)
    m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=kernel,
            initx="PCA_concat", initz='permute', **kw)

    # Initialise each view's noise variance to a fraction of its data variance.
    m['.*noise'] = [view.var() / 40. for view in Ylist]

    if optimize:
        print("Optimizing Model:")
        m.optimize(messages=verbose, max_iters=8e3)
    if plot:
        m.X.plot("MRD Latent Space 1D")
        m.plot_scales()
    return m
def optimize(self, views, latent_dims=7, messages=True, max_iters=8e3, save_model=False):
    """Fit an MRD model to ``views`` and store it on ``self.model``.

    Parameters
    ----------
    views : list of observed-data matrices, one per view.
    latent_dims : dimensionality of the shared latent space.
    messages : whether the optimizer prints progress messages.
    max_iters : maximum number of optimizer iterations.
    save_model : falsy, or a file path to pickle the fitted model to.
    """
    if self.kernel == 'rbf':
        print("Chosen kernel: RBF")
        # BUGFIX: str() guards against a numeric lengthscale — plain string
        # concatenation would raise TypeError.
        print("Chosen lengthscale: " + str(self.lengthscale))
        k = (kern.RBF(latent_dims, ARD=True, lengthscale=self.lengthscale)
             + kern.White(latent_dims, variance=1e-4)
             + GPy.kern.Bias(latent_dims))
    elif self.kernel == 'linear':
        print("Chosen kernel: Linear")
        k = (kern.Linear(latent_dims, ARD=True)
             + kern.White(latent_dims, variance=1e-4)
             + GPy.kern.Bias(latent_dims))
    else:
        # BUGFIX: an unrecognised (but truthy) kernel name previously left
        # ``k`` unbound and crashed with NameError below; fall back to the
        # RBF default instead, same as when no kernel is set.
        print("No kernel or chosen - using RBF with lengthscale 10...")
        k = (kern.RBF(latent_dims, ARD=True, lengthscale=10)
             + kern.White(latent_dims, variance=1e-4)
             + GPy.kern.Bias(latent_dims))

    print("Number of inducing inputs: " + str(self.num_inducing))
    m = MRD(views, input_dim=latent_dims, num_inducing=self.num_inducing,
            kernel=k, normalizer=False)

    print("Optimizing Model...")
    # BUGFIX: honour the ``messages`` and ``max_iters`` arguments
    # (previously hard-coded to True and 8e3, silently ignoring the caller).
    m.optimize(messages=messages, max_iters=max_iters)

    if save_model:
        # Context manager closes the file deterministically instead of
        # leaking the handle as pickle.dump(m, open(...)) did.
        with open(save_model, "wb") as fh:
            pickle.dump(m, fh, protocol=2)

    self.model = m
def latent_functions_prior(Q, lenghtscale=None, variance=None, input_dim=None):
    """Build a list of Q RBF(+White jitter) kernels to use as latent-function priors.

    Parameters
    ----------
    Q : number of latent functions (kernels to build).
    lenghtscale : per-function lengthscales (length Q); drawn uniformly at
        random if None. (Parameter name keeps the original typo for
        caller compatibility.)
    variance : per-function variances (length Q); drawn uniformly at random
        if None.
    input_dim : input dimensionality passed to each kernel.
        NOTE(review): a None input_dim is forwarded straight to kern.RBF —
        presumably callers always supply it; verify.

    Returns
    -------
    list of Q kernels named 'kern_q0' ... 'kern_q{Q-1}'.
    """
    # Draw random hyperparameters when none are supplied (the original's
    # redundant ``else: x = x`` branches are removed).
    if lenghtscale is None:
        lenghtscale = np.random.rand(Q)
    if variance is None:
        variance = np.random.rand(Q)

    kern_list = []
    for q in range(Q):
        # The tiny White variance adds numerical jitter to each RBF prior.
        kern_q = (kern.RBF(input_dim=input_dim, lengthscale=lenghtscale[q],
                           variance=variance[q], name='rbf')
                  + kern.White(input_dim, variance=1e-8))
        kern_q.name = 'kern_q' + str(q)
        kern_list.append(kern_q)
    return kern_list
def main():
    """Benchmark MOHGP fitting time over a range of gene-sample sizes and
    write the per-run timings to 'AEH_times.csv'."""
    sample_info = pd.read_csv('MOB_sample_info.csv', index_col=0)
    df = pd.read_csv('data/Rep11_MOB_0.csv', index_col=0)
    df = df.loc[sample_info.index]
    df = df.T[df.sum(0) >= 3].T  # Filter practically unobserved genes

    # Variance-stabilise, then regress out the sequencing-depth covariate.
    dfm = NaiveDE.stabilize(df.T).T
    res = NaiveDE.regress_out(sample_info, dfm.T, 'np.log(total_counts)').T

    X = sample_info[['x', 'y']].values
    times = pd.DataFrame(columns=['N', 'time'])

    row = 0
    for N in [50, 100, 200, 300, 500, 750, 1000, 2000]:
        # Five replicates per sample size, each on a fresh random gene subset.
        for _ in range(5):
            Y = res.sample(N, axis=1).values.T
            start = time()
            model = GPclust.MOHGP(X=X, Y=Y,
                                  kernF=kern.RBF(2) + kern.Bias(2),
                                  kernY=kern.RBF(1) + kern.White(1),
                                  K=5, prior_Z='DP')
            model.hyperparam_opt_args['messages'] = False
            model.optimize(step_length=0.1, verbose=False, maxiter=2000)
            times.loc[row] = [N, time() - start]
            print(times.loc[row])
            row += 1

    times.to_csv('AEH_times.csv')
def WN():
    """Factory for a 1-D white-noise kernel."""
    return _Gk.White(1)


def C():
    """Factory for a 1-D constant (bias) kernel."""
    return _Gk.Bias(1)