def testLassoBackgroundModel(estimator, Xfg, Xbg, y, use_Kgeno=True, **kwargs):
    """
    Test the foreground SNPs (Xfg) with a linear mixed model whose covariance
    matrix is built with the help of a Lasso fit.

    estimator : (Lmm-)Lasso model; must provide fit() and expose coef_
    Xfg       : SNP data for the foreground model
    Xbg       : SNP data for the background model
    y         : phenotype vector
    use_Kgeno : if True (default), a linear kernel over all of Xbg is
                computed and passed to the estimator as background
                covariance; if False the estimator is fitted on Xbg alone.
    kwargs    : forwarded unchanged to qtl.test_lmm

    Returns whatever getPv() of the fitted LMM returns.
    """
    if not use_Kgeno:
        # Plain Lasso on the background SNPs; the kernel is built from the
        # SNPs that received a non-zero coefficient.
        estimator.fit(Xbg, y)
        nonzero = estimator.coef_ != 0
        covariance = compute_linear_kernel(Xbg, nonzero)
    else:
        background_kernel = compute_linear_kernel(Xbg)
        estimator.fit(Xfg, y, background_kernel)
        nonzero = estimator.coef_ != 0
        if not nonzero.any():
            # Nothing selected: fall back to the full background kernel.
            covariance = background_kernel
        else:
            # NOTE(review): the estimator was fitted on Xfg, yet the mask of
            # its non-zero coefficients is applied to Xbg here -- confirm
            # this is intended (it requires matching column layouts).
            selected_kernel = compute_linear_kernel(Xbg, nonzero)
            # Combine noise, background and selected-SNP kernels and use the
            # optimised total covariance for the association test.
            decomposition = VarianceDecomposition(y)
            decomposition.addRandomEffect(is_noise=True)
            decomposition.addRandomEffect(background_kernel)
            decomposition.addRandomEffect(selected_kernel)
            decomposition.optimize()
            covariance = decomposition.gp.getCovar().K()

    association = qtl.test_lmm(Xfg, y, K=covariance, **kwargs)
    return association.getPv()
def testLassoBackgroundModel(estimator, Xfg, Xbg, y, use_Kgeno=True, **kwargs):
    """
    Test the foreground SNPs (Xfg) with a linear mixed model whose covariance
    matrix is built with the help of a Lasso fit.

    estimator : (Lmm-)Lasso model; must provide fit() and expose coef_
    Xfg       : SNP data for the foreground model
    Xbg       : SNP data for the background model
    y         : phenotype vector
    use_Kgeno : if True (default), a linear kernel over all of Xbg is
                computed and passed to the estimator as background
                covariance; if False the estimator is fitted on Xbg alone.
    kwargs    : forwarded unchanged to qtl.test_lmm

    Returns whatever getPv() of the fitted LMM returns.
    """
    if not use_Kgeno:
        # Plain Lasso on the background SNPs; the kernel is built from the
        # SNPs that received a non-zero coefficient.
        estimator.fit(Xbg, y)
        nonzero = estimator.coef_ != 0
        covariance = compute_linear_kernel(Xbg, nonzero)
    else:
        background_kernel = compute_linear_kernel(Xbg)
        estimator.fit(Xfg, y, background_kernel)
        nonzero = estimator.coef_ != 0
        if not nonzero.any():
            # Nothing selected: fall back to the full background kernel.
            covariance = background_kernel
        else:
            # NOTE(review): the estimator was fitted on Xfg, yet the mask of
            # its non-zero coefficients is applied to Xbg here -- confirm
            # this is intended (it requires matching column layouts).
            selected_kernel = compute_linear_kernel(Xbg, nonzero)
            # Combine noise, background and selected-SNP kernels and use the
            # optimised total covariance for the association test.
            decomposition = VarianceDecomposition(y)
            decomposition.addRandomEffect(is_noise=True)
            decomposition.addRandomEffect(background_kernel)
            decomposition.addRandomEffect(selected_kernel)
            decomposition.optimize()
            covariance = decomposition.gp.getCovar().K()

    association = qtl.test_lmm(Xfg, y, K=covariance, **kwargs)
    return association.getPv()
def fit(self, X, y, K, standardize=False, verbose=False, **lasso_args):
    """
    fitting the model

    A null variance-decomposition model (noise + K) is optimised first. The
    share of the noise component in the total variance (delta0) is then used
    to rotate X and y into the eigenbasis of K and rescale them, and the
    parent-class Lasso is fitted on the transformed data.

    X: SNP data, 2-D (samples x features)
    y: phenotype data; 1-D, or 2-D with exactly one column
    K: background covariance matrix, square, one row per sample
    standardize: if True, genotypes and phenotypes are standardized.
        This uses in-place operators, so an array X (and a 1-D array y)
        passed in by the caller is modified.
    verbose: if True, the elapsed fitting time is printed
    lasso_args: forwarded unchanged to the parent class's fit()

    Sets self.varComps and self.w_ridge; returns self.
    """
    if y.ndim == 2:
        assert y.shape[1] == 1, 'Only one phenotype can be processed at at time.'
        y = y.flatten()
    time_start = time.time()
    [n_s, n_f] = X.shape
    assert X.shape[0] == y.shape[0], 'dimensions do not match'
    assert K.shape[0] == K.shape[1], 'dimensions do not match'
    assert K.shape[0] == X.shape[0], 'dimensions do not match'

    # standardizing genotypes and phenotypes
    if standardize:
        X -= X.mean(axis=0)
        X /= X.std(axis=0)
        y -= y.mean(axis=0)
        y /= y.std(axis=0)

    # training null model
    vd = VarianceDecomposition(y)
    vd.addRandomEffect(is_noise=True)
    vd.addRandomEffect(K)
    vd.optimize()
    varComps = vd.getVarianceComps()
    # first variance component relative to the total variance
    delta0 = varComps[0, 0] / varComps.sum()
    self.varComps = varComps

    S, U = LA.eigh(K)

    # rotating data
    Sdi = 1. / (S + delta0)
    Sdi_sqrt = SP.sqrt(Sdi)
    # Scale every row of U^T X by the matching entry of Sdi_sqrt; broadcasting
    # gives the same values as tiling without the n_s x n_f temporary.
    SUX = SP.dot(U.T, X) * Sdi_sqrt[:, None]
    SUy = Sdi_sqrt * SP.dot(U.T, y)

    # fitting lasso
    super(LmmLasso, self).fit(SUX, SUy, **lasso_args)
    # prediction of the fixed (Lasso) part on the unrotated X
    yhat = super(LmmLasso, self).predict(X)
    # weights obtained by solving (K + delta0 * I) w = residual
    self.w_ridge = LA.solve(K + delta0 * SP.eye(n_s), y - yhat)

    time_end = time.time()
    time_diff = time_end - time_start
    if verbose:
        # Parenthesised single-argument form: same output on Python 2,
        # and valid syntax on Python 3.
        print('... finished in %.2fs' % (time_diff))
    return self
def fit(self, X, y, K, standardize=False, verbose=False, **lasso_args):
    """
    fitting the model

    A null variance-decomposition model (noise + K) is optimised first. The
    share of the noise component in the total variance (delta0) is then used
    to rotate X and y into the eigenbasis of K and rescale them, and the
    parent-class Lasso is fitted on the transformed data.

    X: SNP data, 2-D (samples x features)
    y: phenotype data; 1-D, or 2-D with exactly one column
    K: background covariance matrix, square, one row per sample
    standardize: if True, genotypes and phenotypes are standardized.
        This uses in-place operators, so an array X (and a 1-D array y)
        passed in by the caller is modified.
    verbose: if True, the elapsed fitting time is printed
    lasso_args: forwarded unchanged to the parent class's fit()

    Sets self.varComps and self.w_ridge; returns self.
    """
    if y.ndim == 2:
        assert y.shape[1] == 1, 'Only one phenotype can be processed at at time.'
        y = y.flatten()
    time_start = time.time()
    [n_s, n_f] = X.shape
    assert X.shape[0] == y.shape[0], 'dimensions do not match'
    assert K.shape[0] == K.shape[1], 'dimensions do not match'
    assert K.shape[0] == X.shape[0], 'dimensions do not match'

    # standardizing genotypes and phenotypes
    if standardize:
        X -= X.mean(axis=0)
        X /= X.std(axis=0)
        y -= y.mean(axis=0)
        y /= y.std(axis=0)

    # training null model
    vd = VarianceDecomposition(y)
    vd.addRandomEffect(is_noise=True)
    vd.addRandomEffect(K)
    vd.optimize()
    varComps = vd.getVarianceComps()
    # first variance component relative to the total variance
    delta0 = varComps[0, 0] / varComps.sum()
    self.varComps = varComps

    S, U = LA.eigh(K)

    # rotating data
    Sdi = 1. / (S + delta0)
    Sdi_sqrt = SP.sqrt(Sdi)
    # Scale every row of U^T X by the matching entry of Sdi_sqrt; broadcasting
    # gives the same values as tiling without the n_s x n_f temporary.
    SUX = SP.dot(U.T, X) * Sdi_sqrt[:, None]
    SUy = Sdi_sqrt * SP.dot(U.T, y)

    # fitting lasso
    super(LmmLasso, self).fit(SUX, SUy, **lasso_args)
    # prediction of the fixed (Lasso) part on the unrotated X
    yhat = super(LmmLasso, self).predict(X)
    # weights obtained by solving (K + delta0 * I) w = residual
    self.w_ridge = LA.solve(K + delta0 * SP.eye(n_s), y - yhat)

    time_end = time.time()
    time_diff = time_end - time_start
    if verbose:
        # Parenthesised single-argument form: same output on Python 2,
        # and valid syntax on Python 3.
        print('... finished in %.2fs' % (time_diff))
    return self