def crossvalidate(self, y, alphas, n_splits=10):
    """Pick the LMM-Lasso penalty ``alpha`` by cross-validation.

    Runs ``lmmlasso.runCrossValidation`` on ``self.E`` with the kinship
    matrix ``self.K``, fits a smoothing spline through the mean train and
    mean test MSE as functions of alpha, and picks, for each curve, the
    alpha at which the spline's second derivative is smallest on a
    100-point linear grid spanning the tested range.  The average of the
    two picks is stored in ``self.alpha`` and returned.

    Parameters
    ----------
    y : phenotype passed straight to ``runCrossValidation``.
    alphas : sequence of candidate alphas.  It is handed to
        ``runCrossValidation`` in the given order; it may be ascending or
        descending (it is sorted internally before the spline fit).
    n_splits : number of cross-validation splits (default 10).

    Returns
    -------
    float
        The selected alpha (also stored in ``self.alpha``).

    Raises
    ------
    AssertionError
        If ``self.K`` is ``None``.
    """
    # tol=0.5 is deliberately loose for the CV sweep; fitmodel() uses its
    # own (tighter) tolerance for the final fit.
    lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False, tol=0.5)
    K = self.K
    assert K is not None, 'no kinship matrix defined'

    # The MSE arrays are reduced with mean(axis=0), leaving one value per
    # alpha (axis 0 is presumably the CV folds -- confirm in lmmlasso).
    MSE_train, MSE_test, _w_nonzero, _rsquared = lmmlasso.runCrossValidation(
        lasso, self.E, y, alphas, n_splits=n_splits, K=K, verbose=True)

    # UnivariateSpline needs increasing x.  Sort the alphas and reorder the
    # per-alpha MSE means with the same permutation so that descending
    # alpha lists (common with warm starts) work too.  For an already
    # ascending list the permutation is the identity.
    alpha_grid = np.asarray(alphas, dtype=float)
    order = np.argsort(alpha_grid)
    alpha_grid = alpha_grid[order]
    train_curve = np.asarray(MSE_train.mean(axis=0))[order]
    test_curve = np.asarray(MSE_test.mean(axis=0))[order]

    # Second derivatives of the smoothed MSE curves.
    train_curvature = sp.interpolate.UnivariateSpline(
        x=alpha_grid, y=train_curve).derivative(n=2)
    test_curvature = sp.interpolate.UnivariateSpline(
        x=alpha_grid, y=test_curve).derivative(n=2)

    # Evaluate only inside the tested range (no extrapolation).
    candidates = np.linspace(alpha_grid[0], alpha_grid[-1], 100)
    best_train = candidates[np.argmin(train_curvature(candidates))]
    best_test = candidates[np.argmin(test_curvature(candidates))]

    self.alpha = (float(best_train) + float(best_test)) / 2
    return self.alpha
def fitmodel(self, y, alpha=None, tol=0.05):
    """Fit the LMM-Lasso on ``self.E`` and store the result on the instance.

    Parameters
    ----------
    y : phenotype; ``y.shape[0]`` must equal ``self.G.shape[0]``.
    alpha : penalty to use.  When given it overwrites ``self.alpha``;
        when ``None`` the previously stored ``self.alpha`` is used.
    tol : tolerance forwarded to ``lmmlasso.LmmLasso`` (default 0.05).

    Side effects
    ------------
    Sets ``self.alpha`` (if ``alpha`` is given), ``self.feweights`` (the
    fitted ``coef_`` reshaped to a column) and ``self.lasso`` (the object
    returned by ``fit``).  Returns ``None``.

    Raises
    ------
    AssertionError
        If no alpha is available, if the row counts of ``y`` and ``self.G``
        differ, or if ``self.K`` is ``None``.
    """
    if alpha is not None:
        self.alpha = alpha
    else:
        assert self.alpha is not None, 'Set an alpha value'

    # NOTE(review): the size check is against self.G although the model is
    # fitted on self.E -- confirm both always share the same row count.
    n_obs = y.shape[0]
    assert n_obs == self.G.shape[0], 'No. of observations does not match'

    estimator = lmmlasso.LmmLasso(
        warm_start=True, fit_intercept=False, tol=tol)
    design = self.E
    estimator.set_params(alpha=self.alpha)

    assert self.K is not None, 'Include a Kinship matrix'
    fitted = estimator.fit(design, y, K=self.K)

    # Fixed-effect weights, stored as an (n, 1) column.
    self.feweights = np.reshape(fitted.coef_, (-1, 1))
    self.lasso = fitted
# Running LMMLASSO: cross-validate over a log2-spaced grid of alphas, pick
# alpha_cv from the curvature of the mean MSE curves, and start plotting.
alphas = 2.**(sp.linspace(-2, 10, 10))  # list of alphas to test
n_splits = 10
N = X.shape[0]
# kf is not used by the cross-validation call below; it is kept because the
# rest of the script (not visible here) may rely on it.
kf = KFold(n_splits, shuffle=True, random_state=None)
n_alphas = len(alphas)
kf.get_n_splits(X)  # result is discarded

# lmmlasso is imported only after changing into this directory, so the
# import presumably resolves relative to it -- keep the two lines together.
os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")
import lmmlasso

# Note the (loose) tolerance value used for the CV sweep.
lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False, tol=0.5)

# The MSE / non-zero-weight arrays come straight from runCrossValidation, so
# no zero-filled placeholders are allocated beforehand.  The number of splits
# is taken from n_splits instead of a second hard-coded 10.
MSE_train, MSE_test, W_nonzero = lmmlasso.runCrossValidation(
    lasso, X, y, alphas, n_splits=n_splits, K=K, verbose=True)

# Second derivatives of smoothing splines through the mean MSE per alpha.
# Author's note: something about the rotation here is different from the
# original script.
MSE_train_inter = sp.interpolate.UnivariateSpline(
    x=alphas, y=(MSE_train.mean(axis=0))).derivative(n=2)
MSE_test_inter = sp.interpolate.UnivariateSpline(
    x=alphas, y=(MSE_test.mean(axis=0))).derivative(n=2)

# NOTE(review): the evaluation grid spans 2**2 .. 2**12 while the alphas that
# were cross-validated span 2**-2 .. 2**10.  The splines are therefore
# extrapolated above 2**10 and never evaluated below 2**2.  This looks like a
# leftover from an earlier alpha range -- confirm before trusting alpha_cv.
alphas_inter = 2.**(sp.linspace(2, 12, 100))
idx_train = sp.argmin(MSE_train_inter(alphas_inter))
idx_test = sp.argmin(MSE_test_inter(alphas_inter))
# alpha_cv is the average of the train-based and test-based picks.
alpha_cv = (float(alphas_inter[idx_train]) + float(alphas_inter[idx_test])) / 2

import pylab as pl

pl.figure(figsize=[20, 4])
pls = pl.subplot(1, 3, 1)
# Mean training MSE against log2(alpha), with the chosen alpha in red.
pls.plot(sp.log2(alphas), MSE_train.mean(axis=0), linewidth=2)
pl.axvline(sp.log2(alpha_cv), color='r')
which='both', bottom=False, top=False, labelbottom=False) plt.ylabel('-log10(p-value)') plt.xlabel('SNPs') plt.title('GWAS using LMM; F-test; Unique SNPs only') plt.show() #Running LMM lasso (100k SNPs + covariance matrix) import lmmlasso #Cross validation to get the optimal parameters alphas = 2.**(sp.linspace(2, 12, 10)) alphas = alphas[::-1] from lmmlasso import runCrossValidation lasso = lmmlasso.LmmLasso( ) #No need to set parameters because these will be decided through cross-validation [may need to set tolerance higher, use tol=0.05 as a baseline] MSE_train, MSE_test, W_nonzero = lmmlasso.runCrossValidation(lasso, SNP_data, Pheno_data, alphas, n_splits=10, K=K_data, verbose=True) #Then from Alex's code.. import pylab as pl MSE_train_inter = sp.interpolate.UnivariateSpline(x=np.flip(alphas, axis=0), y=np.flip( MSE_train.mean(axis=0), axis=0)).derivative(n=2)
else: print( str(n_snps_start - SNP_in) + '/' + str(n_snps_start) + ' SNPs did not meet the MAF threshold') print( str(SNP_data.shape[1]) + '/' + str(SNP_in) + ' SNPs included in this GWAS') END_load = datetime.now() print('Files were loaded in ' + str(END_load - START)) # running cross-validation alphas = 2.**(sp.linspace(2, 12, 10)) alphas = alphas[::-1] # running LMM-Lasso lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False) MSE_train, MSE_test, W_nonzero = lmmlasso.runCrossValidation(lasso, SNP_data, Pheno_data, alphas, n_folds=10, K=K_data, verbose=True) # the dirty version #idx = sp.argmin(MSE_test.mean(axis=0)) #alpha_cv = alphas[idx] #OR # the verion with secondary derivatives MSE_train_inter = sp.interpolate.UnivariateSpline( x=np.flip(alphas, axis=0), y=np.flip(MSE_train.mean(axis=0),