Пример #1
0
    def crossvalidate(self, y, alphas, n_splits=10):
        """Pick the LMM-Lasso regularisation strength by cross-validation.

        Runs ``lmmlasso.runCrossValidation`` on ``self.E`` with the kinship
        matrix ``self.K``, fits a spline through the fold-averaged train and
        test MSE curves (as functions of alpha), and for each curve takes the
        alpha on a 100-point grid at which the spline's second derivative is
        smallest. The mean of those two alphas is stored and returned.

        Parameters
        ----------
        y :
            Phenotype, forwarded unchanged to ``runCrossValidation``.
        alphas :
            Candidate alpha values, in any order. They are forwarded to
            ``runCrossValidation`` as given and only sorted internally for
            the spline fit.
        n_splits : int, optional
            Number of folds, forwarded to ``runCrossValidation``.

        Returns
        -------
        float
            The selected alpha, which is also stored in ``self.alpha``.

        Raises
        ------
        AssertionError
            If ``self.K`` is ``None``.
        """
        # Local import: scipy's top-level ``linspace``/``argmin`` are
        # deprecated aliases of the NumPy functions, so call NumPy directly.
        import numpy as np

        K = self.K
        # Fail before any model is built or any fold is run.
        assert K is not None, 'no kinship matrix defined'

        lasso = lmmlasso.LmmLasso(warm_start=True,
                                  fit_intercept=False,
                                  tol=0.5)
        # The last two returned values are not used here.
        MSE_train, MSE_test, _w_nonzero, _rsquared = lmmlasso.runCrossValidation(
            lasso, self.E, y, alphas, n_splits=n_splits, K=K, verbose=True)

        # UnivariateSpline requires increasing x. Sort the alphas and apply
        # the same permutation to the MSE curves so a descending (or
        # unordered) alpha list no longer breaks the fit. Already-ascending
        # input is left untouched by this permutation.
        alpha_values = np.asarray(alphas, dtype=float)
        order = np.argsort(alpha_values)
        sorted_alphas = alpha_values[order]

        # Evaluation grid covering exactly the range of the tested alphas.
        alphas_inter = np.linspace(sorted_alphas[0], sorted_alphas[-1], 100)

        def _min_second_derivative_alpha(mse):
            """Return the grid alpha minimising the MSE spline's 2nd derivative."""
            mean_mse = np.asarray(mse.mean(axis=0))[order]
            curvature = sp.interpolate.UnivariateSpline(
                x=sorted_alphas, y=mean_mse).derivative(n=2)
            return float(alphas_inter[np.argmin(curvature(alphas_inter))])

        alpha_train = _min_second_derivative_alpha(MSE_train)
        alpha_test = _min_second_derivative_alpha(MSE_test)

        self.alpha = (alpha_train + alpha_test) / 2
        return self.alpha
Пример #2
0
    def fitmodel(self, y, alpha=None, tol=0.05):
        """Fit an LMM-Lasso of ``y`` on ``self.E`` and keep the fitted model.

        Parameters
        ----------
        y :
            Response; ``y.shape[0]`` must equal ``self.G.shape[0]``.
        alpha : optional
            Regularisation strength. When given it overwrites ``self.alpha``;
            when omitted, the existing ``self.alpha`` is used and must not be
            ``None``.
        tol : float, optional
            Tolerance forwarded to ``lmmlasso.LmmLasso``.

        Side effects
        ------------
        Sets ``self.alpha`` (when ``alpha`` is given), ``self.feweights``
        (the fitted ``coef_`` reshaped to a single column) and ``self.lasso``
        (the fitted estimator). Returns ``None``.

        Raises
        ------
        AssertionError
            If no alpha is available, if the number of observations in ``y``
            differs from ``self.G``, or if ``self.K`` is ``None``.
        """
        if alpha is not None:
            self.alpha = alpha
        else:
            assert self.alpha is not None, 'Set an alpha value'

        n_observations = y.shape[0]
        assert n_observations == self.G.shape[0], \
            'No. of observations does not match'

        model = lmmlasso.LmmLasso(warm_start=True,
                                  fit_intercept=False,
                                  tol=tol)
        design = self.E
        model.set_params(alpha=self.alpha)

        assert self.K is not None, 'Include a Kinship matrix'
        model = model.fit(design, y, K=self.K)

        # Fixed-effect weights, stored as an (n, 1) column.
        self.feweights = np.reshape(model.coef_, (-1, 1))
        self.lasso = model
Пример #3
0
	# Running LMMLASSO: cross-validate over a grid of alphas, derive a
	# single alpha from the curvature of the mean MSE curves, and plot the
	# mean training MSE with that alpha marked.
	# Ten candidate alphas, log2-spaced from 2**-2 to 2**10.
	alphas = 2.**(sp.linspace(-2,10,10)) #list of alphas to test
	n_splits=10
	N = X.shape[0]
	kf = KFold(n_splits,shuffle=True,random_state=None)
	n_alphas = len(alphas)
	# These three (n_splits, n_alphas) arrays are rebound by the
	# runCrossValidation call further down in this fragment.
	MSE_train = sp.zeros((n_splits,n_alphas))
	MSE_test  = sp.zeros((n_splits,n_alphas))
	W_nonzero = sp.zeros((n_splits,n_alphas))
	# The return value is discarded here.
	kf.get_n_splits(X)

	# NOTE(review): hard-coded, machine-specific path; presumably needed so
	# that `import lmmlasso` below resolves from this directory - confirm.
	os.chdir("/srv/uom-data1-q.unimelb.edu.au/6300-afournier/home/student.unimelb.edu.au/andhikap/Clim_GWAS/Clim_GWAS_2")

	import lmmlasso

	lasso = lmmlasso.LmmLasso(warm_start=True,fit_intercept=False,tol=0.5) #note the tolerance value

	MSE_train,MSE_test,W_nonzero = lmmlasso.runCrossValidation(lasso,X,y,alphas,n_splits=10,K=K,verbose=True)
	# Second derivative of a spline fitted through the fold-averaged MSE
	# (mean over axis 0) as a function of alpha.
	MSE_train_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_train.mean(axis=0))).derivative(n=2) #something about the rotation here is different from the original script
	MSE_test_inter=sp.interpolate.UnivariateSpline(x=alphas, y=(MSE_test.mean(axis=0))).derivative(n=2)

	# NOTE(review): this grid spans 2**2..2**12 while the splines were fitted
	# on alphas spanning 2**-2..2**10, so part of the grid lies outside the
	# fitted range - confirm this is intended.
	alphas_inter = 2.**(sp.linspace(2,12,100))
	# Grid positions where each second derivative is smallest.
	idx_train = sp.argmin(MSE_train_inter(alphas_inter)) 
	idx_test = sp.argmin(MSE_test_inter(alphas_inter))
	# Final alpha: mean of the train-based and test-based picks.
	alpha_cv = (float(alphas_inter[idx_train])+float(alphas_inter[idx_test]))/2

	import pylab as pl
	pl.figure(figsize=[20,4])
	pls = pl.subplot(1,3,1)
	# Mean training MSE against log2(alpha), with a red vertical line at
	# log2 of the selected alpha.
	pls.plot(sp.log2(alphas),MSE_train.mean(axis=0),linewidth=2)
	pl.axvline(sp.log2(alpha_cv),color='r')
Пример #4
0
                which='both',
                bottom=False,
                top=False,
                labelbottom=False)
# Label, title and display the current figure.
plt.ylabel('-log10(p-value)')
plt.xlabel('SNPs')
plt.title('GWAS using LMM; F-test; Unique SNPs only')
plt.show()

#Running LMM lasso (100k SNPs + covariance matrix)
import lmmlasso
#Cross validation to get the optimal parameters
# Ten candidate alphas, log2-spaced from 2**2 to 2**12, then reversed so
# they are in descending order.
alphas = 2.**(sp.linspace(2, 12, 10))
alphas = alphas[::-1]
# Imported by name here, although the call below goes through the
# `lmmlasso` module attribute.
from lmmlasso import runCrossValidation
lasso = lmmlasso.LmmLasso(
)  #No need to set parameters because these will be decided through cross-validation [may need to set tolerance higher, use tol=0.05 as a baseline]
MSE_train, MSE_test, W_nonzero = lmmlasso.runCrossValidation(lasso,
                                                             SNP_data,
                                                             Pheno_data,
                                                             alphas,
                                                             n_splits=10,
                                                             K=K_data,
                                                             verbose=True)

#Then from Alex's code..
import pylab as pl

# `alphas` is descending at this point, so both it and the fold-averaged
# training MSE are flipped back to ascending order before the spline fit;
# the result is the spline's second derivative.
MSE_train_inter = sp.interpolate.UnivariateSpline(x=np.flip(alphas, axis=0),
                                                  y=np.flip(
                                                      MSE_train.mean(axis=0),
                                                      axis=0)).derivative(n=2)
Пример #5
0
    else:
        print(
            str(n_snps_start - SNP_in) + '/' + str(n_snps_start) +
            ' SNPs did not meet the MAF threshold')
        print(
            str(SNP_data.shape[1]) + '/' + str(SNP_in) +
            ' SNPs included in this GWAS')
        END_load = datetime.now()
        print('Files were loaded in ' + str(END_load - START))

    # running cross-validation
    # Ten candidate alphas, log2-spaced from 2**2 to 2**12, reversed into
    # descending order.
    alphas = 2.**(sp.linspace(2, 12, 10))
    alphas = alphas[::-1]

    # running LMM-Lasso
    lasso = lmmlasso.LmmLasso(warm_start=True, fit_intercept=False)
    # NOTE(review): the fold count is passed as `n_folds` here; confirm this
    # keyword matches the signature of the lmmlasso.runCrossValidation in use.
    MSE_train, MSE_test, W_nonzero = lmmlasso.runCrossValidation(lasso,
                                                                 SNP_data,
                                                                 Pheno_data,
                                                                 alphas,
                                                                 n_folds=10,
                                                                 K=K_data,
                                                                 verbose=True)

    # the dirty version
    #idx = sp.argmin(MSE_test.mean(axis=0))
    #alpha_cv = alphas[idx]
    #OR
    # the version with second derivatives
    MSE_train_inter = sp.interpolate.UnivariateSpline(
        x=np.flip(alphas, axis=0), y=np.flip(MSE_train.mean(axis=0),