Example #1
0
 def __init__(self,
              snps_test,
              phenotype,
              K=None,
              snps_K=None,
              covariates=None,
              h2=None,
              interact_with_snp=None,
              nGridH2=10,
              standardizer=None,
              add_bias=True,
              normalize_K=True,
              blocksize=10000):
     """Set up the GWAS model: validate inputs, build the kernel, create the LMM.

     Args:
         snps_test: SNPs to be tested for association (stored as-is).
         phenotype: phenotype values, validated via GWAS.check_pheno_format.
         K: [N x N] precomputed kinship matrix (mutually exclusive with snps_K).
         snps_K: SNP data from which the kinship matrix is computed.
         covariates: covariate values, validated like the phenotype.
         h2: NOTE(review): accepted but never stored or used in this
             constructor — confirm whether it is intended for a subclass.
         interact_with_snp: stored for later SNP-interaction testing.
         nGridH2: number of grid points for the h2 optimization.
         standardizer: SNP standardizer [default: pysnptools Unit()].
         add_bias: if True, add a constant bias column to the covariates.
         normalize_K: if True, scale K so its diagonal averages to 1.0.
         blocksize: block size used when computing the kernel from snps_K.

     Raises:
         NotImplementedError: if both K and snps_K are given, or if neither
             a bias term nor covariates are requested.
     """
     if standardizer is None:
         standardizer = pysnptools.standardizer.Unit()
     self.standardizer = standardizer
     self.phenotype = GWAS.check_pheno_format(phenotype=phenotype)
     self.covariates = GWAS.check_pheno_format(phenotype=covariates)
     self.snps_test = snps_test
     self.snps_K = snps_K
     self.K = K
     self.linreg = False
     if (self.K is None) and (self.snps_K is None):
         # No kinship information at all: fall back to linear regression
         # (an empty [N x 0] genotype matrix).
         G = np.zeros((self.sample_count, 0))
         self.linreg = True
     elif self.K is None:
         # Build the kernel from the provided SNPs.
         G = None
         self.K = snps_K.kernel(standardizer=standardizer,
                                blocksize=blocksize)
         if normalize_K:
             self.K /= self.K.diagonal().mean()
     elif snps_K is None:
         # A precomputed kernel was supplied directly.
         G = None
         if normalize_K:
             self.K = self.K / self.K.diagonal().mean()
     else:
         raise NotImplementedError("either K or snps_K has to be None")
     if add_bias and (self.covariates is None):
         pass  # this case is treated in LMM()
     elif add_bias:
         bias = pd.Series(np.ones((self.covariates.shape[0])),
                          index=self.covariates.index)
         self.covariates['bias'] = bias
     elif not add_bias and self.covariates is None:
         # was: `add_bias == False` (non-idiomatic, PEP 8/E712); `not add_bias`
         # also catches falsy values that previously fell through silently.
         raise NotImplementedError(
             "currently a model with neither bias term, nor covariates is not supported."
         )
     self.interact_with_snp = interact_with_snp
     self.nGridH2 = nGridH2
     if self.covariates is not None:
         self.lmm = lmm_cov.LMM(X=self.covariates.values,
                                Y=None,
                                G=G,
                                K=self.K,
                                inplace=True)
     else:
         self.lmm = lmm_cov.LMM(X=None, Y=None, G=G, K=self.K, inplace=True)
     self._find_h2()
Example #2
0
 def __init__(self,
              Y,
              X=None,
              appendbias=False,
              forcefullrank=False,
              G0=None,
              K0=None,
              nullModel=None,
              altModel=None):
     """Initialize the variance-component test and fit the null model.

     Args:
         Y: phenotype vector (made available as self.Y by the base class).
         X: covariate matrix [default: None].
         appendbias: append a bias column to X (handled by the base class).
         forcefullrank: force the full-rank code path in the LMM.
         G0: background genotype matrix (passed to the LMM as G).
         K0: background kinship matrix (passed to the LMM as K).
         nullModel: stored description of the null model (not used here).
         altModel: stored description of the alternative model (not used here).
     """
     association.varcomp_test.__init__(self,
                                       Y=Y,
                                       X=X,
                                       appendbias=appendbias)
     # NOTE: the original computed `N = self.Y.shape[0]` here but never used
     # it; the dead local has been removed.
     self.forcefullrank = forcefullrank
     self.nullModel = nullModel
     self.altModel = altModel
     self.G0 = G0
     self.K0 = K0
     self.__testGcalled = False
     self.lmm = lmm.LMM(forcefullrank=self.forcefullrank,
                        X=self.X,
                        linreg=None,
                        Y=self.Y[:, np.newaxis],
                        G=self.G0,
                        K=self.K0,
                        regressX=True)
     # The null model only has a single kernel and only needs to find h2.
     self.model0 = self.lmm.findH2()
     self.model1 = None
Example #3
0
def est_h2(Y, K, covariates=None, nGridH2=10000, plot=True, verbose=True):
    """
    This function implements the Bayesian heritability estimate from Furlotte et al., 2014

    Furlotte, Nicholas A., David Heckerman, and Christoph Lippert.
    "Quantifying the uncertainty in heritability." Journal of human genetics 59.5 (2014): 269-275.

    Args:
        Y:              [N x 1] np.ndarray of phenotype values
        K:              [N x N] np.ndarray of kinship values
        covariates:     [N x D] np.ndarray of covariate values [default: None]
        nGridH2:        number of h^2 grid points for the posterior [default: 10000]
        plot:           Boolean, create a plot? [default: True]
        verbose:        print results? [default: True]

    returns:
        REML estimate of h^2 (as in Yang et al. 2010)
        posterior mean of h^2
        posterior variance of h^2
        h2 values on a grid
        posterior for h2 values on a grid
    """
    lmm = lmm_cov.LMM(forcefullrank=False,
                      X=covariates,
                      linreg=None,
                      Y=Y,
                      G=None,
                      K=K,
                      regressX=True,
                      inplace=False)
    h2 = lmm.findH2()
    h2_posterior = lmm.posterior_h2(nGridH2=nGridH2)
    # NOTE(review): assumes posterior_h2 returns the grid at index 1 and the
    # negative log-posterior at index 2 — confirm against lmm_cov.
    logp = -h2_posterior[2]
    grid = h2_posterior[1]
    # Unnormalized posterior weights; subtract the max log-value before
    # exponentiating for numerical stability. Computed once and reused
    # (the original recomputed this expression five times).
    weights = np.exp(logp - logp.max())
    weight_sum = weights.sum()
    # Scale by the number of grid points — presumably so post_h2 behaves as a
    # density over the unit interval (grid spacing 1/nGridH2); TODO confirm.
    post_h2 = weights / weight_sum * logp.shape[0]
    h2_mean = (weights * grid[:, np.newaxis]).sum() / weight_sum
    h2_var = (weights * (grid[:, np.newaxis] - h2_mean)**2.0).sum() / weight_sum
    if plot:
        import pylab as plt
        plt.figure()
        # Vertical markers for the REML point estimate and the posterior mean.
        plt.plot([h2['h2'], h2['h2']], [0, 1], "r")
        plt.plot([h2_mean, h2_mean], [0, 1], "g")
        plt.legend([
            "REML-estimate = %.3f" % h2['h2'],
            "posterior mean = %.3f" % h2_mean
        ])
        plt.plot(grid.flatten(), post_h2.flatten())
        plt.xlabel("$h^2$")
        plt.ylabel("$p( h^2 | Data)$")
    if verbose:
        print("max[h^2] = %.5f  E[h^2] = %.5f +- %.5f" %
              (h2['h2'], h2_mean, np.sqrt(h2_var)))
    return h2, h2_mean, h2_var, grid, post_h2
Example #4
0
def assoc_scan(Y, X, covariates=None, K=None):
    """Association scan: test each SNP in X against the phenotype Y.

    Fits the variance component once (h2 searched on a 100-point grid),
    then evaluates every SNP under that fixed h2 and converts the
    resulting chi^2 statistics to p-values.

    Returns:
        (p_values, h2): per-SNP p-values and the fitted h2 estimate.
    """
    model = lmm_cov.LMM(X=covariates, Y=Y, G=X, K=K)
    h2 = model.findH2(nGridH2=100)['h2']
    result = model.nLLeval(h2=h2,
                           delta=None,
                           dof=None,
                           scale=1.0,
                           penalty=0.0,
                           snps=X)
    beta = result['beta']
    chi2_stats = beta * beta / result['variance_beta']
    p_values = st.chi2.sf(chi2_stats, 1)[:, 0]
    return p_values, h2