Exemplo n.º 1
0
def fitTwo(y, K1, K2, X0=None, wgrids=100):
    """
    Fit a model with two variance components by a grid search.

    For every weight ``w`` on a uniform grid a combined kinship
    ``w * K1 + (1 - w) * K2`` is built and a standard single-kinship
    ``LMM`` is fitted to it.  The weight whose fit reports the largest
    log-likelihood is selected; on ties the first maximum is used and a
    warning is written to stderr.

    Parameters
    ----------
    y : passed unchanged to ``LMM`` as the phenotype argument.
    K1, K2 : the two kinships that are linearly combined.
    X0 : forwarded to ``LMM`` as the ``X0`` keyword.
    wgrids : number of grid points.  The weights are ``i / wgrids`` for
        ``i = 0 .. wgrids - 1``, so ``w = 1`` itself is never evaluated.

    Returns
    -------
    tuple
        ``(h1, h2, e, beta, sigma, LL)`` where ``hmax, beta, sigma, LL``
        come from the best fit and ``h1 = w * hmax``,
        ``h2 = (1 - w) * hmax``, ``e = 1 - hmax``.
    """
    # Uniform grid of mixing weights in [0, 1).
    weights = np.array(range(wgrids)) / float(wgrids)

    fits = []
    for weight in weights:
        # Heritability is estimated for this linear combination of kinships.
        combined = weight * K1 + (1.0 - weight) * K2
        sys.stderr.write("Fitting weight %0.2f\n" % (weight))
        fits.append(LMM(y, combined, X0=X0).fit())
        del combined

    # The log-likelihood is the last element of each fit result.
    log_liks = np.array([fit[-1] for fit in fits])
    best = np.where(log_liks == log_liks.max())[0]
    if len(best) > 1:
        sys.stderr.write("WARNING: Found multiple maxes using first one\n")
    best = best[0]

    hmax, beta, sigma, LL = fits[best]
    w_best = weights[best]

    return (w_best * hmax, (1.0 - w_best) * hmax, (1.0 - hmax),
            beta, sigma, LL)
Exemplo n.º 2
0
def fitTwo(y, K1, K2, X0=None, wgrids=100):
    """
    Simple function to fit a model with two variance components.

    It works by running the standard pylmm algorithm in a loop where at
    each iteration a new kinship is generated as a linear combination of
    the original two, ``w * K1 + (1 - w) * K2``.  The weight giving the
    largest log-likelihood is kept.

    Parameters
    ----------
    y : passed unchanged to ``LMM`` as the phenotype argument.
    K1, K2 : the two kinships to combine.
    X0 : forwarded to ``LMM`` as the ``X0`` keyword.
    wgrids : number of grid points; weights are ``i / wgrids`` for
        ``i = 0 .. wgrids - 1`` (``w = 1`` is not part of the grid).

    Returns
    -------
    tuple
        ``(h1, h2, e, beta, sigma, LL)`` with ``h1 = w * hmax``,
        ``h2 = (1 - w) * hmax`` and ``e = 1 - hmax`` for the best weight
        ``w``; ``hmax, beta, sigma, LL`` are the values returned by the
        best fit.
    """
    # NOTE: the body is indented with spaces only.  The previous mix of
    # tabs and spaces is rejected by Python 3 with a TabError.

    # Create a uniform grid
    W = np.array(range(wgrids)) / float(wgrids)
    Res = []
    LLs = []

    for w in W:
        # heritability will be estimated for linear combo of kinships
        K = w * K1 + (1.0 - w) * K2
        sys.stderr.write("Fitting weight %0.2f\n" % (w))
        model = LMM(y, K, X0=X0)
        R = model.fit()
        Res.append(R)
        # The log-likelihood is the last element of the fit result.
        LLs.append(R[-1])

        # Drop the combined kinship before the next one is allocated.
        del K

    log_liks = np.array(LLs)
    i = np.where(log_liks == log_liks.max())[0]
    if len(i) > 1:
        sys.stderr.write("WARNING: Found multiple maxes using first one\n")

    i = i[0]
    hmax, beta, sigma, LL = Res[i]
    w = W[i]

    h1 = w * hmax
    h2 = (1.0 - w) * hmax
    e = (1.0 - hmax)

    return h1, h2, e, beta, sigma, LL
Exemplo n.º 3
0
def compute_snp(collect):
    """
    Run the association test for a single SNP and format the result.

    Relies on names defined elsewhere in the script: ``keep``, ``n``,
    ``options``, ``Y``, ``X0``, ``K``, ``K2``, ``L``, ``LMM``,
    ``LMM_withK2`` and ``formatResult``.

    Parameters
    ----------
    collect : indexable
        ``collect[0]`` is the SNP vector and ``collect[1]`` its identifier.

    Returns
    -------
    Whatever ``formatResult`` returns.  All statistics are passed as NaN
    when the SNP cannot be tested: at most one non-missing value,
    variance <= 1e-6 after dropping missing values, or zero variance
    when nothing is missing.
    """
    snp = collect[0]
    snp_id = collect[1]

    # Check SNPs for missing values
    x = snp[keep].reshape((n, 1))  # all the SNPs
    missing = np.isnan(x).reshape((-1,))

    if missing.sum():
        # `True - missing` raises TypeError on boolean arrays in current
        # NumPy; `~` is the supported logical negation.
        keeps = ~missing
        xs = x[keeps, :]
        if keeps.sum() <= 1 or xs.var() <= 1e-6:
            return formatResult(snp_id, np.nan, np.nan, np.nan, np.nan)

        # Its ok to center the genotype -  I used options.normalizeGenotype to
        # force the removal of missing genotypes as opposed to replacing them
        # with MAF.
        if not options.normalizeGenotype:
            xs = (xs - xs.mean()) / np.sqrt(xs.var())

        # Restrict phenotype, covariates and kinship(s) to the individuals
        # that have a genotype for this SNP, and build a dedicated model.
        Ys = Y[keeps]
        X0s = X0[keeps, :]
        Ks = K[keeps, :][:, keeps]
        if options.kfile2:
            K2s = K2[keeps, :][:, keeps]
            Ls = LMM_withK2(Ys, Ks, X0=X0s, verbose=options.verbose, K2=K2s)
        else:
            Ls = LMM(Ys, Ks, X0=X0s, verbose=options.verbose)

        if options.refit:
            Ls.fit(X=xs, REML=options.REML)
        else:
            Ls.fit(REML=options.REML)
        ts, ps, beta, betaVar = Ls.association(
            xs, REML=options.REML, returnBeta=True)
    else:
        if x.var() == 0:
            return formatResult(snp_id, np.nan, np.nan, np.nan, np.nan)

        if options.refit:
            L.fit(X=x, REML=options.REML)
        # This is where it happens
        ts, ps, beta, betaVar = L.association(
            x, REML=options.REML, returnBeta=True)

    return formatResult(snp_id, beta, np.sqrt(betaVar).sum(), ts, ps)
Exemplo n.º 4
0
    def __init__(self, Y, K, Kva=[], Kve=[], norm=True, X0=None, verbose=True):
        """
        Set up one null-model LMM per phenotype column of ``Y``.

        Parameters
        ----------
        Y : 2-D array; ``Y.shape`` gives ``(N, M)`` and each column
            ``Y[:, i]`` is handed to its own ``LMM``.
        K : kinship matrix, stored and passed to ``getEigen`` when no
            decomposition is supplied.
        Kva, Kve : pre-computed decomposition of ``K``.  When either is
            empty both are obtained from ``self.getEigen(K)``.  The ``[]``
            defaults are only ever rebound, never mutated.
        norm : when true, ``self.Y`` is passed through ``normPhenos``.
        X0 : covariates shared by every phenotype; ``None`` selects
            ``self._getDefaultX0()``.
        verbose : stored on the instance.
        """
        # We are assuming that X0 is the same for each phenotype

        self.verbose = verbose

        if len(Kva) == 0 or len(Kve) == 0:
            Kva, Kve = self.getEigen(K)
        self.K = K
        self.Kva = Kva
        self.Kve = Kve

        # Non-positive eigenvalues are replaced by a small positive value.
        # The reported count uses the same `<= 0` mask as the replacement
        # (it previously counted only strictly negative values).
        # NOTE: the assignment is in place, so a caller-supplied Kva array
        # is modified as well.
        nonpositive = self.Kva <= 0
        if sum(nonpositive):
            sys.stderr.write("Cleaning %d eigen values\n" %
                             (sum(nonpositive)))
            self.Kva[nonpositive] = 1e-6

        self.Y = self.cleanPhenos(Y)
        if norm:
            self.Y = self.normPhenos(self.Y)
        # NOTE(review): the shape and the per-phenotype columns below are
        # taken from the raw `Y`, not from the cleaned `self.Y` -- confirm
        # this is intended.
        self.N = Y.shape[0]
        self.M = Y.shape[1]
        # `is None` rather than `== None`: comparing an ndarray with `==`
        # is elementwise and makes the `if` raise.
        if X0 is None:
            X0 = self._getDefaultX0()
        self.X0 = X0

        self.LMMs = []
        for i in range(self.M):
            self.LMMs.append(
                LMM(Y[:, i], self.K, self.Kva, self.Kve, X0=self.X0))
            # Fitting under the NULL where the SNP has no effect
            self.LMMs[i].fit()

        self._cacheLLStuff()

        self.mxCor = None
        self.R = None

        self.gcors = None
        self.ecors = None
        self.ngrids = 100
        self._setDefaultGcorsandEcors(self.ngrids)
0
def compute_snp(j, snp_ids, q=None):
    """
    Run the association test for a batch of SNPs and queue the results.

    Relies on names defined elsewhere in the script: ``keep``, ``n``,
    ``options``, ``Y``, ``X0``, ``K``, ``K2``, ``L``, ``LMM``,
    ``LMM_withK2`` and ``formatResult``.

    Parameters
    ----------
    j : job identifier; it is queued together with the results and
        returned.
    snp_ids : iterable of ``(snp, id)`` pairs.
    q : object with a ``put`` method that receives ``[j, results]``.
        When ``None`` the attribute ``compute_snp.q`` is used instead.

    Returns
    -------
    ``j``, unchanged.
    """
    result = []
    for snp, snp_id in snp_ids:
        # Check SNPs for missing values
        x = snp[keep].reshape((n, 1))  # all the SNPs
        missing = np.isnan(x).reshape((-1, ))
        if missing.sum():
            # NOTE (from the original author): this code appears to be
            # unreachable!
            if options.verbose:
                sys.stderr.write("Found missing values in " + str(x))
            # `True - missing` raises TypeError on boolean arrays in
            # current NumPy; `~` is the supported logical negation.
            keeps = ~missing
            xs = x[keeps, :]
            if keeps.sum() <= 1 or xs.var() <= 1e-6:
                result.append(
                    formatResult(snp_id, np.nan, np.nan, np.nan, np.nan))
                continue

            # Its ok to center the genotype -  I used
            # options.normalizeGenotype to force the removal of missing
            # genotypes as opposed to replacing them with MAF.
            if not options.normalizeGenotype:
                xs = (xs - xs.mean()) / np.sqrt(xs.var())

            # Restrict phenotype, covariates and kinship(s) to individuals
            # with a genotype for this SNP and build a dedicated model.
            Ys = Y[keeps]
            X0s = X0[keeps, :]
            Ks = K[keeps, :][:, keeps]
            if options.kfile2:
                K2s = K2[keeps, :][:, keeps]
                Ls = LMM_withK2(Ys,
                                Ks,
                                X0=X0s,
                                verbose=options.verbose,
                                K2=K2s)
            else:
                Ls = LMM(Ys, Ks, X0=X0s, verbose=options.verbose)
            if options.refit:
                Ls.fit(X=xs, REML=options.REML)
            else:
                Ls.fit(REML=options.REML)
            ts, ps, beta, betaVar = Ls.association(xs,
                                                   REML=options.REML,
                                                   returnBeta=True)
        else:
            if x.var() == 0:
                # NOTE (from the original author): this code appears to be
                # unreachable!
                result.append(
                    formatResult(snp_id, np.nan, np.nan, np.nan, np.nan))
                continue

            if options.refit:
                L.fit(X=x, REML=options.REML)
            # This is where it happens
            ts, ps, beta, betaVar = L.association(x,
                                                  REML=options.REML,
                                                  returnBeta=True)
        result.append(
            formatResult(snp_id, beta, np.sqrt(betaVar).sum(), ts, ps))

    # Only fall back to the function attribute when no queue was supplied.
    # A truthiness test would also discard a supplied queue object that
    # happens to be falsy.
    if q is None:
        q = compute_snp.q
    q.put([j, result])
    return j
Exemplo n.º 6
0
    Kve = []

# A pre-computed decomposition is only valid when no individuals were
# removed; in every other case Kva/Kve are left empty so that it has to be
# computed again.
if v.sum() or not options.eigenfile:
    Kva = []
    Kve = []
else:
    if options.verbose:
        sys.stderr.write("Loading pre-computed eigendecomposition...\n")
    Kva = np.load(options.eigenfile + ".Kva")
    Kve = np.load(options.eigenfile + ".Kve")

# Build the LMM object used for association.
n = K.shape[0]
if options.kfile2:
    L = LMM_withK2(Y, K, Kva, Kve, X0, verbose=options.verbose, K2=K2)
else:
    L = LMM(Y, K, Kva, Kve, X0, verbose=options.verbose)

# Null-model fit.  Skipped when refit is requested, because the model is
# then refitted for each SNP anyway.
if not options.refit:
    if options.verbose:
        sys.stderr.write("Computing fit for null model\n")
    L.fit()
    if options.verbose:
        if options.kfile2:
            sys.stderr.write(
                "\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" %
                (L.optH, L.optSigma, L.optW))
        else:
            sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" %
                             (L.optH, L.optSigma))


def compute_snp(j, snp_ids, q=None):
Exemplo n.º 7
0
   Kva = []
   Kve = []

# The stored decomposition is reused only if no individuals were removed.
# Otherwise it would not be correct and has to be computed again.
if not v.sum() and options.eigenfile:
    if options.verbose:
        sys.stderr.write("Loading pre-computed eigendecomposition...\n")
    Kva = np.load(options.eigenfile + ".Kva")
    Kve = np.load(options.eigenfile + ".Kve")
else:
    Kva = []
    Kve = []

# LMM object for association: LMM_withK2 when options.kfile2 is set,
# plain LMM otherwise.
n = K.shape[0]
L = (LMM_withK2(Y, K, Kva, Kve, X0, verbose=options.verbose, K2=K2)
     if options.kfile2
     else LMM(Y, K, Kva, Kve, X0, verbose=options.verbose))

# Fit the null model, unless refit is on (then every SNP is refitted and a
# fit here would be wasted).
if not options.refit:
    if options.verbose:
        sys.stderr.write("Computing fit for null model\n")
    L.fit()
    if options.verbose and options.kfile2:
        sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n"
                         % (L.optH, L.optSigma, L.optW))
    elif options.verbose:
        sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n"
                         % (L.optH, L.optSigma))

# Buffers for p-values and t-statistics, a counter, and the output file.
PS = []
TS = []
count = 0
out = open(outFile, 'w')
printOutHead()
Exemplo n.º 8
0
def compute_snp(j, snp_ids, q=None):
    """
    Test every SNP of a batch for association and put the results on a queue.

    Uses names defined elsewhere in the script: ``keep``, ``n``,
    ``options``, ``Y``, ``X0``, ``K``, ``K2``, ``L``, ``LMM``,
    ``LMM_withK2`` and ``formatResult``.

    Parameters
    ----------
    j : job identifier, queued with the results and returned.
    snp_ids : iterable of ``(snp, id)`` pairs.
    q : object with a ``put`` method; receives ``[j, results]``.  When
        ``None`` the attribute ``compute_snp.q`` is used.

    Returns
    -------
    ``j``, unchanged.
    """
    nan_stats = (np.nan, np.nan, np.nan, np.nan)
    result = []
    for snp, snp_id in snp_ids:
        # Check SNPs for missing values
        x = snp[keep].reshape((n, 1))  # all the SNPs
        missing = np.isnan(x).reshape((-1,))

        if not missing.sum():
            if x.var() == 0:
                # NOTE (from the original author): this code appears to be
                # unreachable!
                result.append(formatResult(snp_id, *nan_stats))
                continue

            if options.refit:
                L.fit(X=x, REML=options.REML)
            # This is where it happens
            ts, ps, beta, betaVar = L.association(
                x, REML=options.REML, returnBeta=True)
        else:
            # NOTE (from the original author): this code appears to be
            # unreachable!
            if options.verbose:
                sys.stderr.write("Found missing values in " + str(x))
            # Logical negation of the mask.  `True - missing` is rejected
            # by current NumPy for boolean arrays.
            keeps = ~missing
            xs = x[keeps, :]
            if keeps.sum() <= 1 or xs.var() <= 1e-6:
                result.append(formatResult(snp_id, *nan_stats))
                continue

            # Its ok to center the genotype -  I used
            # options.normalizeGenotype to force the removal of missing
            # genotypes as opposed to replacing them with MAF.
            if not options.normalizeGenotype:
                xs = (xs - xs.mean()) / np.sqrt(xs.var())

            # Subset everything to the individuals genotyped at this SNP
            # and fit a model on that subset.
            Ys = Y[keeps]
            X0s = X0[keeps, :]
            Ks = K[keeps, :][:, keeps]
            if options.kfile2:
                K2s = K2[keeps, :][:, keeps]
                Ls = LMM_withK2(Ys, Ks, X0=X0s, verbose=options.verbose,
                                K2=K2s)
            else:
                Ls = LMM(Ys, Ks, X0=X0s, verbose=options.verbose)
            if options.refit:
                Ls.fit(X=xs, REML=options.REML)
            else:
                Ls.fit(REML=options.REML)
            ts, ps, beta, betaVar = Ls.association(
                xs, REML=options.REML, returnBeta=True)

        result.append(
            formatResult(snp_id, beta, np.sqrt(betaVar).sum(), ts, ps))

    # Fall back to the function attribute only when no queue was given.
    # A truthiness test would also ignore a supplied but falsy queue object.
    if q is None:
        q = compute_snp.q
    q.put([j, result])
    return j
Exemplo n.º 9
0
   Kva = []
   Kve = []

# Load the stored decomposition only when no individuals were removed and
# an eigenfile was given.  With removed individuals it would not be
# correct, so Kva/Kve stay empty and it has to be computed again.
if v.sum() or not options.eigenfile:
    Kva = []
    Kve = []
else:
    if options.verbose:
        sys.stderr.write("Loading pre-computed eigendecomposition...\n")
    Kva = np.load(options.eigenfile + ".Kva")
    Kve = np.load(options.eigenfile + ".Kve")

# Create the LMM object for association.
n = K.shape[0]
if options.kfile2:
    L = LMM_withK2(Y, K, Kva, Kve, X0, verbose=options.verbose, K2=K2)
else:
    L = LMM(Y, K, Kva, Kve, X0, verbose=options.verbose)

# Fit the null model here only when refit is off; with refit on, the fit
# is redone for each SNP, so there is no reason to run it now.
if not options.refit:
    if options.verbose:
        sys.stderr.write("Computing fit for null model\n")
    L.fit()
    if options.verbose:
        if not options.kfile2:
            sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n"
                             % (L.optH, L.optSigma))
        else:
            sys.stderr.write(
                "\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n"
                % (L.optH, L.optSigma, L.optW))

def compute_snp(j,snp_ids,q = None):
   # print(j,len(snp_ids),"\n")
   result = []
   for snp_id in snp_ids:
      # j,snp_id = collect
      snp,id = snp_id
Exemplo n.º 10
0
           Kva = []
           Kve = []

# Only load the decomposition if we did not remove individuals.
# Otherwise it would not be correct and we would have to compute it again.
        # NOTE(review): `v` is defined outside this excerpt; presumably it
        # flags the individuals that were removed -- confirm against the caller.
        if not v.sum() and options.eigenfile:
           if options.verbose: sys.stderr.write("Loading pre-computed eigendecomposition...\n")
           # Kva/Kve are read from "<eigenfile>.Kva" and "<eigenfile>.Kve".
           Kva = np.load(options.eigenfile + ".Kva")
           Kve = np.load(options.eigenfile + ".Kve")
        else:
           # Empty lists are handed to the LMM constructor below instead of
           # a stored decomposition.
           Kva = []
           Kve = []

        # Create the LMM object for association: plain LMM unless
        # options.kfile2 is set, in which case LMM_withK2 also receives K2.
        n = K.shape[0]
        if not options.kfile2:  L = LMM(Y,K,Kva,Kve,X0,verbose=options.verbose)
        else:  L = LMM_withK2(Y,K,Kva,Kve,X0,verbose=options.verbose,K2=K2)
        # Fit the null model -- if refit is true we will refit for each SNP, so no reason to run here
        # (Disabled) the output file used to be opened at this point; `out`
        # must now be opened before this excerpt.
        #out = open(outFile,'w') ### Joo Change start
        if not options.refit:
           if options.verbose: sys.stderr.write("Computing fit for null model\n")
           L.fit()
           if options.verbose and not options.kfile2:
                sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f\n" % (L.optH,L.optSigma))
                # Earlier (disabled) variant wrote optH twice and 1 - optH.
                #out.write("%0.5f\t%0.5f\t%0.5f\n" % (L.optH, L.optH, (1-L.optH)))
                # Writes three tab-separated values to `out`:
                # optH, optH * optSigma and optSigma * (1 - optH).
                # NOTE(review): this write only happens when options.verbose
                # is set and options.kfile2 is not -- confirm that gating
                # file output on the verbose flag is intended.
                out.write("%0.5f\t%0.5f\t%0.5f\n" % (L.optH, L.optH*L.optSigma, L.optSigma*(1-L.optH)))
                # Disabled labelled variants of the same output.
                #out.write("heritability=%0.5f, sigma=%0.5f\n" % (L.optH,L.optSigma))
                #out.write("varG=%0.5f, varE=%0.5f\n" %(L.optH*L.optSigma, L.optSigma*(1-L.optH)))
           if options.verbose and options.kfile2:
                sys.stderr.write("\t heritability=%0.3f, sigma=%0.3f, w=%0.3f\n" % (L.optH,L.optSigma,L.optW))
                # Disabled: nothing is written to `out` in the two-kinship
                # case within this excerpt.
                #out.write("%0.5f\t%0.5f\t%0.5f\n" % (L.optH, L.optH, (1-L.optH)))