Exemplo n.º 1
0
def set_snps0(SNPs0,sample_size,i_exclude=None, forcefullrank=False,blocksize=10000):
    '''
    In full rank case, loads up the SNPs in blocks, and construct the kernel.
    In low rank case, loads up all SNPs in to memory
    '''    
    if SNPs0 is None:
        return None, None
    if SNPs0.has_key("K"):
        K0 = SNPs0["K"]
        G0 = None
    elif SNPs0.has_key("data"):
        K0 = None
        G0 = SNPs0["data"]["snps"]
    else:        
        #full rank
        if len(SNPs0["snp_set"]) > sample_size or forcefullrank:# N = Y.shape[0]                      
            SNPs0["K"] = psd.build_kernel_blocked(snpreader=SNPs0["reader"], snp_idx=SNPs0["snp_set"].to_index, blocksize=blocksize,allowlowrank=forcefullrank)
            K0 = SNPs0["K"]
            G0 = None
        else:
            #low rank            
            K0 = None
            SNPs0["data"] = SNPs0["snp_set"].read()
            SNPs0["data"]["snps"] = up.standardize(SNPs0["data"]["snps"])
            G0 = SNPs0["data"]["snps"]

    #lrt_up should never do exclusion, because set_snps0 should only get called once, in run_once, without exclusion
    #exclude. So this is only for score test and lrt. 
    if i_exclude is not None:
        if K0 is not None:
            #Also note in the full rank case with exclusion, for score, one could in principle use low rank updates to make it faster,
            #when the number of excluded SNPs is small: it wold be cubic in num_excluded * num_inner*num_outer iterations, versus now
            #where it is cubic in N in the outer loop only once
            K_up = psd.build_kernel_blocked(snpreader=SNPs0["reader"], snp_idx=np.array(SNPs0["snp_set"].to_index)[i_exclude], blocksize=blocksize,allowlowrank=forcefullrank)
            K0 = K0 - K_up
        elif G0 is not None:
            G0 = G0[:,~i_exclude]                        
        num_snps = SNPs0["num_snps"] - i_exclude.sum()
    else:
        num_snps = SNPs0["num_snps"]
    #intersect data?
        
    #normalize:
    if K0 is not None:
        K0 = K0 / num_snps#K0.diagonal().mean()
    elif G0 is not None:
        G0 = G0 / np.sqrt( num_snps )#(G0*G0).mean() ) # computes the sqrt of the mean of the diagonal of K=GG^T; *  means pointwise multiplication 
    return G0, K0
Exemplo n.º 2
0
def set_snps0(SNPs0,sample_size,i_exclude=None, forcefullrank=False,blocksize=10000):
    '''
    In full rank case, loads up the SNPs in blocks, and construct the kernel.
    In low rank case, loads up all SNPs in to memory
    '''    
    if SNPs0 is None:
        return None, None
    if "K" in SNPs0:
        K0 = SNPs0["K"]
        G0 = None
    elif "data" in SNPs0:
        K0 = None
        G0 = SNPs0["data"]["snps"]
    else:        
        #full rank
        if len(SNPs0["snp_set"]) > sample_size or forcefullrank:# N = Y.shape[0]                      
            SNPs0["K"] = psd.build_kernel_blocked(snpreader=SNPs0["reader"], snp_idx=SNPs0["snp_set"].to_index, blocksize=blocksize,allowlowrank=forcefullrank)
            K0 = SNPs0["K"]
            G0 = None
        else:
            #low rank            
            K0 = None
            SNPs0["data"] = SNPs0["snp_set"].read()
            SNPs0["data"]["snps"] = up.standardize(SNPs0["data"]["snps"])
            G0 = SNPs0["data"]["snps"]

    #lrt_up should never do exclusion, because set_snps0 should only get called once, in run_once, without exclusion
    #exclude. So this is only for score test and lrt. 
    if i_exclude is not None:
        if K0 is not None:
            #Also note in the full rank case with exclusion, for score, one could in principle use low rank updates to make it faster,
            #when the number of excluded SNPs is small: it wold be cubic in num_excluded * num_inner*num_outer iterations, versus now
            #where it is cubic in N in the outer loop only once
            K_up = psd.build_kernel_blocked(snpreader=SNPs0["reader"], snp_idx=np.array(SNPs0["snp_set"].to_index)[i_exclude], blocksize=blocksize,allowlowrank=forcefullrank)
            K0 = K0 - K_up
        elif G0 is not None:
            G0 = G0[:,~i_exclude]                        
        num_snps = SNPs0["num_snps"] - i_exclude.sum()
    else:
        num_snps = SNPs0["num_snps"]
    #intersect data?
        
    #normalize:
    if K0 is not None:
        K0 = K0 / num_snps#K0.diagonal().mean()
    elif G0 is not None:
        G0 = G0 / np.sqrt( num_snps )#(G0*G0).mean() ) # computes the sqrt of the mean of the diagonal of K=GG^T; *  means pointwise multiplication 
    return G0, K0
Exemplo n.º 3
0
def computePC(file, filepath = None, numpc = [5]):
    if filepath is not None:
        fn = os.path.join(filepath,file)
    else:
        fn = file
    if type(numpc) is int or type(numpc) is float:
        numpc = [numpc]
    alt_snpreader = Bed(fn)
    print "computing K"
    K = dist.build_kernel_blocked(fn,alt_snpreader=alt_snpreader)
    print "computing the Eigenvalue decomposition of K"
    [s_all,u_all] = LA.eigh(K)
    s_all=s_all[::-1]
    u_all=u_all[:,::-1]
    for numpcs in numpc:
        #import pdb; pdb.set_trace()
        print "saving %i PCs from %s" %(numpcs,fn)
        
        #import pdb; pdb.set_trace()

        s=s_all[0:numpcs]
        u = u_all[:,0:numpcs]
        outu = np.zeros((u_all.shape[0],numpcs+2),dtype = "|S20")
        outu[:,0:2] = alt_snpreader.original_iids
        outu[:,2::]=u
        fnout = getEigvecs_fn(fn,numpcs)
    
        np.savetxt(fnout,outu,fmt="%s",delimiter = "\t")
        fnout = "%s_pc%i.vals"%(fn,numpcs)
        #outs = np.zeros((s.shape[0],u.shape[1]+2),dtype = "|S20")
        np.savetxt(fnout,s,fmt="%.5f",delimiter = "\t")
    return s_all,u_all
Exemplo n.º 4
0
def computePC(file, filepath=None, numpc=[5]):
    if filepath is not None:
        fn = os.path.join(filepath, file)
    else:
        fn = file
    if type(numpc) is int or type(numpc) is float:
        numpc = [numpc]
    alt_snpreader = Bed(fn)
    print "computing K"
    K = dist.build_kernel_blocked(fn, alt_snpreader=alt_snpreader)
    print "computing the Eigenvalue decomposition of K"
    [s_all, u_all] = LA.eigh(K)
    s_all = s_all[::-1]
    u_all = u_all[:, ::-1]
    for numpcs in numpc:
        #import pdb; pdb.set_trace()
        print "saving %i PCs from %s" % (numpcs, fn)

        #import pdb; pdb.set_trace()

        s = s_all[0:numpcs]
        u = u_all[:, 0:numpcs]
        outu = np.zeros((u_all.shape[0], numpcs + 2), dtype="|S20")
        outu[:, 0:2] = alt_snpreader.original_iids
        outu[:, 2::] = u
        fnout = getEigvecs_fn(fn, numpcs)

        np.savetxt(fnout, outu, fmt="%s", delimiter="\t")
        fnout = "%s_pc%i.vals" % (fn, numpcs)
        #outs = np.zeros((s.shape[0],u.shape[1]+2),dtype = "|S20")
        np.savetxt(fnout, s, fmt="%.5f", delimiter="\t")
    return s_all, u_all