def set_snps0(SNPs0,sample_size,i_exclude=None, forcefullrank=False,blocksize=10000):
    '''
    Return the background SNP matrix G0 (low rank) or kernel K0 (full rank),
    optionally with some SNPs excluded, scaled by the number of SNPs used.

    In full rank case, loads up the SNPs in blocks, and construct the kernel.
    In low rank case, loads up all SNPs in to memory.

    Parameters
    ----------
    SNPs0 : dict or None
        Keys read here: "K" (cached kernel), "data" (cached dict holding a
        "snps" matrix), "snp_set", "reader" and "num_snps". A freshly built
        kernel is stored back under "K" and freshly read data under "data",
        so later calls reuse them.
    sample_size : int
        The kernel is built when len(SNPs0["snp_set"]) exceeds this value.
    i_exclude : array or None
        SNPs to leave out. It is used as an index, negated with ``~`` and
        summed, so presumably a boolean NumPy mask over the SNPs -- TODO
        confirm against callers.
    forcefullrank : bool
        Build the kernel regardless of the SNP count; also forwarded as
        ``allowlowrank`` to psd.build_kernel_blocked.
    blocksize : int
        Forwarded to psd.build_kernel_blocked.

    Returns
    -------
    (G0, K0)
        (None, None) when SNPs0 is None; otherwise at most one of the two
        is not None.
    '''
    if SNPs0 is None:
        return None, None

    # ``in`` instead of dict.has_key(): has_key was removed in Python 3 and
    # ``in`` behaves identically on Python 2.
    if "K" in SNPs0:
        K0 = SNPs0["K"]
        G0 = None
    elif "data" in SNPs0:
        K0 = None
        G0 = SNPs0["data"]["snps"]
    else:
        if len(SNPs0["snp_set"]) > sample_size or forcefullrank:  # N = Y.shape[0]
            # full rank: build the kernel block by block and cache it
            SNPs0["K"] = psd.build_kernel_blocked(snpreader=SNPs0["reader"], snp_idx=SNPs0["snp_set"].to_index, blocksize=blocksize,allowlowrank=forcefullrank)
            K0 = SNPs0["K"]
            G0 = None
        else:
            # low rank: read all SNPs, standardize them and cache the result
            K0 = None
            SNPs0["data"] = SNPs0["snp_set"].read()
            SNPs0["data"]["snps"] = up.standardize(SNPs0["data"]["snps"])
            G0 = SNPs0["data"]["snps"]

    # lrt_up should never do exclusion, because set_snps0 should only get
    # called once, in run_once, without exclusion. So exclusion is only for
    # the score test and lrt.
    if i_exclude is not None:
        if K0 is not None:
            # Also note in the full rank case with exclusion, for score, one
            # could in principle use low rank updates to make it faster when
            # the number of excluded SNPs is small: it would be cubic in
            # num_excluded * num_inner * num_outer iterations, versus now
            # where it is cubic in N in the outer loop only once.
            K_up = psd.build_kernel_blocked(snpreader=SNPs0["reader"], snp_idx=np.array(SNPs0["snp_set"].to_index)[i_exclude], blocksize=blocksize,allowlowrank=forcefullrank)
            # Builds a new array, so the kernel cached in SNPs0["K"] is untouched.
            K0 = K0 - K_up
        elif G0 is not None:
            G0 = G0[:,~i_exclude]
        num_snps = SNPs0["num_snps"] - i_exclude.sum()
    else:
        num_snps = SNPs0["num_snps"]
    # intersect data?

    # normalize by the number of SNPs actually used
    # (alternatives considered: K0.diagonal().mean() and (G0*G0).mean())
    if K0 is not None:
        K0 = K0 / num_snps
    elif G0 is not None:
        G0 = G0 / np.sqrt( num_snps )
    return G0, K0
def set_snps0(SNPs0,sample_size,i_exclude=None, forcefullrank=False,blocksize=10000):
    '''
    Produce the pair (G0, K0) for the background SNPs described by SNPs0.

    A kernel (K0) is used when SNPs0 already carries one under "K", when
    there are more SNPs than sample_size, or when forcefullrank is set;
    it is built in blocks via psd.build_kernel_blocked and cached in
    SNPs0["K"]. Otherwise the SNPs are read into memory, standardized,
    cached in SNPs0["data"] and returned as the matrix G0.

    If i_exclude is given, the excluded SNPs are removed (their kernel is
    subtracted from K0, or their columns are dropped from G0) and the SNP
    count is reduced by i_exclude.sum(). The kernel is then divided by
    the SNP count, or the matrix by its square root.

    Returns (None, None) when SNPs0 is None.
    '''
    if SNPs0 is None:
        return None, None

    kernel = None
    design = None
    if "K" in SNPs0:
        # kernel cached by an earlier call (or supplied by the caller)
        kernel = SNPs0["K"]
    elif "data" in SNPs0:
        # SNP matrix cached by an earlier call
        design = SNPs0["data"]["snps"]
    elif len(SNPs0["snp_set"]) > sample_size or forcefullrank:
        # full rank
        SNPs0["K"] = psd.build_kernel_blocked(
            snpreader=SNPs0["reader"],
            snp_idx=SNPs0["snp_set"].to_index,
            blocksize=blocksize,
            allowlowrank=forcefullrank)
        kernel = SNPs0["K"]
    else:
        # low rank
        SNPs0["data"] = SNPs0["snp_set"].read()
        SNPs0["data"]["snps"] = up.standardize(SNPs0["data"]["snps"])
        design = SNPs0["data"]["snps"]

    # Exclusion is only expected from the score test and lrt; lrt_up calls
    # this once, in run_once, without exclusion.
    if i_exclude is None:
        snp_count = SNPs0["num_snps"]
    else:
        if kernel is not None:
            # In principle low rank updates could make this faster when few
            # SNPs are excluded; for now the excluded kernel is rebuilt.
            excluded_kernel = psd.build_kernel_blocked(
                snpreader=SNPs0["reader"],
                snp_idx=np.array(SNPs0["snp_set"].to_index)[i_exclude],
                blocksize=blocksize,
                allowlowrank=forcefullrank)
            kernel = kernel - excluded_kernel
        elif design is not None:
            design = design[:, ~i_exclude]
        snp_count = SNPs0["num_snps"] - i_exclude.sum()

    # normalize
    if kernel is not None:
        kernel = kernel / snp_count
    elif design is not None:
        design = design / np.sqrt(snp_count)
    return design, kernel
def computePC(file, filepath = None, numpc = [5]):
    '''
    Compute the eigendecomposition of the kernel built from a Bed file and
    save the leading eigenvectors and eigenvalues to text files.

    Parameters
    ----------
    file : str
        Name of the Bed file set, as accepted by Bed().
    filepath : str or None
        Optional directory that is joined in front of ``file``.
    numpc : number or iterable of numbers
        How many components to save. A single scalar (Python or NumPy) is
        treated as a one-element list; one pair of output files is written
        per entry. The default list is never mutated here.

    Output files, per entry ``n`` of numpc
    --------------------------------------
    getEigvecs_fn(fn, n) : tab-separated, two ID columns followed by the
        first n eigenvectors.
    "<fn>_pc<n>.vals" : the first n eigenvalues, formatted "%.5f".

    Returns
    -------
    (s_all, u_all) : all eigenvalues and eigenvectors returned by LA.eigh,
        in reversed order.
    '''
    if filepath is not None:
        fn = os.path.join(filepath,file)
    else:
        fn = file

    # isinstance (rather than an exact type() match) also wraps NumPy scalar
    # counts such as np.int64, which would otherwise fail when iterated.
    if isinstance(numpc, (int, float, np.integer, np.floating)):
        numpc = [numpc]

    alt_snpreader = Bed(fn)
    # Parenthesised single-argument print behaves the same on Python 2 and
    # is valid syntax on Python 3.
    print("computing K")
    K = dist.build_kernel_blocked(fn,alt_snpreader=alt_snpreader)
    print("computing the Eigenvalue decomposition of K")
    [s_all,u_all] = LA.eigh(K)
    # Reverse so the components come in the opposite order to LA.eigh's
    # output (presumably largest eigenvalue first -- assumes LA is a
    # numpy/scipy linalg module, whose eigh returns ascending eigenvalues).
    s_all=s_all[::-1]
    u_all=u_all[:,::-1]

    for numpcs in numpc:
        # Float counts are accepted above; shapes and slice bounds need ints.
        numpcs = int(numpcs)
        print("saving %i PCs from %s" %(numpcs,fn))
        s=s_all[0:numpcs]
        u = u_all[:,0:numpcs]
        # NOTE(review): "|S20" is a fixed 20-byte string field, so longer IDs
        # or number representations are truncated on assignment -- confirm
        # this width is sufficient.
        outu = np.zeros((u_all.shape[0],numpcs+2),dtype = "|S20")
        # presumably the (family id, individual id) pair of each sample
        outu[:,0:2] = alt_snpreader.original_iids
        outu[:,2::]=u
        fnout = getEigvecs_fn(fn,numpcs)
        np.savetxt(fnout,outu,fmt="%s",delimiter = "\t")
        fnout = "%s_pc%i.vals"%(fn,numpcs)
        np.savetxt(fnout,s,fmt="%.5f",delimiter = "\t")
    return s_all,u_all
def computePC(file, filepath=None, numpc=[5]):
    '''
    Eigendecompose the kernel of a Bed file and write out the top components.

    The kernel is built with dist.build_kernel_blocked and decomposed with
    LA.eigh; the results are reversed before use. For every requested
    count ``n`` two files are written:

    * getEigvecs_fn(fn, n): tab-separated rows made of the two columns of
      alt_snpreader.original_iids followed by the first n eigenvectors;
    * "<fn>_pc<n>.vals": the first n eigenvalues, formatted "%.5f".

    Parameters
    ----------
    file : str
        Bed file name passed to Bed().
    filepath : str or None
        Optional directory prefixed to ``file`` with os.path.join.
    numpc : number or iterable of numbers
        Component counts to save; a lone scalar (Python or NumPy) is
        wrapped in a list. The default list is only read, never modified.

    Returns
    -------
    (s_all, u_all) : the complete reversed eigenvalues and eigenvectors.
    '''
    fn = file if filepath is None else os.path.join(filepath, file)

    # Accept any integer/float scalar, including NumPy scalars; an exact
    # type() comparison let np.int64 and friends fall through to the loop,
    # where iterating them raises TypeError.
    if isinstance(numpc, (int, float, np.integer, np.floating)):
        numpc = [numpc]

    alt_snpreader = Bed(fn)
    # print(...) with one argument is valid on both Python 2 and Python 3.
    print("computing K")
    K = dist.build_kernel_blocked(fn, alt_snpreader=alt_snpreader)
    print("computing the Eigenvalue decomposition of K")
    [s_all, u_all] = LA.eigh(K)
    # Flip the order returned by LA.eigh (presumably ascending eigenvalues,
    # if LA is numpy/scipy linalg -- confirm) so slices take the leading ones.
    s_all = s_all[::-1]
    u_all = u_all[:, ::-1]

    for numpcs in numpc:
        # A float count cannot be used as a shape or slice bound.
        numpcs = int(numpcs)
        print("saving %i PCs from %s" % (numpcs, fn))
        s = s_all[0:numpcs]
        u = u_all[:, 0:numpcs]
        # NOTE(review): fields are fixed at 20 bytes ("|S20"); anything
        # longer is truncated when assigned -- verify this is acceptable.
        outu = np.zeros((u_all.shape[0], numpcs + 2), dtype="|S20")
        outu[:, 0:2] = alt_snpreader.original_iids
        outu[:, 2::] = u
        fnout = getEigvecs_fn(fn, numpcs)
        np.savetxt(fnout, outu, fmt="%s", delimiter="\t")
        fnout = "%s_pc%i.vals" % (fn, numpcs)
        np.savetxt(fnout, s, fmt="%.5f", delimiter="\t")
    return s_all, u_all