# loops over genes for gene in genes: print '.. Analyzing gene %s' % gene gene_group = fout.create_group(gene) print ' .. Importing data' try: Xc, info = data.getGenotypes(gene, return_info=True) except: print 'Error: no SNPs found in cis' continue Y = data.getPhenotypes(gene, peer=opt.peer, gauss=True) o = gene_group.create_group('snp_info') smartDumpDictHdf5(info, o) if opt.perm: if opt.seed is not None: sp.random.seed(opt.seed) idxs = sp.random.permutation(Xc.shape[0]) Xc = Xc[idxs, :] if 1: print " .. single trait analysis" lmm = QTL.test_lmm(Xc, Y, K=K) pv = lmm.getPv() RV = {} RV['pv'] = pv RV['qv'] = FDR.qvalues(pv) RV['beta'] = lmm.getBetaSNP()
# NOTE(review): this line is the tail of a larger construct. It starts at an
# `except:` and later contains an `else:`, but the matching `try` and `if`
# are not part of this chunk, and the original line breaks / indentation have
# been lost. As written, the `#` in the middle of the line turns everything
# after it into a comment; the line breaks must be restored from the original
# file before this can run. The code itself is left untouched below.
#
# Reading the statements in order:
#   * exception handler: print geneID followed by 'failed', then `continue`;
#   * append every entry of `temp` to `table` via smartAppend;
#   * close `f`, then convert every entry of `table` with sp.array;
#   * cap table['pv_bonf'] at 1 and compute table['qv_all'] from it with
#     FDR.qvalues;
#   * print how many entries have qv_all < 0.10 and the total number of
#     entries, then write `table` to out_file with smartDumpDictHdf5;
#   * `else` branch: open out_file read-only and load every top-level key
#     into the dict R.
#
# NOTE(review): the bare `except:` also catches KeyboardInterrupt and
# SystemExit -- consider `except Exception:`.
# NOTE(review): the trailing pdb.set_trace() drops into the interactive
# debugger whenever it is reached -- confirm it is not left over from a
# debugging session.
except: print geneID, 'failed' continue #append the temp table into the big table for key in temp.keys(): smartAppend(table, key, temp[key]) f.close() for key in table.keys(): table[key] = sp.array(table[key]) print '.. correct for multiple testing' table['pv_bonf'][table['pv_bonf'] > 1] = 1. table['qv_all'] = FDR.qvalues(table['pv_bonf']) print 'no eQTLs at FDR 0.10:', (table['qv_all'] < 0.10).sum() print 'no genes:', table['qv_all'].shape[0] fout = h5py.File(out_file, 'w') smartDumpDictHdf5(table, fout) fout.close() else: f = h5py.File(out_file, 'r') R = {} for key in f.keys(): R[key] = f[key][:] f.close() pdb.set_trace()
# Gene location table, read as tab-separated text. Row 0 is skipped
# everywhere below, so it is presumably a header row -- TODO confirm.
Mloc = sp.loadtxt(f_geneloc, delimiter='\t', dtype=object)
# both tables must list the same gene ids in the same order
assert (Mloc[1:, 0] == M[1:, 0]).all(), 'gene id do not match'

# Gene filter. A gene is kept when
#   1. its chromosome label is not 'M', 'X' or 'Y' (autosomal), and
#   2. the fraction of samples whose value exceeds cut1 is larger than cut2
#      (the original note describes this as "more than 5 counts in at least
#      half of the samples", presumably the values of cut1 and cut2).
chrom = Mloc[1:, 1]
Iaut = sp.array([label not in ['M', 'X', 'Y'] for label in chrom])
# drop the first row and first column of M, then transpose so that rows
# follow M's columns (samples) and columns follow M's rows (genes)
expr = M[1:, 1:].astype(float).T
Iexpr = (expr > cut1).mean(0) > cut2
Igene = sp.logical_and(Iaut, Iexpr)

# Collect the filtered matrix together with its row / column annotation.
RV = {
    'matrix': expr[:, Igene],
    'col_header': {
        'gene_ID': M[1:, 0][Igene],
        'gene_chrom': chrom[Igene].astype(float),
        'gene_start': Mloc[1:, 2][Igene].astype(float),
        'gene_end': Mloc[1:, 3][Igene].astype(float),
    },
    'row_header': {
        'sample_ID': M[0, 1:],
    },
}

# write everything to the output HDF5 file
fout = h5py.File(out_file, 'w')
smartDumpDictHdf5(RV, fout)
fout.close()
fout = h5py.File(out_file, 'w') genoGroup = fout.create_group('genotypes') RV = {} n_chroms = 22 for chrom_i in range(1, n_chroms+1): print '.. copying chromosome %d'%chrom_i # load genotype in_file = os.path.join(in_dir, 'chrom%d.h5' % chrom_i) fin = h5py.File(in_file, 'r') chromGroup = genoGroup.create_group('chrom%d' % chrom_i) chromGroup.create_dataset('matrix',data=fin['genotypes']['matrix'][:].T) chromGroup.create_dataset('RRM',data=fin['RRM'][:]) h5py.h5o.copy(fin['genotypes'].id, 'col_headers', chromGroup.id, 'col_headers') h5py.h5o.copy(fin['genotypes'].id, 'row_headers', chromGroup.id, 'row_headers') # summing up Kpop if 'RRM' not in RV.keys(): RV['RRM'] = fin['RRM'][:] else: RV['RRM'] += fin['RRM'][:] fin.close() smartDumpDictHdf5(RV, genoGroup) fout.close()