def find_vstructures(bfile, pfile, gfile, cisfile, assoc0file, window, vfile, startTraitIdx, nTraits):
    """
    searching for v-structures and saving them

    input:
    bfile         :   basefilename handed to bedReader.BedReader
    pfile         :   phenotype file
    gfile         :   basefilename of the gene correlation results (gfile.pv is read)
    cisfile       :   file containing cis anchors
    assoc0file    :   basefilename of the initial association scan (assoc0file.pv is read)
    window        :   window argument forwarded to gnetlmm.GNetLMM
    vfile         :   basefilename for the v-structures (vfile.csv is handed to save_vstructures)
    startTraitIdx :   forwarded to the model; presumably index of the first trait to be analyzed
    nTraits       :   forwarded to the model; presumably number of traits to be analyzed
    """
    pheno_reader = phenoReader.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(pheno_reader, geno_reader, window=window)

    # hand the model the p-value tables and the cis anchors before searching
    net.set_genecorr_reader(reader.FileReader(gfile + '.pv'))
    net.set_assoc0_reader(reader.FileReader(assoc0file + '.pv'))
    net.load_cis_anchors(cisfile)

    net.find_vstructures(startTraitIdx, nTraits)
    net.save_vstructures(vfile + '.csv')
def gene_has_anchor(bfile, pfile, assoc0, anchor_thresh, anchorfile, window, cis=True):
    """
    tests if a gene has a cis anchor and saves the anchors

    input:
    bfile         :   binary bed file (bfile.bed, bfile.bim and bfile.fam are required)
    pfile         :   phenotype file
    assoc0        :   basefilename of initial association scan (assoc0.pv is read)
    anchor_thresh :   threshold for anchor-associations
    anchorfile    :   filename for saving the anchor associations
    window        :   maximal distance between cis-snp and gene
    cis           :   if set, look for cis-associations only (default: True)
    """
    pheno_reader = phenoReaderFile.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(pheno_reader, geno_reader, window=window)

    # the anchor test works on the p-values of the initial scan
    net.set_assoc0_reader(reader.FileReader(assoc0 + '.pv'))

    net.gene_has_anchor(anchor_thresh, cis)
    net.save_anchors(anchorfile)
def scan(bfile, pfile, cfile, ffile, vfile, assocfile, startTraitIdx, nTraits):
    """
    updating associations based on previously found v-structures

    input:
    bfile         :   basefilename of plink file
    pfile         :   phenotype file
    cfile         :   covariance file, loaded with np.loadtxt (None: no covariance matrix)
    ffile         :   fixed effects file, loaded with np.loadtxt (None: no fixed effects)
    vfile         :   basefilename of the v-structures (vfile.csv is loaded)
    assocfile     :   file for saving results
    startTraitIdx :   forwarded to the model; presumably index of the first trait to be analyzed
    nTraits       :   forwarded to the model; presumably number of traits to be analyzed
    """
    # both matrices are optional and stay None when no file is given
    cov_matrix = np.loadtxt(cfile) if cfile is not None else None
    fixed_effects = np.loadtxt(ffile) if ffile is not None else None

    pheno_reader = phenoReader.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(pheno_reader, geno_reader, Covs=fixed_effects, K=cov_matrix)

    net.load_vstructures(vfile + ".csv")
    net.update_associations(startTraitIdx, nTraits)
    net.save_updates(assocfile)
def merge_assoc0_scan(assoc0file, nSnps, bfile):
    """
    merging the per-block result files of the initial association scan

    The blocks are expected under the names
    <assoc0file>.startSnp_<f>.beta.matrix and <assoc0file>.startSnp_<f>.pv.matrix
    for f = 0, nSnps, 2*nSnps, ... while f is below the number of rows
    reported by the bed reader. They are merged into
    <assoc0file>.beta.matrix and <assoc0file>.pv.matrix.

    input:
    assoc0file  :   basename of assoc0 results
    nSnps       :   number of SNPs in each block (must be positive)
    bfile       :   basename of bed file

    raises:
    ValueError     : if nSnps is not positive
    AssertionError : if one of the expected block files is missing
    """
    # A block size of zero would never advance the start index (endless loop),
    # a negative one would walk away from the end; reject both up front.
    if nSnps <= 0:
        raise ValueError('nSnps must be positive, got %r' % (nSnps,))

    greader = bedReader.BedReader(bfile)
    n_rows = greader.get_nrows()

    beta_files = []
    pv_files = []
    start = 0
    while start < n_rows:
        block_base = assoc0file + '.startSnp_%d' % start
        beta_file = '%s.beta.matrix' % block_base
        pv_file = '%s.pv.matrix' % block_base
        # every block has to be present before anything is merged
        assert os.path.exists(beta_file), 'File %s is missing' % (beta_file)
        assert os.path.exists(pv_file), 'File %s is missing' % (pv_file)
        beta_files.append(beta_file)
        pv_files.append(pv_file)
        start += nSnps

    merge_files(beta_files, assoc0file + '.beta.matrix')
    merge_files(pv_files, assoc0file + '.pv.matrix')
def __init__(self, bfile, dist_min=100):
    """
    initializes object

    input:
    bfile    :   basefilename of plink file
    dist_min :   minimal distance, stored as self.dist_min
                 (presumably the minimal distance between two causal SNPs)
    """
    self.bfile = bfile
    self.dist_min = dist_min

    # SNP annotation is read once from the genotype reader and cached
    geno = bedReader.BedReader(self.bfile)
    self.genoreader = geno
    self.chrom = geno.getSnpChrom()
    self.pos = geno.getSnpPos()
    self.rs = geno.getSnpIds()

    # N: number of rows of the fam table, F: number of entries in chrom
    self.N = geno.fam.shape[0]
    self.F = self.chrom.shape[0]
def find_vstructures(bfile, pfile, gfile, anchorfile, assoc0file, window, vfile, startTraitIdx, nTraits, corr_thresh, ind_thresh, max_genes):
    """
    searching for v-structures and saving them

    If nTraits is finite, the output is saved under
    <vfile>.startTrait_<startTraitIdx>.csv, otherwise under <vfile>.csv.

    input:
    bfile         :   basefilename of the bed file
    pfile         :   phenotype file
    gfile         :   basefilename of the correlation file (gfile.pv is read)
    anchorfile    :   file containing anchors
    assoc0file    :   basefilename of the initial association scan (assoc0file.pv is read)
    window        :   window size
    vfile         :   basefilename for saving the v-structures
    startTraitIdx :   index of first trait to be analyzed
    nTraits       :   number of traits to be analyzed
    corr_thresh   :   q-value for calling a correlation significant
    ind_thresh    :   q-value for calling a correlation not significant
    max_genes     :   maximal number of genes in the conditioning set
    """
    pheno_reader = phenoReaderFile.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(
        pheno_reader,
        geno_reader,
        window=window,
        thresh_corr=corr_thresh,
        thresh_ind=ind_thresh,
    )

    # hand the model the p-value tables and the anchors before searching
    net.set_genecorr_reader(reader.FileReader(gfile + '.pv'))
    net.set_assoc0_reader(reader.FileReader(assoc0file + '.pv'))
    net.load_anchors(anchorfile)

    net.find_vstructures(startTraitIdx, nTraits, max_genes)

    # a finite trait count gets the start index tagged onto the output name
    out_base = vfile
    if np.isfinite(nTraits):
        out_base = out_base + ".startTrait_%d" % startTraitIdx
    net.save_vstructures(out_base + '.csv')
def plot_power(bfile, pfile, assoc0file, assocfile, plotfile, window):
    """
    plotting ROC curves for the initial scan (LMM) and the updated scan (GNetLMM)

    input:
    bfile      :   basefilename handed to bedReader.BedReader
    pfile      :   phenotype file (pfile.Agene is loaded as the network)
    assoc0file :   basefilename of the initial scan (assoc0file.pv is read)
    assocfile  :   basefilename of the updated scan (assocfile.pv is read)
    plotfile   :   forwarded to plotting.plotROCcurve as fn
    window     :   forwarded to get_groundtruth
    """
    # scores are -log10 of the p-values of each method
    score = {}
    for method, basename in (('LMM', assoc0file), ('GNetLMM', assocfile)):
        pv_reader = reader.FileReader(basename + '.pv')
        score[method] = -np.log10(pv_reader.getMatrix())

    # network
    Agene = np.loadtxt(pfile + '.Agene')

    # gene annotation
    pheno_reader = phenoReader.PhenoReaderFile(pfile)
    gene_start = pheno_reader.getGeneStart()
    # not used below; the call is kept so the reader is queried as before
    gene_end = pheno_reader.getGeneEnd()
    gene_chrom = pheno_reader.getGeneChrom()

    # snp annotation
    geno_reader = bedReader.BedReader(bfile)
    snp_pos = geno_reader.getSnpPos()
    snp_chrom = geno_reader.getSnpChrom()

    P_cis, P_trans = get_groundtruth(Agene, snp_pos, snp_chrom, gene_start, gene_chrom, window)

    # cis-associations are zeroed out in place for every method
    for values in score.values():
        values[P_cis] = 0

    # receiver operator characteristics per method
    FPR, TPR = {}, {}
    for method, values in score.items():
        FPR[method], TPR[method] = roc.roc(P_trans, values)

    plotting.plotROCcurve(['LMM', 'GNetLMM'], TPR, FPR, xlim=(0, 0.05), ylim=(0, 0.42), fn=plotfile)
def marginal_genecorr(bfile, pfile, gfile):
    """
    running marginal gene-gene correlations

    Input:
    bfile   :   binary bed file (bfile.bed, bfile.bim and bfile.fam are required)
    pfile   :   phenotype file
    gfile   :   basename of output file (gfile.pv and gfile.corr are written)
    """
    pheno_reader = phenoReader.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(pheno_reader, geno_reader)

    corr, pv = net.marginal_gene_correlations()

    # p-values first, then the correlations, each with its own number format
    for suffix, matrix, fmt in (('.pv', pv, '%.4e'), ('.corr', corr, '%.4f')):
        out = writer.Writer(gfile + suffix)
        out.writeMatrix(matrix, fmt=fmt)
def initial_scan(bfile, pfile, cfile, ffile, assoc0file, startSnpIdx=0, nSnps=np.inf, memory_efficient=False):
    """
    running initial scan using a standard linear mixed model

    If nSnps is finite, the results are written under
    <assoc0file>.startSnp_<startSnpIdx>.pv / .beta, otherwise under
    <assoc0file>.pv / .beta.

    Input:
    bfile            :   binary bed file (bfile.bed, bfile.bim and bfile.fam are required)
    pfile            :   phenotype file
    cfile            :   covariance matrix file (None: no covariance matrix)
    ffile            :   covariates file (None: no covariates)
    assoc0file       :   basename of output file
    startSnpIdx      :   forwarded to the model; presumably index of the first SNP of the block
    nSnps            :   forwarded to the model; presumably number of SNPs in the block
    memory_efficient :   forwarded to the model's initial_scan
    """
    # both matrices are optional and stay None when no file is given
    cov_matrix = np.loadtxt(cfile) if cfile is not None else None
    covariates = np.loadtxt(ffile) if ffile is not None else None

    # a finite block size gets the start index tagged onto the output name
    out_base = assoc0file
    if np.isfinite(nSnps):
        out_base = out_base + ".startSnp_%d" % startSnpIdx

    pheno_reader = phenoReaderFile.PhenoReaderFile(pfile)
    geno_reader = bedReader.BedReader(bfile)
    net = gnetlmm.GNetLMM(pheno_reader, geno_reader, K=cov_matrix, Covs=covariates)

    beta0, pv0 = net.initial_scan(startSnpIdx, nSnps, memory_efficient)

    # p-values first, then the effect sizes, each with its own number format
    for suffix, matrix, fmt in (('.pv', pv0, '%.4e'), ('.beta', beta0, '%.4f')):
        out = writer.Writer(out_base + suffix)
        out.writeMatrix(matrix, fmt=fmt)
def create_nice_output(vfile, bfile, pfile, assoc0file, assocfile, blockfile, outfile):
    """
    creating human readable output file

    One tab-separated line is written per anchor SNP of every v-structure,
    holding the ids of the anchor SNP, anchor gene, focal gene and orthogonal
    genes, followed by the p-values of the different methods and, where the
    result lists provide them, the variance columns.

    input:
    vfile      :   basefilename of the v-structures (vfile.csv is read)
    bfile      :   basefilename handed to bedReader.BedReader
    pfile      :   phenotype file
    assoc0file :   basefilename of the initial association scan (assoc0file.pv is read)
    assocfile  :   basefilename of the GNetLMM results (assocfile.csv is read)
    blockfile  :   basefilename of the BlockLMM results (blockfile.csv is read);
                   None leaves all BlockLMM columns out
    outfile    :   basefilename of the output (outfile.csv is written)
    """
    preader = phenoReaderFile.PhenoReaderFile(pfile)
    greader = bedReader.BedReader(bfile)

    snp_ids = greader.getSnpIds()
    gene_ids = preader.getGeneIds()

    vstruct = vstructures.VstructureFile(vfile + '.csv')
    assoc0Reader = reader.FileReader(assoc0file + '.pv')

    assocReader = assoc_results.AssocResultsList()
    assocReader.load_csv(assocfile + '.csv')

    blockReader = None
    if blockfile is not None:
        blockReader = assoc_results.AssocResultsList()
        blockReader.load_csv(blockfile + '.csv')
    has_block = blockReader is not None

    # header: fixed columns first, optional columns in the same order in
    # which the values are appended to every line below
    header = "Anchor Snp\t Anchor Gene\t Focal Gene\t Orthogonal Genes\t pv(LMM) \t pv(GNetLMM)"
    if has_block:
        header += " \t pv(BlockLMM)"
    if assocReader.var_snps is not None:
        header += '\t varSnp(GnetLMM)'
    if assocReader.var_covs is not None:
        header += '\t varCovs(GnetLMM)'
    if assocReader.var_genes is not None:
        header += '\t varGenes(GnetLMM)'
    if has_block:
        if blockReader.var_snps is not None:
            header += '\t varSnp(BlockLMM)'
        if blockReader.var_covs is not None:
            header += '\t varCovs(BlockLMM)'
        if blockReader.var_genes is not None:
            header += '\t varGenes(BlockLMM)'
    header += "\n"

    # the context manager closes the file even if a lookup or a format
    # call below raises half-way through
    with open(outfile + '.csv', 'w') as f:
        f.write(header)

        for idx_focal_gene, idx_anchor_snp, idx_orth_gene, idx_anchor_gene in vstruct.iterator(full=True):
            focal_gene = ",".join(gene_ids[idx_focal_gene])
            orth_gene = ",".join(gene_ids[idx_orth_gene])
            anchor_snps = snp_ids[idx_anchor_snp]

            for i, anchor_snp in enumerate(anchor_snps):
                try:
                    anchor_gene = gene_ids[idx_anchor_gene[i]]
                except Exception:
                    # best effort: the column stays empty when no anchor gene
                    # can be looked up; KeyboardInterrupt/SystemExit still
                    # propagate
                    anchor_gene = ""

                pv_lmm = assoc0Reader.getRows([idx_anchor_snp[i]])[:, idx_focal_gene][0, 0]

                # entries of the result lists that belong to this
                # (focal gene, anchor snp) pair
                idx_gnet = np.logical_and(assocReader.focal_gene == idx_focal_gene,
                                          assocReader.snp_anchor == idx_anchor_snp[i])
                if has_block:
                    idx_block = np.logical_and(blockReader.focal_gene == idx_focal_gene,
                                               blockReader.snp_anchor == idx_anchor_snp[i])

                line = "%s\t%s\t%s\t%s\t%.4e\t%.4e" % (anchor_snp, anchor_gene, focal_gene, orth_gene,
                                                       pv_lmm, assocReader.pv[idx_gnet])
                if has_block:
                    line += "\t%.4e" % (blockReader.pv[idx_block],)

                if assocReader.var_snps is not None:
                    line += '\t%.4e' % assocReader.var_snps[idx_gnet]
                if assocReader.var_covs is not None:
                    line += '\t%.4e' % assocReader.var_covs[idx_gnet]
                if assocReader.var_genes is not None:
                    line += '\t%.4e' % assocReader.var_genes[idx_gnet]

                if has_block:
                    if blockReader.var_snps is not None:
                        line += '\t%.4e' % blockReader.var_snps[idx_block]
                    if blockReader.var_covs is not None:
                        line += '\t%.4e' % blockReader.var_covs[idx_block]
                    if blockReader.var_genes is not None:
                        line += '\t%.4e' % blockReader.var_genes[idx_block]

                line += "\n"
                f.write(line)