def simultaneousPermutationWithMultipleSignificanceLevels(
        significance_levels, n, categories, testset_size, iterations):
    """Estimate how many categories look significant under random test sets.

    For each of ``iterations`` rounds a test set of ``testset_size`` distinct
    indices is drawn at random from ``0 .. n-1``.  Every entry of
    ``categories`` is scored against that test set with
    ``computeOverlapPval(category, test_set, n)`` and, for each threshold in
    ``significance_levels``, the category is counted when its p-value is less
    than or equal to the threshold.

    Returns a list parallel to ``significance_levels`` holding, for each
    threshold, the total count divided by ``iterations`` (i.e. the average
    number of significant categories per round).

    Raises ZeroDivisionError when ``iterations`` is 0.
    """
    # Parenthesised single argument: prints the same text whether ``print``
    # is the Python 2 statement or the function.
    print("Running permutation test on %d categories with test set size of %d drawn from %d items for %d iterations" % (
        len(categories), testset_size, n, iterations))

    progress = ProgressBar()
    progress.setMaximum(iterations)
    progress.updateProgress(0)

    hits_per_level = [0] * len(significance_levels)
    population = xrange(0, n)

    for round_index in xrange(0, iterations):
        # The bar is refreshed on every round.
        progress.updateProgress(round_index)
        drawn = set(random.sample(population, testset_size))

        for category in categories:
            pval = computeOverlapPval(category, drawn, n)
            for level_index, level in enumerate(significance_levels):
                if pval <= level:
                    hits_per_level[level_index] += 1

    progress.finalize()

    return [float(hits) / float(iterations) for hits in hits_per_level]
def computeTraitStatistics(genes, pfilter):
    """Compute overlap statistics between ``genes`` and every trait's gene set.

    For each trait in ``gwasDB.__studyByTrait`` that has at least one gene
    (as reported by ``gwasDB.getGenesForTrait(trait)``), the trait's genes are
    fetched again with ``pfilter`` applied and a 2x2 contingency table is
    built against ``genes``:

        a -- genes in both the trait set and ``genes``
        b -- genes in ``genes`` but not in the trait set
        c -- genes in the trait set but not in ``genes``
        d -- approved symbols (``geneDB.__approved_symbols``) in neither

    Returns a dict mapping each trait to the tuple
    ``(a, oddsratio, kappa, len(traitGenes), fisher_exact, fisher_p,
    traitGenes)``.
    """
    # NOTE(review): the pre-filter below calls getGenesForTrait without
    # ``pfilter`` while the loop body passes it, so a trait may reach the
    # statistics with an empty filtered gene set -- confirm this is intended.
    traitSet = set([key for key in gwasDB.__studyByTrait
                    if len(gwasDB.getGenesForTrait(key)) > 0])

    traitChi = {}

    pbar = ProgressBar()
    pbar.setMaximum(len(traitSet))
    pbar.updateProgress(0)

    for i, trait in enumerate(traitSet):
        if i % 5 == 0:
            # Report the real position; the bar previously stayed at 0 because
            # a literal 0 was passed here.
            pbar.updateProgress(i)

        traitGenes = gwasDB.getGenesForTrait(trait, pfilter)

        a = len(traitGenes & genes)
        b = len(genes - traitGenes)
        c = len(traitGenes - genes)
        d = len(geneDB.__approved_symbols - (traitGenes | genes))

        oddsratio = geneUtils.oddsRatio(a, b, c, d)
        kappa = geneUtils.kappaStatistic(a, b, c, d)
        fisher_exact = fisher.compute(a, b, c, d)
        fisher_p = fisher.significance(fisher_exact, a, b, c, d)

        traitChi[trait] = (a, oddsratio, kappa, len(traitGenes),
                           fisher_exact, fisher_p, traitGenes)

    pbar.finalize()

    return traitChi
def createGeneListingsHTML(geneListDir):
    """Write one HTML summary page per gene into ``geneListDir``.

    For every gene in ``gwasDB.__geneSet`` the page contains:

    * a sortable table of the gene's traits (from ``gwasDB.__traitDict``),
      ordered by descending first element of the trait's ``'RE_chi'`` tuple
      in ``__traitMetaAnalysis``, with the statistics stored for that trait;
    * either a list of DrugBank links for the drugs in
      ``drugDB.__drugDict[gene]`` or a "no drugs" header.

    Each page is saved as ``<geneListDir>/<gene>.html``.
    """
    def descending_re_overlap(trait_name):
        # Negated so that an ascending sort yields largest-first order.
        return -__traitMetaAnalysis[trait_name]['RE_chi'][0]

    progress = ProgressBar()
    progress.setMaximum(len(gwasDB.__geneSet))
    progress.updateProgress(0)

    for position, gene in enumerate(gwasDB.__geneSet):
        # Refresh the bar only every tenth gene.
        if position % 10 == 0:
            progress.updateProgress(position)

        display_name = geneDB.__original_names[gene]
        genePage = htmltools.createPage(
            "Gene Summary: " + display_name,
            css_file='../genereport.css',
            scripts={'../sorttable.js': 'javascript'})

        # --- Disease/trait table -------------------------------------------
        ordered_traits = sorted(gwasDB.__traitDict[gene],
                                key=descending_re_overlap)

        rows = []
        for trait in ordered_traits:
            analysis = __traitMetaAnalysis[trait]
            re_gene_count = len(analysis['RE'])
            re_stats = analysis['RE_chi']
            oddsratio = re_stats[4]
            kappa = re_stats[5]
            fisher_exact = re_stats[6]
            fisherp = re_stats[7]
            trait_gene_count = analysis['geneset_size']

            # File-name form of the trait used in the link target.
            href_name = trait.replace(" ", "_").replace("/", " or ").replace("\\", " or ")

            # Long trait names are shortened for display only.
            if len(trait) > 38:
                label = trait[:35] + "..."
            else:
                label = trait

            rows.append([
                "<a href=\"../traitlists/%s.html\">%s</a>" % (href_name, label),
                re_gene_count,
                trait_gene_count,
                "%.7f" % (fisher_exact),
                "%.7f" % (fisherp),
                "%.1f" % (oddsratio),
                "%.4f" % (kappa),
            ])

        genePage.div("Gene %s, total traits: %d" % (display_name, len(rows)),
                     class_="header")

        column_titles = ["Disease/Trait", "#RE Genes", "#Trait Genes",
                         "fisher exact", "P-value", "oddsratio", "kappa"]
        column_classes = ["traitcol", "recol", "genecol", "fishercol",
                          "pcol", "oddscol", "kappacol"]
        htmltools.createTable(genePage, rows, column_titles, "traitlisthead",
                              None, column_classes, "sortable", None)

        # --- DrugBank links ------------------------------------------------
        if gene in drugDB.__drugDict:
            targeting_drugs = drugDB.__drugDict[gene]
            genePage.div("%d drugs targeting gene %s" % (len(targeting_drugs), display_name),
                         class_="header")

            genePage.div.open(class_="druglist")
            genePage.ul.open()
            for drug in targeting_drugs:
                link = "http://www.drugbank.ca/drugs/%s" % (drug)
                # Fall back to the raw drug id when no record is available.
                if drug in drugDB.__drugs:
                    link_text = drugDB.__drugs[drug]['name']
                else:
                    link_text = drug
                genePage.li(oneliner.a(link_text, href=link))
            genePage.ul.close()
            genePage.div.close()
        else:
            genePage.div("No drugs target gene %s" % (display_name),
                         class_="header")

        htmltools.savePage(genePage,
                           os.sep.join([geneListDir, gene + ".html"]))

    progress.finalize()
def computeTraitGeneLists(RE_genes, drug_genes, pfilter_cutoff):
    """Populate ``__traitMetaAnalysis`` with per-trait gene lists and statistics.

    Every trait key of ``gwasDB.__studyByTrait`` is looked up with
    ``gwasDB.getGenesForTrait(trait, pfilter_cutoff)``; traits with no genes
    are skipped.  For the rest, ``__traitMetaAnalysis[trait]`` is replaced by
    a dict with these entries:

        'RE'           -- [(gene, trait_count)] for trait genes in RE_genes
        'drugbank'     -- [(gene, trait_count)] for trait genes in drug_genes
        'other'        -- [(gene, trait_count)] for trait genes in neither
        'RE_chi'       -- (a, b, c, d, oddsratio, kappa, fisher_exact,
                           fisher_p) for the trait genes versus RE_genes
        'drugbank_chi' -- the same tuple computed versus drug_genes
        'geneset_size' -- number of genes for the trait

    ``trait_count`` is ``len(gwasDB.getTraitsForGene(gene))``.  Nothing is
    returned.
    """
    def with_trait_counts(gene_subset):
        # Pair each gene with the number of traits it is associated with.
        return [(gene, len(gwasDB.getTraitsForGene(gene)))
                for gene in gene_subset]

    def contingency_stats(trait_genes, reference_genes):
        # 2x2 table of trait genes versus a reference set, with the approved
        # symbols as the universe, followed by the derived statistics.
        a = len(trait_genes & reference_genes)
        b = len(reference_genes - trait_genes)
        c = len(trait_genes - reference_genes)
        d = len(geneDB.__approved_symbols - (trait_genes | reference_genes))
        oddsratio = geneUtils.oddsRatio(a, b, c, d)
        kappa = geneUtils.kappaStatistic(a, b, c, d)
        fisher_exact = fisher.compute(a, b, c, d)
        fisher_p = fisher.significance(fisher_exact, a, b, c, d)
        return (a, b, c, d, oddsratio, kappa, fisher_exact, fisher_p)

    all_traits = set(gwasDB.__studyByTrait.keys())

    progress = ProgressBar()
    progress.setMaximum(len(all_traits))
    progress.updateProgress(0)

    for position, trait in enumerate(all_traits):
        # Refresh the bar only every fifth trait.
        if position % 5 == 0:
            progress.updateProgress(position)

        traitGenes = gwasDB.getGenesForTrait(trait, pfilter_cutoff)
        if len(traitGenes) == 0:
            continue

        # Register the record first and fill it in place afterwards.
        record = {}
        __traitMetaAnalysis[trait] = record

        record['RE'] = with_trait_counts(traitGenes & RE_genes)
        record['drugbank'] = with_trait_counts(traitGenes & drug_genes)
        record['other'] = with_trait_counts(traitGenes - RE_genes - drug_genes)

        record['RE_chi'] = contingency_stats(traitGenes, RE_genes)
        record['drugbank_chi'] = contingency_stats(traitGenes, drug_genes)

        record['geneset_size'] = len(traitGenes)

    progress.finalize()