def simultaneousPermutationWithMultipleSignificanceLevels(
        significance_levels, n, categories, testset_size, iterations):
    """Count, per significance level, the significant categories per round.

    In each of ``iterations`` rounds a test set of ``testset_size`` distinct
    indices is sampled from ``xrange(0, n)``. ``computeOverlapPval`` is then
    called for every category against that test set, and the resulting
    p-value is compared with every entry of ``significance_levels``.

    Returns a list parallel to ``significance_levels``: for each level, the
    number of (round, category) pairs with ``pval <= level`` divided by
    ``iterations``.
    """
    print("Running permutation test on %d categories with test set size of %d drawn from %d items for %d iterations" % (
        len(categories), testset_size, n, iterations))

    progress = ProgressBar()
    progress.setMaximum(iterations)
    progress.updateProgress(0)

    # One running tally per significance level, in the caller's order.
    hits_per_level = [0] * len(significance_levels)

    population = xrange(0, n)
    for round_index in xrange(0, iterations):
        # The progress bar is refreshed on every round.
        progress.updateProgress(round_index)
        drawn = set(random.sample(population, testset_size))

        for category in categories:
            pval = computeOverlapPval(category, drawn, n)
            for slot, level in enumerate(significance_levels):
                if pval <= level:
                    hits_per_level[slot] += 1

    progress.finalize()

    rounds = float(iterations)
    return [float(hits) / rounds for hits in hits_per_level]
def computeTraitStatistics(genes, pfilter):
    """Compute overlap statistics between ``genes`` and every trait gene set.

    Only traits whose unfiltered ``gwasDB.getGenesForTrait(trait)`` result is
    non-empty are considered. For each of them the gene set is fetched again
    with ``pfilter`` and a 2x2 contingency table is built against ``genes``:

        a -- genes present in both the trait set and ``genes``
        b -- genes present in ``genes`` only
        c -- genes present in the trait set only
        d -- entries of ``geneDB.__approved_symbols`` present in neither

    Args:
        genes: set of genes to compare with each trait; set operators
            (``&``, ``-``, ``|``) are applied to it directly.
        pfilter: forwarded unchanged as the second argument of
            ``gwasDB.getGenesForTrait``.

    Returns:
        dict mapping each trait to the tuple
        ``(a, oddsratio, kappa, len(traitGenes), fisher_exact, fisher_p,
        traitGenes)``.
    """
    # NOTE(review): the non-empty check below ignores ``pfilter``, so the
    # filtered set used in the loop may still be empty -- confirm intended.
    traitSet = set(
        key for key in gwasDB.__studyByTrait
        if len(gwasDB.getGenesForTrait(key)) > 0)

    traitChi = {}
    pbar = ProgressBar()
    pbar.setMaximum(len(traitSet))
    pbar.updateProgress(0)

    for i, trait in enumerate(traitSet):
        if i % 5 == 0:
            # Report the real position; the previous code always passed 0,
            # which left the progress bar stuck at the start.
            pbar.updateProgress(i)

        traitGenes = gwasDB.getGenesForTrait(trait, pfilter)

        # Cells of the 2x2 contingency table described in the docstring.
        a = len(traitGenes & genes)
        b = len(genes - traitGenes)
        c = len(traitGenes - genes)
        d = len(geneDB.__approved_symbols - (traitGenes | genes))

        oddsratio = geneUtils.oddsRatio(a, b, c, d)
        kappa = geneUtils.kappaStatistic(a, b, c, d)

        fisher_exact = fisher.compute(a, b, c, d)
        fisher_p = fisher.significance(fisher_exact, a, b, c, d)

        traitChi[trait] = (a, oddsratio, kappa, len(traitGenes),
                           fisher_exact, fisher_p, traitGenes)

    pbar.finalize()
    return traitChi
def createGeneListingsHTML(geneListDir):
    """Write one HTML summary page for every gene in ``gwasDB.__geneSet``.

    Each page holds a table of the gene's traits (taken from
    ``gwasDB.__traitDict`` and filled in from ``__traitMetaAnalysis``),
    followed by a list of DrugBank links for the drugs recorded for the gene
    in ``drugDB.__drugDict``. The page is passed to ``htmltools.savePage``
    with the path ``geneListDir`` + ``os.sep`` + ``<gene>.html``.
    """
    progress = ProgressBar()
    progress.setMaximum(len(gwasDB.__geneSet))
    progress.updateProgress(0)

    def re_overlap_descending(trait_name):
        # Negated first 'RE_chi' entry, so larger values sort first.
        return -__traitMetaAnalysis[trait_name]['RE_chi'][0]

    for position, gene in enumerate(gwasDB.__geneSet):
        if position % 10 == 0:
            progress.updateProgress(position)

        display_name = geneDB.__original_names[gene]
        genePage = htmltools.createPage(
            "Gene Summary: " + display_name,
            css_file='../genereport.css',
            scripts={'../sorttable.js': 'javascript'})

        # Trait table: one row per trait associated with this gene.
        ranked_traits = sorted(gwasDB.__traitDict[gene],
                               key=re_overlap_descending)

        rows = []
        for full_name in ranked_traits:
            meta = __traitMetaAnalysis[full_name]
            re_count = len(meta['RE'])
            chi = meta['RE_chi']
            oddsratio = chi[4]
            kappa = chi[5]
            fisher_exact = chi[6]
            fisherp = chi[7]
            numgenes = meta['geneset_size']

            # The link target is derived from the full trait name.
            link_target = (full_name.replace(" ", "_")
                           .replace("/", " or ")
                           .replace("\\", " or "))

            # Names longer than 38 characters are shortened for display only.
            if len(full_name) > 38:
                shown = full_name[:35] + "..."
            else:
                shown = full_name

            anchor = "<a href=\"../traitlists/%s.html\">%s</a>" % (
                link_target, shown)
            rows.append([
                anchor,
                re_count,
                numgenes,
                "%.7f" % (fisher_exact),
                "%.7f" % (fisherp),
                "%.1f" % (oddsratio),
                "%.4f" % (kappa),
            ])

        genePage.div(
            "Gene %s, total traits: %d" % (display_name, len(rows)),
            class_="header")

        column_titles = ["Disease/Trait", "#RE Genes", "#Trait Genes",
                         "fisher exact", "P-value", "oddsratio", "kappa"]
        column_classes = ["traitcol", "recol", "genecol", "fishercol",
                          "pcol", "oddscol", "kappacol"]
        htmltools.createTable(genePage, rows, column_titles, "traitlisthead",
                              None, column_classes, "sortable", None)

        # Drug section: either a "none" header or a bullet list of links.
        if gene in drugDB.__drugDict:
            targeting = drugDB.__drugDict[gene]
            genePage.div(
                "%d drugs targeting gene %s" % (len(targeting), display_name),
                class_="header")

            genePage.div.open(class_="druglist")
            genePage.ul.open()
            for drug in targeting:
                link = "http://www.drugbank.ca/drugs/%s" % (drug)
                # Use the stored name when available, else the raw id.
                if drug in drugDB.__drugs:
                    label = drugDB.__drugs[drug]['name']
                else:
                    label = drug
                genePage.li(oneliner.a(label, href=link))
            genePage.ul.close()
            genePage.div.close()
        else:
            genePage.div("No drugs target gene %s" % (display_name),
                         class_="header")

        htmltools.savePage(genePage,
                           os.sep.join([geneListDir, gene + ".html"]))

    progress.finalize()
def computeTraitGeneLists(RE_genes, drug_genes, pfilter_cutoff):
    """Fill ``__traitMetaAnalysis`` with gene lists and statistics per trait.

    For every key of ``gwasDB.__studyByTrait`` whose gene set (fetched with
    ``pfilter_cutoff``) is non-empty, a fresh dict is stored under that trait
    with these entries:

        'RE'           -- (gene, trait count) pairs for genes also in RE_genes
        'drugbank'     -- (gene, trait count) pairs for genes also in drug_genes
        'other'        -- (gene, trait count) pairs for genes in neither set
        'RE_chi'       -- (a, b, c, d, oddsratio, kappa, fisher_exact,
                          fisher_p) against RE_genes
        'drugbank_chi' -- the same tuple against drug_genes
        'geneset_size' -- number of genes in the trait's gene set

    Traits with an empty gene set are skipped. Nothing is returned.
    """
    def with_trait_counts(gene_subset):
        # Pair each gene with the number of traits gwasDB reports for it.
        return [(gene, len(gwasDB.getTraitsForGene(gene)))
                for gene in gene_subset]

    def overlap_statistics(trait_genes, reference_genes):
        # 2x2 table of trait genes versus a reference set, plus its stats.
        a = len(trait_genes & reference_genes)
        b = len(reference_genes - trait_genes)
        c = len(trait_genes - reference_genes)
        d = len(geneDB.__approved_symbols - (trait_genes | reference_genes))

        oddsratio = geneUtils.oddsRatio(a, b, c, d)
        kappa = geneUtils.kappaStatistic(a, b, c, d)
        fisher_exact = fisher.compute(a, b, c, d)
        fisher_p = fisher.significance(fisher_exact, a, b, c, d)
        return (a, b, c, d, oddsratio, kappa, fisher_exact, fisher_p)

    traitSet = set(gwasDB.__studyByTrait.keys())

    progress = ProgressBar()
    progress.setMaximum(len(traitSet))
    progress.updateProgress(0)

    for index, trait in enumerate(traitSet):
        if index % 5 == 0:
            progress.updateProgress(index)

        traitGenes = gwasDB.getGenesForTrait(trait, pfilter_cutoff)
        if len(traitGenes) == 0:
            continue

        # Register the record first, then fill it in key by key.
        __traitMetaAnalysis[trait] = {}
        record = __traitMetaAnalysis[trait]

        record['RE'] = with_trait_counts(traitGenes & RE_genes)
        record['drugbank'] = with_trait_counts(traitGenes & drug_genes)
        record['other'] = with_trait_counts(
            traitGenes - RE_genes - drug_genes)

        record['RE_chi'] = overlap_statistics(traitGenes, RE_genes)
        record['drugbank_chi'] = overlap_statistics(traitGenes, drug_genes)

        record['geneset_size'] = len(traitGenes)

    progress.finalize()