示例#1
0
def do_go_analysis(go_data,
                   genes,
                   p_value_threshold,
                   method='weight',
                   elim_cut_off=0.01):
    """
    Do a topGO analysis on the genes.

    @arg go_data: Passed to update_go_data() together with genes; the
        returned object is what gets tested.
    @arg genes: Passed to update_go_data().
    @arg p_value_threshold: Rows are kept when their p-value is less than
        or equal to this value.
    @arg method: Can be 'classic', 'weight' or 'elim'
    @arg elim_cut_off: Given as cutOff to the 'elim' test statistic;
        unused by the other methods.
    @return: The GenTable result passed through r.subset so that only the
        rows satisfying the threshold remain.
    @raise ValueError: If method is not one of the supported names. This
        is checked before any other work is done.
    """

    # R class name and the method-specific constructor arguments
    test_stat_specs = {
        'weight': ("weightCount", {'sigRatio': "ratio"}),
        'classic': ("classicCount", {}),
        'elim': ("elimCount", {'cutOff': elim_cut_off}),
    }

    # fail fast: reject an unknown method before touching the go data
    if method not in test_stat_specs:
        raise ValueError('%s: Unknown topGO method' % method)
    class_name, extra_args = test_stat_specs[method]

    # update the go data with the given genes
    go_data = update_go_data(go_data, genes)

    # get the significant groups
    test_stat = r.new(class_name,
                      testStatistic=r.GOFisherTest,
                      name="Fisher test",
                      **extra_args)
    sig_groups = r.getSigGroups(go_data, test_stat)

    # the result is passed under a keyword named after the method, which
    # is also the column the table is ordered by
    args = {method: sig_groups}
    results_unthresholded = r.GenTable(go_data, orderBy=method, **args)

    # sanity check: the p-values are read from column 5, so that column
    # must be the one named after the method
    p_value_column = 5
    assert method == results_unthresholded.colnames()[p_value_column]

    # only keep those results at or below the threshold
    passed = rpy2.robjects.BoolVector([
        p_value_from_r(p) <= p_value_threshold
        for p in results_unthresholded[p_value_column]
    ])
    return r.subset(results_unthresholded, passed)
示例#2
0
文件: topgo.py 项目: JohnReid/biopsy
def do_go_analysis(go_data, genes, p_value_threshold, method='weight', elim_cut_off=0.01):
    """
    Do a topGO analysis on the genes.

    @arg go_data: Handed to update_go_data() along with genes.
    @arg genes: Handed to update_go_data().
    @arg p_value_threshold: A row is kept when its p-value is <= this value.
    @arg method: Can be 'classic', 'weight' or 'elim'
    @arg elim_cut_off: cutOff argument of the 'elim' test statistic (ignored otherwise).
    @return: The GenTable table filtered with r.subset to the rows that pass the threshold.
    @raise ValueError: For an unsupported method; raised before any data is updated.
    """

    # validate up front so a bad method name never triggers the go data update
    if method not in ('classic', 'weight', 'elim'):
        raise ValueError('%s: Unknown topGO method' % method)

    # update the go data with the given genes
    go_data = update_go_data(go_data, genes)

    # arguments that only some of the test statistic classes take
    extra_args = {}
    if 'weight' == method:
        extra_args['sigRatio'] = "ratio"
    elif 'elim' == method:
        extra_args['cutOff'] = elim_cut_off

    # the R classes are named weightCount, classicCount and elimCount
    test_stat = r.new(
        method + "Count",
        testStatistic=r.GOFisherTest,
        name="Fisher test",
        **extra_args
    )

    # get the significant groups
    sig_groups = r.getSigGroups(go_data, test_stat)

    # the result is supplied under a keyword equal to the method name
    results_unthresholded = r.GenTable(
        go_data,
        orderBy=method,
        **{method: sig_groups}
    )

    # make sure we are looking at the correct column before reading p-values from it
    assert method == results_unthresholded.colnames()[5]

    # only keep those results at or below the threshold
    passed = rpy2.robjects.BoolVector(
        [p_value_from_r(p) <= p_value_threshold for p in results_unthresholded[5]]
    )
    return r.subset(results_unthresholded, passed)
示例#3
0
def go_enrichment(gene_association, subset, algo, minnode):
    """
    Run a topGO Fisher test for each of the MF, BP and CC ontologies.

    gene_association is read with R's readMappings and subset with R's
    scan (as character values). algo is passed to runTest as the algorithm
    and minnode to the topGOdata constructor as nodeSize.

    Returns a list of [term, p-value, adjusted p-value] entries (the two
    values as strings) for every term whose p-value, as returned by
    adjust_pvalues, is below 0.05.
    """
    gene_to_go = R.readMappings(file=gene_association)
    universe = R.names(gene_to_go)
    selected = R.scan(file=subset, what=R.character())

    # "%%" in the Python format string produces a single "%", so the R
    # expression uses the %in% operator
    membership_expr = "factor(as.integer(%s %%in%% %s))" % (universe.r_repr(), selected.r_repr())
    # R's `names(x) <- value` has no direct Python spelling, so setNames
    # is used to attach the gene ids
    gene_flags = R.setNames(R(membership_expr), universe)

    hits = []
    for ontology in ("MF", "BP", "CC"):
        go_data = R.new(
            "topGOdata",
            ontology=ontology,
            allGenes=gene_flags,
            annot=R["annFUN.gene2GO"],
            gene2GO=gene_to_go,
            nodeSize=minnode,
        )
        result = R.runTest(go_data, algorithm=algo, statistic="fisher")
        scores = R.score(result)
        go_terms, raw_p, adjusted_p = adjust_pvalues(list(R.names(scores)), list(scores))
        hits.extend(
            [term, str(raw_p[idx]), str(adjusted_p[idx])]
            for idx, term in enumerate(go_terms)
            if raw_p[idx] < 0.05
        )
    return hits
def go_enrichment(genes2go, genes_list, algo, nodeSize):
    """
    Run a topGO Fisher test over the MF, BP and CC ontologies and save the hits.

    genes2go is read with R's readMappings. genes_list holds the genes of
    interest and must provide r_repr() (presumably an rpy2 vector; confirm
    against the caller). algo is passed to runTest as the algorithm and
    nodeSize to the topGOdata constructor.

    Terms with a p-value below 0.05 are collected, ordered by ascending
    p-value, passed through __add_adjusted_pvalues and __add_GO_info, and
    finally handed to __save_results.
    """

    init_topGO()

    genes2go_map = R.readMappings(file=genes2go)
    refset = R.names(genes2go_map)

    # "%%" in the Python format string produces a single "%", so the R
    # expression uses the %in% operator
    genes_of_interest = R("factor(as.integer(%s %%in%% %s))" % (refset.r_repr(), genes_list.r_repr()))
    genes_of_interest = R.setNames(genes_of_interest, refset)

    score = {}

    for o in ["MF", "BP", "CC"]:
        GOdata = R.new("topGOdata",
                       ontology=o,
                       annot=R["annFUN.gene2GO"],
                       allGenes=genes_of_interest,
                       gene2GO=genes2go_map,
                       nodeSize=nodeSize
                       )

        scoreR = R.score(R.runTest(GOdata, algorithm=algo, statistic="fisher"))

        # fetch the names once per ontology instead of once per element
        for term, pval in zip(scoreR.names, scoreR):
            if pval < 0.05:
                score[term] = {"pval": pval}

    # sort on the p-value itself: the values are dicts, and ordering dicts
    # with "<" raises TypeError on Python 3
    score = collections.OrderedDict(sorted(score.items(), key=lambda t: t[1]["pval"]))
    score = __add_adjusted_pvalues(score)
    score = __add_GO_info(score)

    __save_results(score)