def do_go_analysis(go_data, genes, p_value_threshold, method='weight', elim_cut_off=0.01):
    """
    Run a topGO Fisher test for the genes and return the rows that pass the
    p-value threshold.

    @arg go_data: The GO data object; it is passed through update_go_data
        together with the genes before testing.
    @arg genes: The genes to analyse.
    @arg p_value_threshold: Rows are kept when their p-value is <= this value.
    @arg method: Can be 'classic', 'weight' or 'elim'
    @arg elim_cut_off: Passed as cutOff to the 'elim' statistic; ignored by
        the other methods.
    @return: The GenTable result restricted (via R's subset) to passing rows.
    @raise ValueError: If method is not one of the three known names.
    """
    # Bring the GO data in line with the requested genes first.
    go_data = update_go_data(go_data, genes)

    # Each method maps to a topGO statistic class plus its own extra arguments.
    if method == 'weight':
        stat_class, extra = "weightCount", {'sigRatio': "ratio"}
    elif method == 'classic':
        stat_class, extra = "classicCount", {}
    elif method == 'elim':
        stat_class, extra = "elimCount", {'cutOff': elim_cut_off}
    else:
        raise ValueError('%s: Unknown topGO method' % method)
    test_stat = r.new(stat_class, testStatistic=r.GOFisherTest, name="Fisher test", **extra)

    sig_groups = r.getSigGroups(go_data, test_stat)

    # The result is handed to GenTable under a keyword equal to the method
    # name; the check below expects that name to label the column at index 5.
    table = r.GenTable(go_data, orderBy=method, **{method: sig_groups})
    assert method == table.colnames()[5]  # make sure looking at correct column

    # One boolean per row: True when the row's p-value is within the threshold.
    keep_flags = [p_value_from_r(p) <= p_value_threshold for p in table[5]]
    passed = rpy2.robjects.BoolVector(keep_flags)
    return r.subset(table, passed)
def do_go_analysis(go_data, genes, p_value_threshold, method='weight', elim_cut_off=0.01):
    """
    Perform a topGO analysis on the genes and keep only the significant terms.

    @arg go_data: GO data object, refreshed with the genes via update_go_data.
    @arg genes: The genes the analysis is run on.
    @arg p_value_threshold: Largest p-value a row may have and still be kept.
    @arg method: Can be 'classic', 'weight' or 'elim'
    @arg elim_cut_off: The cutOff given to the 'elim' statistic (only used
        when method is 'elim').
    @return: Result of R's subset applied to the GenTable output.
    @raise ValueError: For any method other than the three listed above.
    """
    go_data = update_go_data(go_data, genes)

    # Work out which statistic class to instantiate and what it needs beyond
    # the arguments shared by all three methods.
    method_kwargs = {}
    if method == 'weight':
        class_name = "weightCount"
        method_kwargs['sigRatio'] = "ratio"
    elif method == 'classic':
        class_name = "classicCount"
    elif method == 'elim':
        class_name = "elimCount"
        method_kwargs['cutOff'] = elim_cut_off
    else:
        raise ValueError('%s: Unknown topGO method' % method)

    test_stat = r.new(
        class_name,
        testStatistic=r.GOFisherTest,
        name="Fisher test",
        **method_kwargs
    )
    sig_groups = r.getSigGroups(go_data, test_stat)

    # GenTable receives the result as a keyword argument named after the
    # method and is asked to order by that same name.
    gen_table_kwargs = {'orderBy': method, method: sig_groups}
    results_unthresholded = r.GenTable(go_data, **gen_table_kwargs)

    # Sanity check: the column at index 5 must be the one for this method.
    assert method == results_unthresholded.colnames()[5]

    # Flag the rows whose p-value does not exceed the threshold.
    passed = rpy2.robjects.BoolVector(
        [p_value_from_r(p_value) <= p_value_threshold
         for p_value in results_unthresholded[5]]
    )
    return r.subset(results_unthresholded, passed)
def go_enrichment(gene_association, subset, algo, minnode):
    """
    Run a topGO Fisher enrichment for each of the MF, BP and CC ontologies.

    @arg gene_association: Passed as the file argument of R's readMappings to
        load the gene-to-GO mapping.
    @arg subset: Passed as the file argument of R's scan (read as character)
        to load the genes of interest.
    @arg algo: Algorithm name forwarded to topGO's runTest.
    @arg minnode: Forwarded as nodeSize to the topGOdata constructor.
    @return: A list of [term, p-value as str, adjusted p-value as str] for
        every term whose p-value (as returned by adjust_pvalues) is < 0.05.
    """
    geneID2GO = R.readMappings(file=gene_association)
    refset = R.names(geneID2GO)
    testset = R.scan(file=subset, what=R.character())

    # "%%" is a literal "%" under %-formatting, so R receives the %in% operator.
    membership_expr = "factor(as.integer(%s %%in%% %s))" % (refset.r_repr(), testset.r_repr())
    # setNames stands in for the R statement `names(x) <- refset`, which has
    # no direct Python equivalent.
    genes_of_interest = R.setNames(R(membership_expr), refset)

    significant = []
    for ontology in ("MF", "BP", "CC"):
        GOdata = R.new(
            "topGOdata",
            ontology=ontology,
            allGenes=genes_of_interest,
            annot=R["annFUN.gene2GO"],
            gene2GO=geneID2GO,
            nodeSize=minnode,
        )
        test = R.runTest(GOdata, algorithm=algo, statistic="fisher")
        pvalues = list(R.score(test))
        terms = list(R.names(R.score(test)))
        terms, pvalues, padjusted = adjust_pvalues(terms, pvalues)
        significant.extend(
            [term, str(pvalues[idx]), str(padjusted[idx])]
            for idx, term in enumerate(terms)
            if pvalues[idx] < 0.05
        )
    return significant
def go_enrichment(genes2go, genes_list, algo, nodeSize):
    """
    Run a topGO Fisher enrichment over the MF, BP and CC ontologies and save
    the significant terms.

    @arg genes2go: Passed as the file argument of R's readMappings to load
        the gene-to-GO mapping.
    @arg genes_list: The genes of interest; must provide r_repr() (it is
        spliced into an R expression).
    @arg algo: Algorithm name forwarded to topGO's runTest.
    @arg nodeSize: Forwarded as nodeSize to the topGOdata constructor.
    @return: None. Terms with a score below 0.05 are collected, sorted by
        p-value, passed through __add_adjusted_pvalues and __add_GO_info,
        and handed to __save_results.
    """
    init_topGO()

    genes2go_map = R.readMappings(file=genes2go)
    refset = R.names(genes2go_map)

    # "%%" is a literal "%" under %-formatting, so R receives the %in% operator.
    genes_of_interest = R(
        "factor(as.integer(%s %%in%% %s))" % (refset.r_repr(), genes_list.r_repr())
    )
    genes_of_interest = R.setNames(genes_of_interest, refset)

    score = {}
    for o in ["MF", "BP", "CC"]:
        GOdata = R.new(
            "topGOdata",
            ontology=o,
            annot=R["annFUN.gene2GO"],
            allGenes=genes_of_interest,
            gene2GO=genes2go_map,
            nodeSize=nodeSize,
        )
        scoreR = R.score(R.runTest(GOdata, algorithm=algo, statistic="fisher"))
        # Look the names up once per ontology rather than once per hit.
        names = scoreR.names
        for i, pval in enumerate(scoreR):
            if pval < 0.05:
                score[names[i]] = {"pval": pval}

    # Sort on the p-value itself: the values are dicts, and ordering dicts
    # directly raises TypeError on Python 3.
    score = collections.OrderedDict(
        sorted(score.items(), key=lambda item: item[1]["pval"])
    )
    score = __add_adjusted_pvalues(score)
    score = __add_GO_info(score)
    __save_results(score)