Exemplo n.º 1
0
class GoEnrich():
    """Run GO enrichment analyses against a fixed yeast background.

    The constructor loads the ontology, the background gene list and the
    gene-to-GO associations once and builds a reusable enrichment-study
    object; `measure_enrichment` then evaluates individual gene sets.
    """

    # Gene set used when `measure_enrichment` is called without `gene_set`.
    # Stored as a tuple so the default can never be mutated between calls.
    _DEFAULT_GENE_SET = (
        'YML106W', 'YKL135C', 'YDR516C', 'YLR420W', 'YNL111C', 'YHR007C',
        'YLR014C', 'YKL216W', 'YNL078W', 'YJR005W', 'YJL130C',
    )

    def __init__(self):
        """Load the reference data and create the enrichment-study object."""
        obodag = GODag("../Data/evaluation_reference/goslim_yeast.obo")

        # Read one background gene id per line; the context manager makes
        # sure the file handle is closed instead of leaking.
        with open('../Data/evaluation_reference/gene_list.txt') as handle:
            background = [line.strip() for line in handle]

        geneid2gos_yeast = read_associations(
            '../Data/evaluation_reference/geneid2gos_yeast.txt')

        # NOTE(review): the keyword spelling 'propogate_counts' is kept
        # exactly as it was. Confirm it matches what the installed library
        # version expects (it may be spelled 'propagate_counts' there, in
        # which case this argument could be silently ignored).
        self.goeaobj = GOEnrichmentStudy(
            background,
            geneid2gos_yeast,
            obodag,
            propogate_counts=False,
            alpha=0.05,
            methods=['fdr_bh'])

    def measure_enrichment(self,
                           gene_set=None,
                           run_name='base',
                           cluster_id=1):
        """Run the enrichment study for one gene set and write the results.

        Args:
            gene_set: Iterable of gene ids to test. When ``None`` the
                built-in default gene set is used.
            run_name: Prefix of the output file name.
            cluster_id: Identifier appended to the output file name.

        The results are written to
        ``../Results/<run_name>_<cluster_id>.txt``.
        """
        # Previously the passed-in gene_set was ignored in favour of a
        # hard-coded list; honour the caller's genes and fall back to the
        # same default only when none are supplied.
        if gene_set is None:
            study_genes = list(self._DEFAULT_GENE_SET)
        else:
            study_genes = gene_set

        goea_results_all = self.goeaobj.run_study(study_genes)

        # All results are written, not only the significant ones; filter on
        # the records' `p_fdr_bh` value beforehand if only significant
        # terms are wanted.
        out_path = f"../Results/{run_name}_{cluster_id}.txt"
        self.goeaobj.wr_txt(out_path, goea_results_all)