示例#1
0
文件: wang.py 项目: sunyi000/goatools
 def _init_go2dag(self, goids):
     """Return a dict mapping GO IDs to DagA objects for the GO IDs in goids.

     Only GO IDs present in ``self.godag`` are processed. If any requested
     GO ID is absent, ``self._go_not_found`` is called with the found set
     and the full requested set.
     """
     godag = self.godag
     relationships = self.rels
     edge_weights = self.w_e
     # Split the user's GO IDs into "requested" and "actually in the DAG"
     requested = set(goids)
     found = requested.intersection(godag.keys())
     if found != requested:
         self._go_not_found(found, requested)
     # Ancestors for each found GO term, then the depth lookup built from them
     go2ancestors = get_go2ancestors(self._get_goobjs(found), relationships)
     go2depth = self._get_go2depth(go2ancestors, relationships)
     go2dag = {}
     for goid, ancestors in go2ancestors.items():
         go2dag[goid] = DagA(goid, ancestors, go2depth, edge_weights, godag)
     # Add alt GO IDs: found IDs that are not keys of go2ancestors
     for alt_id in found.difference(go2ancestors.keys()):
         term = godag[alt_id]
         main_id = term.item_id
         depth = term.depth
         if depth == 0:
             # Depth-0 term: build a DagA for the main ID with no ancestors
             go2dag[alt_id] = DagA(main_id, {}, go2depth, edge_weights, godag)
         elif depth != 0:
             # Otherwise share the DagA already built for the main GO ID
             go2dag[alt_id] = go2dag[main_id]
     return go2dag
示例#2
0
 def get_go_lineage_of(self, terms):
     """Return a de-duplicated list of GO IDs in the lineage of ``terms``.

     Each ID in ``terms`` is looked up in ``self.GODAG``. The result holds
     every key returned by ``get_go2ancestors`` together with all ancestors
     listed under those keys, in no particular order.
     """
     term_objs = [self.GODAG[goid] for goid in terms]
     go2ancestors = get_go2ancestors(term_objs, False)
     # Flatten to: key, its ancestors, next key, its ancestors, ...
     lineage = [
         goid
         for key, ancestors in go2ancestors.items()
         for goid in [key] + list(ancestors)
     ]
     return list(set(lineage))
示例#3
0
 def __init__(self, go2obj, relationships, dcnt, go2letter):
     """Store the inputs and precompute ancestor/descendant lookups.

     Args:
         go2obj: GO ID -> GO term object mapping. Subset go2obj contains
             only items needed by go_sources.
         relationships: Relationships passed to get_go2descendants and
             get_go2ancestors.
             Ex: set(['part_of', 'regulates', 'negatively_regulates',
             'positively_regulates'])
         dcnt: Stored unchanged as ``self.dcnt``.
         go2letter: Stored unchanged as ``self.go2letter``.
     """
     self.go2obj = go2obj
     self.relationships = relationships
     self.dcnt = dcnt
     self.go2letter = go2letter
     _goobjs, _altgo2goobj = get_goobjs_altgo2goobj(self.go2obj)
     self.go2descendants = get_go2descendants(_goobjs, relationships)
     self.go2ancestors = get_go2ancestors(_goobjs, relationships)
     # Descendant count per GO ID, taken before alt GO IDs are added below
     self.go2dcnt = cx.Counter({go: len(p) for go, p in self.go2descendants.items()})
     # Apply add_alt_goids to each lookup using the alt-GO mapping
     # (presumably adds entries keyed by alternate GO IDs -- confirm in helper)
     add_alt_goids(self.go2ancestors, _altgo2goobj)
     add_alt_goids(self.go2descendants, _altgo2goobj)
     add_alt_goids(self.go2dcnt, _altgo2goobj)
示例#4
0
    def _init_go2genes(self, relationship_set, godag):
        """Return a dict mapping each GO ID to the set of genes annotated to it.

        Annotations come from ``self.annots`` (gene ID -> GO IDs). Due to the
        ontology structure, a gene product annotated to a GO term is also
        counted for every ancestor of that term, where ancestors are taken
        from ``get_go2ancestors`` over all terms in ``godag``.
        """
        go2up = get_go2ancestors(set(godag.values()), relationship_set)
        go2geneset = defaultdict(set)
        for geneid, goids_anno in self.annots.items():
            # Union of annotated terms and their ancestors, so a gene is
            # recorded once per GO ID even when ancestor sets overlap
            propagated = set()
            for goid in goids_anno:
                propagated.add(goid)
                propagated.update(go2up.get(goid, ()))
            for goid in propagated:
                go2geneset[goid].add(geneid)
        return dict(go2geneset)
示例#5
0
    def _init_go2genes(self, annots, relationships=None):
        """Return (go2geneset, go_alts) built from gene-to-GO annotations.

        ``go2geneset`` maps each main GO ID to the set of genes annotated to
        it; a gene annotated to a term is also counted for every ancestor of
        that term. ``go_alts`` is the set of annotation GO IDs that differ
        from their term's ``item_id`` (alternate GO IDs). Annotation GO IDs
        absent from ``self.go2obj`` are skipped and their count is printed.
        """
        if relationships is None:
            relationships = {}
        godag = self.go2obj
        go2up = get_go2ancestors(set(godag.values()), relationships)
        go2geneset = defaultdict(set)
        alt_ids = set()      # Annotation GO IDs that are alternate IDs
        missing_ids = set()  # Annotation GO IDs not present in the DAG
        for geneid, goids_anno in annots.items():
            # Union of main terms and their ancestors, so a gene is recorded
            # once per GO ID even when ancestor sets overlap
            propagated = set()
            for goid_anno in goids_anno:
                if goid_anno not in godag:
                    missing_ids.add(goid_anno)
                    continue
                goid_main = godag[goid_anno].item_id
                if goid_anno != goid_main:
                    alt_ids.add(goid_anno)
                propagated.add(goid_main)
                if goid_main in go2up:
                    propagated |= go2up[goid_main]
            for goid in propagated:
                go2geneset[goid].add(geneid)
        if missing_ids:
            print("{N} Assc. GO IDs not found in the GODag\n".format(
                N=len(missing_ids)))
        return dict(go2geneset), alt_ids
示例#6
0
def get_go2parents_go2obj(go2obj, relationships=None, prt=None):
    """Return a GO ID -> upstream GO IDs mapping for the GO IDs in go2obj.

    The mapping is produced by ``get_go2ancestors`` from the GO objects that
    ``get_goobjs_altgo2goobj`` extracts from ``go2obj``, and is then passed
    through ``add_alt_goids`` with the alternate-GO lookup before returning.
    """
    main_goobjs, alt2goobj = get_goobjs_altgo2goobj(go2obj)
    go2up = get_go2ancestors(main_goobjs, relationships, prt)
    add_alt_goids(go2up, alt2goobj)
    return go2up