Exemplo n.º 1
0
 def __init__(self):
     """Read the human association file and keep both mapping directions.

     The mapping returned by ``dnld_assc`` for ``goa_human.gaf`` (located
     under ``REPO``) is stored together with its ``get_b2aset`` reversal.
     The smallest and largest value-set sizes of the reversed mapping are
     recorded, and the reversal is asserted to round-trip to the original.
     """
     self.prt = sys.stdout
     self.gene2gos_orig = dnld_assc(
         os.path.join(REPO, "goa_human.gaf"), go2obj=None, prt=self.prt)
     self.go2genes_orig = get_b2aset(self.gene2gos_orig)
     # Size of each set in the reversed mapping; a list because it is scanned twice
     set_sizes = [len(members) for members in self.go2genes_orig.values()]
     self.min_genes, self.max_genes = min(set_sizes), max(set_sizes)
     # Reversing the reversed mapping must reproduce the original
     assert self.gene2gos_orig == get_b2aset(self.go2genes_orig)
Exemplo n.º 2
0
 def __init__(self, go2obj, annots, relationships=None, **kws):
     """Initialise the GO-to-genes mapping, the per-GO counts and the sub-DAG.

     Args:
         go2obj: The GO DAG; stored as ``self.go2obj`` and handed to the helpers.
         annots: Annotations; passed through ``clean_anno`` before being stored.
         relationships: Forwarded to ``RelationshipCombos(go2obj).get_set``.
         **kws: Only ``prt`` is read. It is given to ``clean_anno`` and, when
             truthy, to ``self.prt_objdesc`` at the end.
     """
     stream = kws.get('prt')
     self.go2obj = go2obj  # Full GODag
     # Only the first two items returned by clean_anno are used
     self.annots, alt_goids = clean_anno(annots, go2obj, stream)[:2]
     rel_set = RelationshipCombos(go2obj).get_set(relationships)
     # GO ID -> genes, and the reverse direction derived from it
     self.go2genes = self._init_go2genes(rel_set, go2obj)
     self.gene2gos = get_b2aset(self.go2genes)
     # Annotation main GO IDs (prefer main id to alt_id)
     self.goids = set(self.go2genes.keys())
     # Number of genes in each GO ID's gene set
     self.gocnts = Counter(
         {goid: len(genes) for goid, genes in self.go2genes.items()})
     # Count stored for the GO ID that NAMESPACE2GO gives for each namespace;
     # 0 when that GO ID has no count
     self.aspect_counts = {
         namespace: self.gocnts.get(NAMESPACE2GO[namespace], 0)
         for namespace in (
             'biological_process',
             'molecular_function',
             'cellular_component')}
     # Runs after aspect_counts and before the sub-DAG is built
     self._init_add_goid_alt(alt_goids)
     self.gosubdag = GoSubDag(
         set(self.gocnts.keys()),
         go2obj,
         tcntobj=self,
         relationships=rel_set,
         prt=None)
     if stream:
         self.prt_objdesc(stream)
Exemplo n.º 3
0
def _run_get_id2gos(annoobjs):
    """Run the get_id2gos checks on every annotation object in ``annoobjs``.

    For each object: print a header, require that ``get_id2gos()`` returns
    something, require that including only ``INC_GOOD`` evidence codes gives
    the same result as excluding ``IEA``, print a summary line, and check
    that the first GO ID found starts with ``GO:``.
    """
    for idx, obj in enumerate(annoobjs):
        header = '\n{I}) get_id2gos {DESC} {NSs} {N:,} annotations'
        print(header.format(
            I=idx,
            DESC=obj.get_desc(),
            NSs=obj.namespaces,
            N=len(obj.associations)))

        # If all namespaces are loaded, returns BP, else returns loaded NS
        print('Load all evidence codes')
        id2gos = obj.get_id2gos()
        assert id2gos, 'NO ANNOTATIONS FOUND'

        print('Load all evidence codes, except IEA')
        included = obj.get_id2gos(ev_include=INC_GOOD)
        excluded = obj.get_id2gos(ev_exclude={'IEA'})
        assert excluded, 'NO NON-IEA ANNOTATIONS FOUND'
        # The failure message is only built when the comparison fails
        assert included == excluded, \
            'INC ALL({A}) != EXC IEA({I}): {DIF}'.format(
                A=len(included), I=len(excluded), DIF='')

        num_ids = len(id2gos)
        summary = '>>>>> {I} >>>>> get_id2gos {N:6,} go2ids[{B:6,}] {ANNO}'
        print(summary.format(
            I=idx, N=num_ids, ANNO=obj.get_desc(), B=len(get_b2aset(id2gos))))

        # First element of the first value set must look like a GO ID
        assert next(iter(next(iter(id2gos.values()))))[:3] == "GO:"

        # Fixed expected size for the 'data/association' file loaded without a DAG
        is_plain_assoc_file = (
            obj.filename[-16:] == 'data/association' and obj.godag is None)
        if is_plain_assoc_file:
            assert num_ids == 34284
Exemplo n.º 4
0
def _get_id2gos(file_id2gos, godag, name2go):
    """Return annotations from ``file_id2gos``, creating the file if needed.

    If the file exists, it is read with ``IdToGosReader`` and the result of
    ``get_id2gos('CC')`` is returned. Otherwise each GO ID looked up in
    ``name2go`` receives a fixed number of consecutively numbered genes, the
    mapping is reversed with ``get_b2aset``, written to ``file_id2gos`` and
    returned.
    """
    if os.path.exists(file_id2gos):
        return IdToGosReader(file_id2gos, godag=godag).get_id2gos('CC')
    # (name, number of genes to annotate to that name's GO ID)
    name_qty = (
        ('A', 10),
        ('B', 10),
        ('C', 10),
        ('D', 10),
        ('E', 10),
        ('F', 10),
        ('G', 10),
        ('H', 10),
        ('I', 30),
        ('L', 30),
        ('M', 20),
        ('N', 30),
    )
    id2num = {name2go[name]: qty for name, qty in name_qty}
    go2genes = cx.defaultdict(set)
    next_gene = 0
    for goid, qty in id2num.items():
        # Each GO ID gets its own block of consecutive gene numbers
        go2genes[goid].update(range(next_gene, next_gene + qty))
        next_gene += qty
    id2gos = get_b2aset(go2genes)
    IdToGosReader.wr_id2gos(file_id2gos, id2gos)
    return id2gos
Exemplo n.º 5
0
 def _adj_for_assc(self):
     """Default the print options to the GO IDs found in the associations.

     Does nothing when ``self.gene2gos`` is empty. Otherwise, unless the
     caller already supplied them in ``self.kws``:
       * ``item_marks`` maps every associated GO ID to ``'>'``
       * ``include_only`` is the ``go2obj`` of a ``GoSubDag`` built from those
         GO IDs (per the original note: the associated GO IDs and their
         ancestors)
     """
     if not self.gene2gos:
         return
     gos_assoc = set(get_b2aset(self.gene2gos).keys())
     if 'item_marks' not in self.kws:
         self.kws['item_marks'] = dict.fromkeys(gos_assoc, '>')
     if 'include_only' not in self.kws:
         # The sub-DAG is only built when the caller gave no include_only
         self.kws['include_only'] = GoSubDag(
             gos_assoc,
             self.gosubdag.go2obj,
             self.gosubdag.relationships).go2obj
Exemplo n.º 6
0
def _get_id2gos(file_id2gos, godag, name2go, name2num):
    """Return annotations from ``file_id2gos``, creating the file if needed.

    When the file is missing, every name in ``name2num`` is translated to a
    GO ID through ``name2go`` and given that many consecutively numbered
    genes. The mapping is reversed with ``get_b2aset``, written to
    ``file_id2gos`` and returned. When the file exists, it is read with
    ``IdToGosReader`` and the result of ``get_id2gos('CC')`` is returned.
    """
    if not os.path.exists(file_id2gos):
        goid2geneset = cx.defaultdict(set)
        next_gene = 0
        for name, num_genes in name2num.items():
            goid = name2go[name]
            # Index inside the loop so a count of 0 leaves no empty entry
            for _ in range(num_genes):
                goid2geneset[goid].add(next_gene)
                next_gene += 1
        id2gos = get_b2aset(goid2geneset)
        IdToGosReader.wr_id2gos(file_id2gos, id2gos)
        return id2gos
    return IdToGosReader(file_id2gos, godag=godag).get_id2gos('CC')
Exemplo n.º 7
0
def describe_assc(org, fin_assc, go2obj, obj, prt):
    """Report set-size statistics for one association, in both directions.

    The association returned by ``dnld_assc`` and its ``get_b2aset`` reversal
    must both be non-empty. For each direction, the sizes of the value sets
    are passed to ``obj.prt_data`` under a label prefixed with ``org``.
    """
    # Sample report kept from the original notes:
    #
    # Assc.       | # Assc| range      | 25th | median | 75th | mean | stddev
    # ------------|-------|------------|------|--------|------|------|-------
    # hsa GO/gene | 19394 | 1 to   212 |    5 |      9 |   17 |   13 |     14
    # hsa gene/GO | 17277 | 1 to 8,897 |    1 |      3 |    8 |   15 |    120
    #
    # mus GO/gene | 19870 | 1 to   261 |    5 |     10 |   18 |   14 |     15
    # mus gene/GO | 17491 | 1 to 7,009 |    1 |      3 |    8 |   16 |    129
    #
    # dme GO/gene | 12551 | 1 to   137 |    2 |      4 |    8 |    6 |      7
    # dme gene/GO |  7878 | 1 to 1,675 |    1 |      3 |    7 |   10 |     41
    gene2gos = dnld_assc(fin_assc, go2obj, prt=None)  # Associations
    go2genes = get_b2aset(gene2gos)
    assert gene2gos
    assert go2genes
    # One report row per direction of the association
    rows = (
        ("{ORG} GO/gene", gene2gos),
        ("{ORG} gene/GO", go2genes),
    )
    for label, mapping in rows:
        set_sizes = [len(members) for members in mapping.values()]
        obj.prt_data(label.format(ORG=org), set_sizes, prt)
Exemplo n.º 8
0
 def get_annotations_reversed(self):
     """Return go2geneset for all GO IDs explicitly annotated to a gene.

     Returns:
         The mapping produced by ``get_b2aset(self.annots)``: every GO ID
         that appears in ``self.annots`` mapped to the set of keys it was
         annotated to.
     """
     # Return the reversed mapping itself.  Star-unpacking it into
     # ``set.union`` passes its keys (GO IDs, not sets) as the arguments,
     # which raises TypeError -- also when the mapping is empty.
     return get_b2aset(self.annots)
Exemplo n.º 9
0
 def get_go2chrs(sec2gos, sec2chr):
     """Dict: given a GO, return the set of letters representing its section membership(s).

     ``sec2gos`` is reversed with ``get_b2aset`` so that each GO ID maps to
     its sections; every section is then translated through ``sec2chr``.
     """
     return {
         goid: {sec2chr[section] for section in sections}
         for goid, sections in get_b2aset(sec2gos).items()}