示例#1
0
def test_semantic_similarity(usr_assc=None):
    """Check that each annotation's namespace agrees with its GO term.

    For every association (GAF) file, each annotation whose GO ID is present
    in the GO DAG has its namespace code (``nta.NS`` mapped through
    ``NS2NAMESPACE``) compared with the ``namespace`` of the GO term.
    Annotations that disagree are collected per file and handed to
    ``_wr_errs``; GO IDs that are absent from the DAG are handed to
    ``_prt_not_found``.

    Args:
        usr_assc: Optional name of a single association file. When given,
            only that file is checked instead of every name in
            ``ASSOCIATIONS`` (minus the excluded UniProt files).
    """
    not_these = {'goa_uniprot_all.gaf', 'goa_uniprot_all_noiea.gaf'}
    assc_names = sorted(ASSOCIATIONS.difference(not_these))
    go2obj = get_go2obj()
    # http://current.geneontology.org/annotations/
    if usr_assc is not None:
        assc_names = [usr_assc]
    not_found = set()
    gaf2errs = cx.defaultdict(list)
    for assc_name in assc_names:
        fin_gaf = os.path.join(REPO, assc_name)
        # Only call dnld_annotation when the file is not already on disk.
        if not os.path.exists(fin_gaf):
            dnld_annotation(fin_gaf)
        annoobj = GafReader(fin_gaf)
        for nta in annoobj.associations:
            if nta.GO_ID not in go2obj:
                # GO ID unknown to the DAG: report it once at the end.
                not_found.add(nta.GO_ID)
                continue
            goterm = go2obj[nta.GO_ID]
            if NS2NAMESPACE.get(nta.NS) != goterm.namespace:
                gaf2errs[assc_name].append(nta)
    print('{HMS} {N} Associations'.format(HMS=_hms(TIC), N=len(assc_names)))
    if not_found:
        _prt_not_found(not_found)
    if gaf2errs:
        _wr_errs('namespace_errors.txt', gaf2errs, go2obj)
示例#2
0
def test_semantic_similarity(usr_assc=None):
    """Print GO term count summaries for each association file.

    For every association (GAF) file, the gene-to-GO mapping returned by
    ``GafReader.get_id2gos('all')`` is used to build a ``TermCounts`` object.
    When counts exist, the aspect counts are printed and ``prt_info`` is
    called three times: with ``None``, with half of the top count, and with
    one tenth of the top count. Files that yield no gene-to-GO mapping are
    collected and handed to ``_prt_not_found``.

    Args:
        usr_assc: Optional name of a single association file. When given,
            only that file is processed instead of every name in
            ``ASSOCIATIONS`` (minus the excluded UniProt files).
    """
    not_these = {'goa_uniprot_all.gaf', 'goa_uniprot_all_noiea.gaf'}
    associations = sorted(ASSOCIATIONS.difference(not_these))
    go2obj = get_go2obj()
    # http://current.geneontology.org/annotations/
    if usr_assc is not None:
        associations = [usr_assc]
    not_found = set()
    for assc_name in associations:
        # Per-file start time, reported next to the overall elapsed time.
        tic = timeit.default_timer()
        fin_gaf = os.path.join(REPO, assc_name)
        # Only call dnld_annotation when the file is not already on disk.
        if not os.path.exists(fin_gaf):
            dnld_annotation(fin_gaf)
        annoobj = GafReader(fin_gaf)
        assc_gene2gos = annoobj.get_id2gos('all')
        if not assc_gene2gos:
            # Nothing to count for this file; report it at the end.
            not_found.add(assc_name)
            continue

        # Initialize the counts of each GO term.
        tcntobj = TermCounts(go2obj, assc_gene2gos)
        go_cnt = tcntobj.gocnts.most_common()

        if go_cnt:
            print("{ASSC}".format(ASSC=assc_name))
            print(tcntobj.aspect_counts)
            # Count of the first (most common) entry.
            gocnt_max = go_cnt[0][1]
            prt_info(tcntobj, go_cnt, None)
            prt_info(tcntobj, go_cnt, gocnt_max / 2.0)
            prt_info(tcntobj, go_cnt, gocnt_max / 10.0)
        print("{HMS} {hms} {ASSC}\n".format(ASSC=assc_name,
                                            HMS=_hms(TIC),
                                            hms=_hms(tic)))
    print('{HMS} {N} Associations'.format(HMS=_hms(TIC), N=len(associations)))
    if not_found:
        _prt_not_found(not_found)
示例#3
0
def test_termcnt_init():
    """Check that two ways of loading annotations give equal results.

    The result of ``_run_full`` (all annotations loaded together) must equal
    the result of ``_run_each`` (annotations loaded one at a time) for the
    same GPAD file and GO DAG.
    """
    obo_path = os.path.join(REPO, 'go-basic.obo')
    dag = get_godag(obo_path)

    gpad_path = os.path.join(REPO, 'goa_human.gpad')
    dnld_annotation(gpad_path)

    # All annotations (BP, MF, CC) in one pass
    result_all_at_once = _run_full(gpad_path, dag)
    # One annotation set (BP, MF, CC) at a time
    result_one_by_one = _run_each(gpad_path, dag)

    # Both load methods must agree
    assert result_all_at_once == result_one_by_one
示例#4
0
def test_find_enrichment(run_all=False):
    """Run every command from ``_get_cmds`` and assert each one exits with 0.

    Nothing is run unless ``run_all`` is truthy; otherwise a reminder is
    printed and the function returns immediately.
    """
    if not run_all:
        print('RUN THIS TEST WITH AN ARGUMENT')
        return

    obo_path = join(REPO, 'go-basic.obo')
    get_godag(obo_path, optional_attrs={'relationship'}, loading_bar=None)
    gaf_path = join(REPO, 'goa_human.gaf')
    dnld_annotation(gaf_path)

    banner = '------------------- TEST {I} ------------------------------------'
    for num, command in enumerate(_get_cmds()):
        print(banner.format(I=num))
        print('CMD: {CMD}'.format(CMD=command))
        # A non-zero exit status from the shell command fails the test.
        assert system(command) == 0
    print("TEST PASSED")
def test_tcntobj_relationships(prt=sys.stdout):
    """Build TermCounts with and without relationships and check them.

    Two GO DAGs are loaded from the same OBO file, one plain and one with
    the optional ``relationship`` attribute. For each namespace in ``NSS`` a
    ``TermCounts`` object is built against each DAG (the second one also
    receives ``RELS``), and both mappings are passed to ``_chk_pass_fail``.

    Args:
        prt: Stream passed to ``download_go_basic_obo``.
    """
    obo_path = os.path.join(REPO, "go-basic.obo")
    gpad_path = os.path.join(REPO, 'goa_human.gpad')

    download_go_basic_obo(obo_path, prt, loading_bar=None)
    dnld_annotation(gpad_path)

    # Ontologies: without and with relationships
    dag_plain = GODag(obo_path)
    dag_rels = GODag(obo_path, optional_attrs=['relationship'])

    # Annotations are read against the plain DAG
    annotations = GpadReader(gpad_path, godag=dag_plain)

    # One TermCounts object per namespace, for each DAG
    counts_plain = {}
    for nspc in NSS:
        id2gos = annotations.get_id2gos(nspc)
        counts_plain[nspc] = TermCounts(dag_plain, id2gos)

    counts_rels = {}
    for nspc in NSS:
        id2gos = annotations.get_id2gos(nspc)
        counts_rels[nspc] = TermCounts(dag_rels, id2gos, RELS)

    _chk_pass_fail(counts_plain, counts_rels)
示例#6
0
文件: utils.py 项目: marade/goatools
def get_anno_fullname(fin_anno):
    """Return the path of ``fin_anno`` under ``REPO``.

    ``dnld_annotation`` is called on the joined path before it is returned.
    """
    anno_path = join(REPO, fin_anno)
    dnld_annotation(anno_path)
    return anno_path