Exemplo n.º 1
0
def _data_lines(path):
    """Yield the data lines of *path* with the line terminator removed.

    Lines that start with ``#`` and lines that contain only whitespace are
    skipped.
    """
    # The legacy 'rU' mode was removed in Python 3.11 (open() raises
    # ValueError); the default text mode already translates newlines there.
    with open(path) as handle:
        for line in handle:
            if line.startswith('#') or line.strip() == '':
                continue
            # Also drop a stray '\r' in case newline translation is not
            # applied by the running interpreter.
            yield line.rstrip('\r\n')


def main(infile, outfile, annotations, descriptions, categories):
    """Write one tab-separated row per SUPERFAMILY hit of every query.

    Args:
        infile: Passed to ``InterproscanResult``.
        outfile: Passed to ``outhandler``; rows are written to the handle it
            yields.
        annotations: Path of a text file parsed line by line with
            ``annot_line``; each record needs ``'catid'`` and ``'id'`` keys
            and is merged with the category record of the same ``'catid'``.
        descriptions: Path of a text file parsed line by line with
            ``desc_line``; each record needs an ``'id'`` key. These records
            are the fallback for ids without an annotation and get empty
            ``category``/``short_catname``/``long_catname`` fields.
        categories: Path of a text file parsed line by line with
            ``cat_line``; each record needs a ``'catid'`` key.

    Raises:
        KeyError: If an annotation refers to an unknown category id, or a
            SUPERFAMILY accession is found in neither the annotations nor
            the descriptions.
    """
    prefix = 'SSF'
    template = '{seqid}\t{category}\t{short_catname}\t{long_catname}\t{id}\t{name}\n'

    cats = {}
    for line in _data_lines(categories):
        record = cat_line(line)
        cats[record['catid']] = record

    annots = {}
    for line in _data_lines(annotations):
        record = annot_line(line)
        # Fold the category fields into the annotation record.
        record.update(cats[record['catid']])
        annots[record['id']] = record

    descs = {}
    for line in _data_lines(descriptions):
        record = desc_line(line)
        # Description-only entries carry no category information; blank
        # fields keep the output template satisfiable.
        record['category'] = ''
        record['short_catname'] = ''
        record['long_catname'] = ''
        descs[record['id']] = record

    ips = InterproscanResult(infile)
    with outhandler(outfile) as handle:
        for query, analyses in ips.items():
            if 'SUPERFAMILY' not in analyses:
                continue

            sfids = set()
            for record in analyses['SUPERFAMILY']:
                acc = record.accession
                # Remove the literal prefix only; str.lstrip('SSF') would
                # strip any leading run of 'S'/'F' characters instead.
                if acc.startswith(prefix):
                    acc = acc[len(prefix):]
                sfids.add(acc)

            # Sorted so the row order is reproducible between runs.
            results = [
                annots[acc] if acc in annots else descs[acc]
                for acc in sorted(sfids)
            ]
            for result in results:
                handle.write(template.format(seqid=query, **result))
Exemplo n.º 2
0
def main(infile, outfile, pantherfile):
    """Write the named PANTHER (sub)families hit by each query.

    For every query that has a ``'PANTHER'`` analysis, the accession of each
    hit is looked up in the ``External2GO`` database built from
    *pantherfile* to obtain its name. Subfamily accessions (those containing
    ``':'``) are considered first, then family accessions. An entry is
    dropped when its name is one of the "NOT NAMED" placeholders or when its
    accession has already been recorded; selecting a subfamily also records
    its parent family, so that family is not listed separately.

    Each selected entry is written as ``seqid<TAB>accession<TAB>name``.

    Args:
        infile: Passed to ``InterproscanResult``.
        outfile: Passed to ``outhandler``.
        pantherfile: Passed to ``External2GO`` with ``fmt='panther'``.
    """
    placeholder_names = {'FAMILY NOT NAMED', 'SUBFAMILY NOT NAMED'}
    row = '{seqid}\t{ids}\t{names}\n'

    pantherdb = External2GO(pantherfile, fmt='panther')
    ips = InterproscanResult(infile)

    with outhandler(outfile) as handle:
        for query, analyses in ips.items():
            if 'PANTHER' not in analyses:
                continue

            accessions = [hit.accession for hit in analyses['PANTHER']]
            # Resolve every name before filtering so that an unknown
            # accession fails before anything is written for this query.
            labelled = [(acc, pantherdb[acc].name) for acc in accessions]

            subfamilies = [pair for pair in labelled if ':' in pair[0]]
            families = [pair for pair in labelled if ':' not in pair[0]]

            seen = set()
            selected = []
            for acc, name in subfamilies + families:
                if name in placeholder_names or acc in seen:
                    continue
                # For a family accession the split yields the accession
                # itself, so this records the parent only for subfamilies.
                seen.add(acc.split(':')[0])
                seen.add(acc)
                selected.append((acc, name))

            for acc, name in selected:
                handle.write(row.format(seqid=query, ids=acc, names=name))
Exemplo n.º 3
0
def main(
        infile,
        outfile,
        obofile,
        outfmt='long',
        pantherfile=None,
        pfamfile=None,
        smartfile=None,
        interprofile=None,
        prositefile=None,
        printsfile=None,
        prodomfile=None,
        tigrfamfile=None,
        pirsffile=None,
        hamapfile=None,
        domainfile=None,
        datadir=None,
        ):
    """Write the GO terms associated with the hits of each query.

    Each ``*file`` argument that is not ``None`` is loaded with
    ``External2GO`` and registered under the analysis name(s) it applies to.
    For every query in ``InterproscanResult(infile)``, the accession of each
    record whose analysis has a registered database is looked up, and the GO
    ids found there are resolved to a name and namespace through
    ``GODag(obofile)``. Queries without any GO term produce no output.

    Args:
        infile: Passed to ``InterproscanResult``.
        outfile: Passed to ``outhandler``.
        obofile: Ontology file, joined onto *datadir* and passed to
            ``GODag``.
        outfmt: ``'long'`` writes one ``seqid<TAB>go<TAB>term<TAB>domain``
            row per GO term; ``'association'`` writes one
            ``seqid<TAB>go;go;...`` row per query.
        pantherfile, pfamfile, smartfile, interprofile, prositefile,
        printsfile, prodomfile, tigrfamfile, pirsffile, hamapfile,
        domainfile: Optional mapping files, each joined onto *datadir*.
        datadir: Directory prefix for *obofile* and the mapping files;
            ``None`` means no prefix.

    Raises:
        ValueError: If *outfmt* is not ``'long'`` or ``'association'``.
    """
    # Fail fast: an unknown format used to run to completion and silently
    # leave an empty output file behind.
    if outfmt not in ('long', 'association'):
        raise ValueError(
            "outfmt must be 'long' or 'association', got {!r}".format(outfmt)
            )

    if datadir is None:
        datadir = ''

    # (analysis names served, mapping file, extra External2GO arguments)
    sources = [
        (('PANTHER',), pantherfile, {'fmt': 'panther'}),
        (('Pfam',), pfamfile, {}),
        (('SMART',), smartfile, {}),
        (('IPR',), interprofile, {}),
        (('ProSitePatterns', 'ProSiteProfiles'), prositefile, {}),
        (('PRINTS',), printsfile, {}),
        (('ProDom',), prodomfile, {}),
        (('TIGRFAM',), tigrfamfile, {}),
        (('PIRSF',), pirsffile, {}),
        (('Hamap',), hamapfile, {}),
        (('SUPERFAMILY',), domainfile, {'fmt': 'superfamily'}),
        ]

    dbs = dict()
    for names, path, options in sources:
        if path is None:
            continue
        db = External2GO(pjoin(datadir, path), **options)
        for name in names:
            dbs[name] = db

    ips = InterproscanResult(infile)
    godag = GODag(pjoin(datadir, obofile))

    with outhandler(outfile) as handle:
        for query, analyses in ips.items():
            ontologies = set()
            for analysis, records in analyses.items():
                # Checked once per analysis rather than once per record.
                db = dbs.get(analysis)
                if db is None:
                    continue
                for record in records:
                    acc = record.accession
                    if acc not in db:
                        continue
                    for ontology in db[acc].ontologies:
                        go = ontology.id
                        entry = godag[go]
                        domain = entry.namespace.replace('_', ' ')
                        ontologies.add((go, entry.name, domain))

            if not ontologies:
                continue

            # Sorted so the output is reproducible between runs; set
            # iteration order is not.
            ordered = sorted(ontologies)

            if outfmt == 'long':
                template = "{seqid}\t{go}\t{term}\t{domain}\n"
                for go, term, domain in ordered:
                    handle.write(template.format(
                        seqid=query,
                        go=go,
                        term=term,
                        domain=domain,
                        ))
            else:
                template = "{seqid}\t{gos}\n"
                handle.write(template.format(
                    seqid=query,
                    gos=';'.join(go for go, _term, _domain in ordered),
                    ))