def translate_identifiers(format='html'):
    '''
    Translate a whitespace-separated list of identifiers submitted via a form.

    Reads ``ids``, ``inputns`` and ``outputns`` from ``request.forms``. Every
    identifier is first translated from ``inputns`` to ``ensembl:gene_id`` and
    the Ensembl gene ID is then translated to ``outputns``.

    Parameters
    ----------
    format : str, optional
        Output format: ``'csv'`` (tab-separated columns, one line per
        identifier), ``'json'`` or anything else for the HTML template
        (default: ``'html'``).

    Returns
    -------
    response : str or rendered template
        When no ``ids`` field was submitted, the empty ``translate`` form is
        rendered regardless of `format`.
    '''
    if 'ids' not in request.forms:
        # Nothing submitted yet: show the empty form.
        return template('translate', {
            'results': None,
            'namespaces': namespace_fullname.items(),
        })

    inputns = request.forms['inputns']
    outputns = request.forms['outputns']

    results = []
    # str.split() without arguments already strips surrounding whitespace
    # and never yields empty tokens.
    for identifier in request.forms['ids'].split():
        ensembl_gene = translate(identifier, inputns, 'ensembl:gene_id')
        output = translate(ensembl_gene, 'ensembl:gene_id', outputns)
        results.append((identifier, ensembl_gene, output))

    if format == 'csv':
        # NOTE(review): if translate() can return None for an unknown
        # identifier, this join raises TypeError -- confirm and decide how
        # missing values should be rendered.
        return '\n'.join('\t'.join(r) for r in results)

    if format == 'json':
        from collections import namedtuple
        rtype = namedtuple('WaldoIDTranslation', 'input ensembl_gene output')
        # Unpack each 3-tuple into the three fields; passing the tuple as a
        # single argument raises TypeError.
        # json serialises namedtuples as plain arrays: [input, ensembl, output]
        return json.dumps([rtype(*r) for r in results])

    return template('translate', {
        'results': results,
        'inputns_user': namespace_fullname[inputns],
        'outputns_user': namespace_fullname[outputns],
        'namespaces': namespace_fullname.items(),
    })
def from_ensembl_peptide_id(ensembl_peptide_id, session=None):
    '''
    mgi_id = from_ensembl_peptide_id(ensembl_peptide_id, session=None)

    Look up the MGI ID (MGI:00xxxxxx) that corresponds to an Ensembl peptide ID.

    Parameters
    ----------
    ensembl_peptide_id : Ensembl peptide ID
    session : SQLAlchemy session, forwarded unchanged to `translate`

    Returns
    -------
    mgi_id : result of translating from 'ensembl:peptide_id' to 'mgi:id'
    '''
    source_namespace = 'ensembl:peptide_id'
    target_namespace = 'mgi:id'
    return translate(ensembl_peptide_id, source_namespace, target_namespace, session)
def from_ensembl_gene_id(ensembl_gene_id, session=None):
    '''
    locate_id = from_ensembl_gene_id(ensembl_gene_id, session=None)

    Look up the LOCATE uid that corresponds to an Ensembl gene ID.

    Parameters
    ----------
    ensembl_gene_id : Ensembl gene ID
    session : SQLAlchemy session, forwarded unchanged to `translate`

    Returns
    -------
    locate_id : result of translating from 'ensembl:gene_id' to 'locate:id'
    '''
    source_namespace = 'ensembl:gene_id'
    target_namespace = 'locate:id'
    return translate(ensembl_gene_id, source_namespace, target_namespace, session)
def search(format='html'):
    '''
    Search Uniprot entries using the ``q`` query parameter.

    Entries whose ``rname`` equals the query are looked up first; when there
    are none, `waldo.uniprot.retrieve.retrieve_name_matches` is used instead.

    Parameters
    ----------
    format : str, optional
        Passed on to `_result` when exactly one entry matches.

    Returns
    -------
    response
        The ``index`` template with a message when nothing matched, the
        ``many`` template when several entries matched, and otherwise the
        `_result` page for the single match.
    '''
    term = request.query.q
    entries = session.query(waldo.uniprot.models.Entry).filter_by(rname=term).all()
    if len(entries) == 0:
        # No exact hit on rname: fall back to the name-match lookup.
        entries = waldo.uniprot.retrieve.retrieve_name_matches(term)

    nr_hits = len(entries)
    if nr_hits == 0:
        return template('index', {
            'message': 'No results found.',
        })
    if nr_hits != 1:
        return template('many', {
            'results': entries,
            'search_term_value': term,
        })

    [entry] = entries
    ensembl_gene = translate(entry.name, 'uniprot:name', 'ensembl:gene_id')
    return _result(format, 'Uniprot', entry.name, ensembl_gene)
def get_information(protein_id, session=None):
    """
    information = get_information(protein_id, session={backend.create_session()})

    Returns all information on the protein identified by `protein_id`

    The identifier is translated to an Ensembl gene ID, which is then looked
    up in the uniprot, mgi, locate and hpa retrieval modules (in that order).

    Parameters
    ----------
    protein_id : string
        protein ID in one of the formats accepted by Waldo
    session : database connection, optional
        database connection to use (a new one is created with
        ``waldo.backend.create_session()`` when None)

    Returns
    -------
    information : string
        Comma-separated line: the Ensembl gene ID followed, for each module,
        by a ``;``-joined list of locations and a URL. A module with no
        matching entry contributes ``None, None``.
    """
    def get_data(module):
        # Render the two fields (locations, url) contributed by one module.
        spec_id = module.from_ensembl_gene_id(ensemblgene, session)
        if not spec_id:
            return "None, None"
        locations = set()
        for annotation in module.retrieve_go_annotations(spec_id):
            for fragment in re.split(r"[,.;:]+", annotation):
                fragment = fragment.strip()
                # re.split yields '' for leading/trailing punctuation; such
                # fragments are not locations and would only add stray ';'.
                if fragment:
                    locations.add(fragment)
        # Sorted so that the output does not depend on set iteration order.
        joined = ";".join(sorted(locations))
        return "%s,%s" % (joined, module.gen_url(spec_id))

    if session is None:
        session = waldo.backend.create_session()

    # NOTE(review): if translate() can return None for an unknown ID, the
    # join below raises TypeError -- confirm how that case should be reported.
    ensemblgene = translate(
        protein_id, get_id_namespace(protein_id), "ensembl:gene_id", session)

    return ",".join(
        [
            ensemblgene,
            get_data(waldo.uniprot.retrieve),
            get_data(waldo.mgi.retrieve),
            get_data(waldo.locate.retrieve),
            get_data(waldo.hpa.retrieve),
        ]
    )
def from_ensembl_peptide_id(ensembl_peptide_id, session=None):
    '''
    name = from_ensembl_peptide_id(ensembl_peptide_id, session=None)

    Look up the Uniprot name/ID that corresponds to an Ensembl peptide ID.

    Parameters
    ----------
    ensembl_peptide_id : str
        Ensembl protein ID
    session : SQLAlchemy session
        Forwarded unchanged to `translate`

    Returns
    -------
    name : str
        Result of translating from 'ensembl:peptide_id' to 'uniprot:name'
    '''
    source_namespace = 'ensembl:peptide_id'
    target_namespace = 'uniprot:name'
    return translate(ensembl_peptide_id, source_namespace, target_namespace, session)
def from_ensembl_gene_id(ensembl_gene_id, session=None):
    '''
    name = from_ensembl_gene_id(ensembl_gene_id, session=None)

    Look up the Uniprot name (Uniprot ID) that corresponds to an Ensembl gene ID.

    Parameters
    ----------
    ensembl_gene_id : str
        Ensembl gene ID
    session : SQLAlchemy session
        Forwarded unchanged to `translate`

    Returns
    -------
    name : str
        Result of translating from 'ensembl:gene_id' to 'uniprot:name'
    '''
    source_namespace = 'ensembl:gene_id'
    target_namespace = 'uniprot:name'
    return translate(ensembl_gene_id, source_namespace, target_namespace, session)
def test_retrieve():
    '''
    Load the LOCATE test data into an in-memory SQLite database and check
    that a known Ensembl gene ID resolves to the expected LOCATE uid, both
    through `translate` and through `waldo.locate.retrieve`.
    '''
    gene_id = 'ENSG00000070785'
    expected_uid = '6000005'

    # Fresh in-memory database holding the LOCATE schema.
    engine = create_engine('sqlite://')
    metadata = waldo.locate.models.Base.metadata
    metadata.bind = engine
    metadata.create_all()

    make_session = sessionmaker(engine)
    waldo.locate.load.load(_testdir, make_session)
    session = make_session()

    translated = translate(gene_id, 'ensembl:gene_id', 'locate:id', session)
    assert translated == expected_uid

    retrieved = waldo.locate.retrieve.from_ensembl_gene_id(gene_id, session)
    assert retrieved == expected_uid

    # Only checks that the call succeeds; the annotations are not inspected.
    waldo.locate.retrieve.retrieve_go_annotations(retrieved, session)

    entry = waldo.locate.retrieve.retrieve_entry(translated, session)
    assert len(entry.organisms) == 1
def search(format='html'):
    '''
    Show the result page for the LOCATE ID in the ``locateid`` query parameter.

    The ID is upper-cased before being translated to an Ensembl gene ID; the
    ID as typed by the user is what gets passed to `_result` for display.

    Parameters
    ----------
    format : str, optional
        Passed on to `_result` (default: ``'html'``).
    '''
    requested_id = request.query.locateid
    ensembl_gene = translate(requested_id.upper(), 'locate:id', 'ensembl:gene_id')
    return _result(format, 'LOCATE ID', requested_id, ensembl_gene)
def search(format='html'):
    '''
    Show the result page for the Uniprot accession in the ``uniprotacc``
    query parameter.

    The accession is upper-cased before being translated to an Ensembl gene
    ID; the value as typed by the user is what gets passed to `_result`.

    Parameters
    ----------
    format : str, optional
        Passed on to `_result` (default: ``'html'``).
    '''
    requested_acc = request.query.uniprotacc
    ensembl_gene = translate(requested_acc.upper(), 'uniprot:accession', 'ensembl:gene_id')
    return _result(format, 'Uniprot Accession ID', requested_acc, ensembl_gene)
def search(format='html'):
    '''
    Show the result page for the Uniprot name in the ``uniprotname`` query
    parameter.

    The name is upper-cased before being translated to an Ensembl gene ID;
    the value as typed by the user is what gets passed to `_result`.

    Parameters
    ----------
    format : str, optional
        Passed on to `_result` (default: ``'html'``).
    '''
    requested_name = request.query.uniprotname
    ensembl_gene = translate(requested_name.upper(), 'uniprot:name', 'ensembl:gene_id')
    return _result(format, 'Uniprot Name', requested_name, ensembl_gene)
def search(format='html'):
    '''
    Show the result page for the MGI ID in the ``mgiid`` query parameter.

    An ID given without the ``MGI:`` prefix gets the prefix prepended; the
    prefixed form is used both for the translation and for display.

    Parameters
    ----------
    format : str, optional
        Passed on to `_result` (default: ``'html'``).
    '''
    prefix = 'MGI:'
    given = request.query.mgiid
    full_id = given if given.startswith(prefix) else prefix + given
    ensembl_gene = translate(full_id, 'mgi:id', 'ensembl:gene_id')
    return _result(format, 'MGI ID', full_id, ensembl_gene)
def test_nr_entries():
    '''
    Check that loading the RefSeq test file reports as many entries as there
    are ``ENSMUSG`` occurrences in the file, and that a known RefSeq
    accession translates to the expected Ensembl gene ID.
    '''
    # Close the gzip handle deterministically instead of leaking it.
    with gzip.GzipFile(_testfile) as compressed:
        contents = compressed.read()
    # read() returns bytes, so the needle must be bytes too (a str pattern
    # raises TypeError on Python 3; b"..." is plain str on Python 2).
    nr_entries = contents.count(b"ENSMUSG")

    session, loaded = _load_refseq_test()
    assert loaded == nr_entries
    assert translate("NP_112436", "refseq:accession", "ensembl:gene_id", session) == "ENSMUSG00000040613"