def getUNIPid(line):
    """Return the GO annotation of the preferred UniProt accession for *line*.

    A RefSeq identifier is sliced out of ``line`` and mapped to UniProt
    accessions with ``uniprot.batch_uniprot_id_mapping_pairs``.  The review
    status of each accession is then fetched from www.uniprot.org:

    * the first accession with a status line that does not contain
      "unreviewed" is selected and the search stops;
    * otherwise the first accession for which an "unreviewed" status line
      was seen is used as a fallback.

    Returns a one-element list holding ``GetGoAnnotation(accession)``.
    The accession is the empty string when nothing was selected.
    """
    # NOTE(review): the end index is computed from line[0] while the slice is
    # taken on `line` itself; kept unchanged -- confirm the shape of `line`.
    seqid = [line[11:line[0].find(']') - 1]]
    pairs = uniprot.batch_uniprot_id_mapping_pairs('P_REFSEQ_AC', 'ACC', seqid)

    reviewedpair = ""
    # This part of the code determines which of the UniProt ids are reviewed
    # and keeps the first one in the list.
    for y in pairs:
        uniCode = y[1]
        req = urllib2.Request(
            'http://www.uniprot.org/uniprot/?query={}&sort=score&columns=reviewed&format=tab'
            .format(uniCode))
        web = urllib2.urlopen(req)
        found_reviewed = False
        try:
            for i in web:
                if "Status" in i:
                    # Presumably the column header row of the tab output.
                    continue
                if "unreviewed" not in i:
                    reviewedpair = uniCode
                    found_reviewed = True
                    break
                if reviewedpair == "":
                    # Fallback: remember the first unreviewed accession.
                    reviewedpair = uniCode
        finally:
            # Always release the HTTP connection, even if reading fails.
            web.close()
        if found_reviewed:
            # A reviewed accession can no longer be replaced, so the
            # remaining accessions do not need to be queried.
            break
    return [GetGoAnnotation(reviewedpair)]
def map_to_refseq(seqids):
    """Map each id in *seqids* to a RefSeq protein accession.

    The ids are first converted to UniProt ids with
    ``uniprot.sequentially_convert_to_uniprot_id`` (which is given the cache
    file "func.cache.json"), then those UniProt ids are mapped to RefSeq with
    ``uniprot.batch_uniprot_id_mapping_pairs``.

    Returns a dict ``{seqid: refseq_id}`` containing only the ids for which
    both steps produced a result.  When several pairs share the same UniProt
    id, the last pair wins.  The cache file is removed before returning.
    """
    uniprot_mapping = uniprot.sequentially_convert_to_uniprot_id(seqids, "func.cache.json")
    # Materialise the values so a real list is passed on Python 3 as well.
    uniprot_ids = list(uniprot_mapping.values())
    pairs = uniprot.batch_uniprot_id_mapping_pairs("ACC", "P_REFSEQ_AC", uniprot_ids)

    # Index the pairs once instead of rescanning them for every seqid;
    # later pairs overwrite earlier ones, matching the previous linear scan.
    refseq_by_uniprot = {}
    for pair in pairs:
        refseq_by_uniprot[pair[0]] = pair[1]

    mapping = {}
    for seqid in seqids:
        if seqid in uniprot_mapping:
            uniprot_id = uniprot_mapping[seqid]
            if uniprot_id in refseq_by_uniprot:
                mapping[seqid] = refseq_by_uniprot[uniprot_id]

    # Removed only after a successful run, so the cache file is left in
    # place if one of the calls above raises.
    os.remove("func.cache.json")
    return mapping
def map_to_refseq(seqids):
    """Map each id in *seqids* to a RefSeq protein accession.

    The ids are first converted to UniProt ids with
    ``uniprot.sequentially_convert_to_uniprot_id`` (which is given the cache
    file 'func.cache.json'), then those UniProt ids are mapped to RefSeq with
    ``uniprot.batch_uniprot_id_mapping_pairs``.

    Returns a dict ``{seqid: refseq_id}`` containing only the ids for which
    both steps produced a result.  When several pairs share the same UniProt
    id, the last pair wins.  The cache file is removed before returning.
    """
    uniprot_mapping = uniprot.sequentially_convert_to_uniprot_id(
        seqids, 'func.cache.json')
    # Materialise the values so a real list is passed on Python 3 as well.
    uniprot_ids = list(uniprot_mapping.values())
    pairs = uniprot.batch_uniprot_id_mapping_pairs(
        'ACC', 'P_REFSEQ_AC', uniprot_ids)

    # Index the pairs once instead of rescanning them for every seqid;
    # later pairs overwrite earlier ones, matching the previous linear scan.
    refseq_by_uniprot = {}
    for pair in pairs:
        refseq_by_uniprot[pair[0]] = pair[1]

    mapping = {}
    for seqid in seqids:
        if seqid in uniprot_mapping:
            uniprot_id = uniprot_mapping[seqid]
            if uniprot_id in refseq_by_uniprot:
                mapping[seqid] = refseq_by_uniprot[uniprot_id]

    # Removed only after a successful run, so the cache file is left in
    # place if one of the calls above raises.
    os.remove('func.cache.json')
    return mapping
def getUNIPid(line):
    """Return the GO annotation of the preferred UniProt accession for *line*.

    A RefSeq identifier is sliced out of ``line`` and mapped to UniProt
    accessions with ``uniprot.batch_uniprot_id_mapping_pairs``.  The review
    status of each accession is then fetched from www.uniprot.org:

    * the first accession with a status line that does not contain
      "unreviewed" is selected and the search stops;
    * otherwise the first accession for which an "unreviewed" status line
      was seen is used as a fallback.

    Returns a one-element list holding ``GetGoAnnotation(accession)``.
    The accession is the empty string when nothing was selected.
    """
    # NOTE(review): the end index is computed from line[0] while the slice is
    # taken on `line` itself; kept unchanged -- confirm the shape of `line`.
    seqid = [line[11:line[0].find(']')-1]]
    pairs = uniprot.batch_uniprot_id_mapping_pairs('P_REFSEQ_AC', 'ACC', seqid)

    reviewedpair = ""
    # This part of the code determines which of the UniProt ids are reviewed
    # and keeps the first one in the list.
    for y in pairs:
        uniCode = y[1]
        req = urllib2.Request(
            'http://www.uniprot.org/uniprot/?query={}&sort=score&columns=reviewed&format=tab'
            .format(uniCode))
        web = urllib2.urlopen(req)
        found_reviewed = False
        try:
            for i in web:
                if "Status" in i:
                    # Presumably the column header row of the tab output.
                    continue
                if "unreviewed" not in i:
                    reviewedpair = uniCode
                    found_reviewed = True
                    break
                if reviewedpair == "":
                    # Fallback: remember the first unreviewed accession.
                    reviewedpair = uniCode
        finally:
            # Always release the HTTP connection, even if reading fails.
            web.close()
        if found_reviewed:
            # A reviewed accession can no longer be replaced, so the
            # remaining accessions do not need to be queried.
            break
    return [GetGoAnnotation(reviewedpair)]
import os
import uniprot
import pprint
import sys

# Walk-through of the uniprot helper module: each example prints the result
# of one call so the returned structures can be inspected.

# Clean up caches
# NOTE(review): shells out to `rm`, so this presumably assumes a POSIX shell.
os.system('rm cache*')

# Example 1 - reading a fasta file
seqids, fastas = uniprot.read_fasta('example.fasta')
pprint.pprint(seqids, indent=2)

# Example 2 - map identifiers for RefSeq to Uniprot
seqids = "NP_000508.1 NP_001018081.3".split()
pairs = uniprot.batch_uniprot_id_mapping_pairs('P_REFSEQ_AC', 'ACC', seqids)
pprint.pprint(pairs, indent=2)

# Example 3 - get UniProt metadata
# Each pair is unpacked as (source id, UniProt id); only the second is kept.
uniprot_seqids = [uniprot_id for _, uniprot_id in pairs]
uniprot_data = uniprot.batch_uniprot_metadata(uniprot_seqids, 'cache')
pprint.pprint(uniprot_data, indent=2)

# Example 4 - parse for isoforms in metadata
# Read through a context manager so the file handle is closed right away.
with open('cache/metadata.0.txt') as metadata_file:
    text = metadata_file.read()
uniprot_data = uniprot.parse_isoforms(text)
pprint.pprint(uniprot_data)

# Example 5 - chaining commands to map seqids
seqids = "EFG_MYCA1 YP_885981.1 ENSG00000196176 Q91ZU6-8".split()
uniprot_data = uniprot.get_metadata_with_some_seqid_conversions(
    seqids, 'cache2')
# Example 1 - reading a fasta file seqids, fastas = uniprot.read_fasta("example.fasta") pprint.pprint(seqids, indent=2) # Example 2 - batch read identifier mappings with # prespecified identifier types seqids = """ NP_000508.1 NP_001018081.3 """.split() pairs = uniprot.batch_uniprot_id_mapping_pairs("P_REFSEQ_AC", "ACC", seqids) pprint.pprint(pairs, indent=2) # Example 3 - sequential identifier mapping to UniProt # identifiers using robust but slow method seqids = """ EFG_MYCA1 YP_885981.1 CpC231_1796 """.split() mapping = uniprot.sequentially_convert_to_uniprot_id(seqids, "cache.json") uniprot_seqids = mapping.values()
import os
import uniprot
import pprint
import sys

# Walk-through of the uniprot helper module: each example prints the result
# of one call so the returned structures can be inspected.

# Clean up caches
# NOTE(review): shells out to `rm`, so this presumably assumes a POSIX shell.
os.system('rm cache*')

# Example 1 - reading a fasta file
seqids, fastas = uniprot.read_fasta('example.fasta')
pprint.pprint(seqids, indent=2)

# Example 2 - map identifiers for RefSeq to Uniprot
seqids = "NP_000508.1 NP_001018081.3".split()
pairs = uniprot.batch_uniprot_id_mapping_pairs(
    'P_REFSEQ_AC', 'ACC', seqids)
pprint.pprint(pairs, indent=2)

# Example 3 - get UniProt metadata
# Each pair is unpacked as (source id, UniProt id); only the second is kept.
uniprot_seqids = [uniprot_id for _, uniprot_id in pairs]
uniprot_data = uniprot.batch_uniprot_metadata(
    uniprot_seqids, 'cache')
pprint.pprint(uniprot_data, indent=2)

# Example 4 - parse for isoforms in metadata
# Read through a context manager so the file handle is closed right away.
with open('cache/metadata.0.txt') as metadata_file:
    text = metadata_file.read()
uniprot_data = uniprot.parse_isoforms(text)
pprint.pprint(uniprot_data)

# Example 5 - chaining commands to map seqids
seqids = "EFG_MYCA1 YP_885981.1 ENSG00000196176 Q91ZU6-8".split()