forked from boscoh/uniprot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.py
83 lines (53 loc) · 1.69 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import glob
import os
import pprint

import uniprot
# Clean up caches
# Delete leftovers from earlier runs in the current directory. This is done
# with glob + os.remove instead of shelling out to `rm`, so it does not depend
# on a POSIX shell and stays silent when no file matches a pattern.
for stale_pattern in ('*output*', '*cache*'):
    for stale_path in glob.glob(stale_pattern):
        # Plain `rm` refuses to delete directories; skip them here as well
        # (a symlink is unlinked, whatever it points to).
        if os.path.isdir(stale_path) and not os.path.islink(stale_path):
            continue
        os.remove(stale_path)
# Example 1 - reading a fasta file
# read_fasta's result unpacks into two values; only the first one (seqids)
# is printed here.
parsed_fasta = uniprot.read_fasta('example.fasta')
seqids, fastas = parsed_fasta
pprint.pprint(seqids, indent=2)
# Example 2 - batch read identifier mappings with
# prespecified identifier types
# The identifiers are kept as whitespace-separated text and split into a list.
refseq_id_text = """
NP_000508.1 NP_001018081.3
"""
seqids = refseq_id_text.split()
# The two type codes are passed in (from, to) order -- presumably RefSeq
# protein accession -> UniProt accession; confirm against the uniprot module.
pairs = uniprot.batch_uniprot_id_mapping_pairs('P_REFSEQ_AC', 'ACC', seqids)
pprint.pprint(pairs, indent=2)
# Example 3 - sequential identifier mapping to UniProt
# identifiers using robust but slow method
seqids = """
EFG_MYCA1 YP_885981.1 CpC231_1796
""".split()
# 'cache.json' is the second argument of the conversion call (presumably the
# file the library caches its lookups in -- confirm against the uniprot module).
mapping = uniprot.sequentially_convert_to_uniprot_id(
    seqids, 'cache.json')
# Materialise the values as a real list. Assuming `mapping` is a dict, on
# Python 3 .values() is a view object rather than a list, and this collection
# is handed to two separate uniprot calls further down. On Python 2 this is
# simply a copy of the list that .values() already returns.
uniprot_seqids = list(mapping.values())
# Example 4 - get UniProt metadata
uniprot_data = uniprot.batch_uniprot_metadata(
    uniprot_seqids, 'cache2.txt')
pprint.pprint(uniprot_data, indent=2)
# Echo the contents of cache2.txt (the file name handed to the metadata call
# above), one stripped line at a time. The `with` block closes the file handle
# deterministically, and print(...) with a single argument produces the same
# output on Python 2 and Python 3.
with open('cache2.txt') as cache_file:
    for line in cache_file:
        print(line.strip())
uniprot.write_fasta('example.output.fasta', uniprot_data, uniprot_seqids)
# Example 5 - chaining commands to make your own
# special mapper
def map_to_refseq(seqids):
    """Map each identifier in `seqids` to an identifier of type 'P_REFSEQ_AC'.

    The lookup is chained through two uniprot calls: the input identifiers
    are first converted with `sequentially_convert_to_uniprot_id`, and the
    resulting identifiers are then mapped from type 'ACC' to type
    'P_REFSEQ_AC' with `batch_uniprot_id_mapping_pairs` (presumably UniProt
    accession -> RefSeq protein accession; confirm against the uniprot
    module).

    Args:
        seqids: iterable of identifier strings to translate.

    Returns:
        dict from input identifier to the mapped identifier. Identifiers
        that could not be converted in either step are left out. When an
        intermediate identifier appears in several pairs, the last pair wins.

    Side effects:
        Uses 'func.cache.json' in the current directory as the cache file
        argument of the first call and deletes it before returning, also
        when one of the lookups raises.
    """
    cache_fname = 'func.cache.json'
    try:
        uniprot_mapping = uniprot.sequentially_convert_to_uniprot_id(
            seqids, cache_fname)
        # list(...) keeps this a real sequence on Python 3, where .values()
        # is a view; on Python 2 it is just a copy.
        uniprot_ids = list(uniprot_mapping.values())
        pairs = uniprot.batch_uniprot_id_mapping_pairs(
            'ACC', 'P_REFSEQ_AC', uniprot_ids)

        # Index the pairs by their first element once instead of rescanning
        # every pair for every seqid. Later pairs overwrite earlier ones,
        # which reproduces the last-match-wins result of a full scan.
        target_by_uniprot_id = {}
        for pair in pairs:
            target_by_uniprot_id[pair[0]] = pair[1]

        mapping = {}
        for seqid in seqids:
            if seqid not in uniprot_mapping:
                continue
            uniprot_id = uniprot_mapping[seqid]
            if uniprot_id in target_by_uniprot_id:
                mapping[seqid] = target_by_uniprot_id[uniprot_id]
        return mapping
    finally:
        # Only remove the cache if it is actually there, so a missing file
        # neither crashes a successful run nor masks an earlier exception.
        if os.path.exists(cache_fname):
            os.remove(cache_fname)
# Run the custom mapper defined above on a few mixed-type identifiers and
# pretty-print the resulting dictionary.
mixed_id_text = """
EFG_MYCA1 YP_885981.1 CpC231_1796
"""
seqids = mixed_id_text.split()
mapping = map_to_refseq(seqids)
pprint.PrettyPrinter(indent=2).pprint(mapping)