forked from intermine/intermine-python-ws-demo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pathways.py
93 lines (72 loc) · 3.43 KB
/
pathways.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/python
from intermine.webservice import Service
from intermine.webservice import ServiceError
from collections import defaultdict
from itertools import groupby, ifilter
import org_util
class PathwayDemo(object):
    """Demo client that queries several InterMine web services about genes.

    One ``Service`` connection is opened per organism listed in
    ``service_urls``; the query methods pick a connection by organism name.
    """

    # Organism name -> root URL of the web service used for that organism.
    service_urls = {
        'Drosophila melanogaster': "http://www.flymine.org/query/service",
        'Saccharomyces cerevisiae': "http://yeastmine.yeastgenome.org/yeastmine/service",
        'Rattus norvegicus': "http://ratmine.mcw.edu/ratmine/service",
        'Mus musculus': "http://metabolicmine.org/test/service",
        'Homo sapiens': "http://metabolicmine.org/test/service",
    }

    def __init__(self):
        """Connect to every configured service.

        A service whose construction raises ``ServiceError`` is reported on
        stdout and left out of ``self.services``; the others stay usable.
        """
        self.services = {}
        for name, service_url in self.service_urls.items():
            try:
                self.services[name] = Service(service_url)
            except ServiceError as e:
                # Single-argument print(...) emits the same text on Python 2
                # and Python 3.
                print("Failed to initialise: %s, %s" % (service_url, e))
            else:
                print("Connected to %s" % service_url)

    def find_gene(self, symbol, org_name):
        """Look up genes matching ``symbol`` in the service for ``org_name``.

        Returns a list of lists, one inner list per result row (built with
        ``row.to_l()``) for the selected columns.

        Raises ``KeyError`` if no service is connected for ``org_name``.
        """
        service = self.services[org_name]
        query = service.new_query("Gene")\
            .select("symbol", "primaryIdentifier", "name", "organism.name")\
            .where("Gene", "LOOKUP", symbol)\
            .where("organism.name", "=", org_name)
        return [row.to_l() for row in query.rows()]

    def get_homologs_for_gene(self, symbol, org_name):
        """Collect homologues of a gene, grouped by organism and symbol.

        Returns a ``defaultdict(dict)`` mapping homologue organism name to a
        dict of ``{homologue symbol: [data set name, ...]}``.

        Raises ``KeyError`` if the FlyMine service is not connected.
        """
        h_sym = "homologues.homologue.symbol"
        h_org = "homologues.homologue.organism.name"
        h_ds = "homologues.dataSets.name"
        # always use FlyMine for querying homologs
        service = self.services["Drosophila melanogaster"]
        query = service.new_query("Gene")\
            .select(h_org, h_sym, h_ds)\
            .where(h_org, "ONE OF", org_util.get_names())\
            .where("organism.name", "=", org_name)\
            .where("symbol", "=", symbol)\
            .where(h_sym, "IS NOT NULL")
        homologs = defaultdict(dict)
        # Accumulate row by row instead of using itertools.groupby: groupby
        # only merges *adjacent* rows, so a symbol appearing in two separate
        # runs would overwrite its earlier data sets. This form does not
        # depend on the order in which rows arrive.
        for row in query.rows():
            homologs[row[h_org]].setdefault(row[h_sym], []).append(row[h_ds])
        return homologs

    def get_pathways(self, symbol, org_name):
        """List the pathways of a gene, sorted by pathway name.

        Returns a list of triples ``[pathway, organism, symbol]``. When the
        service's model has pathway data sets, the pathway entry is
        ``"<pathway name> (<data set name>)"``.

        Raises ``KeyError`` if no service is connected for ``org_name``.
        """
        service = self.services[org_name]
        sym = "symbol"
        pw = "pathways.name"
        ds = 'Gene.pathways.dataSets.name'
        org = "organism.name"
        query = service.new_query("Gene").select(sym, org, pw)
        # YeastMine doesn't have pathway.dataSets, check model first
        if self.is_path_in_model(org_name, ds):
            query.add_view(ds)
            # Outer join so pathways without a data set are still returned.
            query.add_join('Gene.pathways.dataSets', 'OUTER')
        query.add_sort_order(pw, 'asc')
        query.add_constraint(sym, '=', symbol)
        query.add_constraint(org, "=", org_name)
        triples = []
        for line in query.results("tsv"):
            # Columns arrive in view order: symbol, organism, pathway and,
            # only when the data set view was added, the data set name.
            fields = line.split("\t")
            pathway = fields[2]
            if len(fields) == 4:
                pathway += " (%s)" % fields[3]
            triples.append([pathway, fields[1], fields[0]])
        return triples

    def is_path_in_model(self, org, path):
        """Return True if ``path`` validates against the model of ``org``.

        Any ordinary exception raised by ``validate_path`` is treated as
        "not in the model". Raises ``KeyError`` if no service is connected
        for ``org``.
        """
        service = self.services[org]
        try:
            service.model.validate_path(path)
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must still
            # propagate.
            return False
        return True

    def strip_suffix(self, dataset):
        """Return ``dataset`` without a trailing ``'data set'``.

        Only the suffix itself is removed, so any whitespace before it is
        kept. Names that do not end with the suffix are returned unchanged.
        """
        suffix = 'data set'
        if dataset.endswith(suffix):
            # Slice off the end; searching for the first occurrence would
            # truncate names that contain the suffix text more than once.
            return dataset[:-len(suffix)]
        return dataset