-
Notifications
You must be signed in to change notification settings - Fork 0
/
search.py
executable file
·139 lines (106 loc) · 3.28 KB
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/python
"""Provide KNN search service over HTTP. """
import sys, getopt
import json
import numpy as np
import pykgraph as kg
from wheezy.http import HTTPResponse
from wheezy.http import WSGIApplication
from wheezy.routing import url
from wheezy.web.handlers import BaseHandler
from wheezy.web.middleware import bootstrap_defaults
from wheezy.web.middleware import path_routing_middleware_factory
# Module metadata (authorship, licensing and maturity).
__author__ = "Huahai Yang"
__copyright__ = "Copyright 2015, Juji, Inc."
__license__ = "BSD"
__maintainer__ = "Huahai Yang"
__email__ = "hyang@juji-inc.com"
__status__ = "Development"
class SearchHandler(BaseHandler):
    """Answer k-nearest-neighbour queries against a loaded dataset.

    Query parameters:
        d -- name of the dataset (a key of the module-level ``indices``)
        k -- number of neighbours to return (positive integer)
        q -- the query vector, passed as one repeated ``q`` parameter per
             component; its length must equal the dataset's number of
             score columns (``n - 1``)
    """

    def get(self):
        """Run the search and return ``{'ids': [...], 'dists': [...]}`` as JSON.

        Missing or malformed parameters, an unknown dataset name, or a query
        vector of the wrong length produce a 400 JSON response carrying an
        ``error`` message instead of an unhandled exception.
        """
        params = self.request.query
        try:
            # A missing parameter makes .get() return None, so the
            # subscript / iteration raises TypeError; bad numbers raise
            # ValueError.
            d = params.get('d')[0]
            k = int(params.get('k')[0])
            q = [float(x) for x in params.get('q')]
        except (TypeError, IndexError, ValueError):
            return self._bad_request(
                "parameters 'd', 'k' (integer) and 'q' (numbers) are required")
        if k < 1:
            return self._bad_request("'k' must be a positive integer")

        dd = indices.get(d)
        if dd is None:
            return self._bad_request("unknown dataset '%s'" % d)

        n = dd['n']
        needed = dd['needed']
        index = dd['index']
        padded = dd['padded']
        pids = dd['pids']

        dims = n - 1
        if len(q) != dims:
            return self._bad_request(
                "'q' must have %d components, got %d" % (dims, len(q)))

        # Zero-pad the query to the same width as the indexed data.
        pq = np.zeros((1, dims + needed))
        pq[0, :dims] = q

        result = index.search(padded, pq, K=k, withDistance=True)
        # result[0] holds row positions of the neighbours, result[1] their
        # distances; translate positions back to the ids from the data file.
        # A real list (not a lazy map object) is needed for JSON encoding.
        ids = [pids[x] for x in result[0][0].tolist()]
        return self.json_response({'ids': ids,
                                   'dists': result[1][0].tolist()})

    def _bad_request(self, message):
        """Build a JSON error response with HTTP status 400."""
        response = self.json_response({'error': message})
        response.status_code = 400
        return response
def welcome(request):
    """Liveness endpoint: always reply with a fixed status message."""
    reply = HTTPResponse()
    reply.write('Server is up!')
    return reply
# Registry of loaded datasets, keyed by dataset name; filled in by init().
indices = {}

# Route table: the root path answers with a liveness message, 'search'
# serves the KNN queries.
all_urls = [
    url('', welcome, name='default'),
    url('search', SearchHandler, name='search'),
]

# No application options are customised.
options = {}

# The WSGI application object handed to the HTTP server.
web = WSGIApplication(
    middleware=[
        bootstrap_defaults(url_mapping=all_urls),
        path_routing_middleware_factory,
    ],
    options=options,
)
def load(entry, datafile):
    """Read one dataset from CSV and fill *entry* with its search state.

    The first CSV column is taken as the point ids and the remaining columns
    as the score vectors. The kgraph index is read from
    ``datafile + ".index"``. *entry* is mutated in place and receives the
    keys ``m``, ``n``, ``pids``, ``needed``, ``padded`` and ``index``.
    """
    table = np.genfromtxt(datafile, delimiter=',')
    rows, cols = table.shape
    dims = cols - 1

    # kgraph requires the number of columns to be multiple of 4. This
    # formula always appends between 1 and 4 zero columns.
    extra = 4 - dims % 4
    vectors = np.zeros((rows, dims + extra))
    vectors[:, :dims] = table[:, 1:cols]

    graph = kg.KGraph()
    graph.load(datafile + ".index")

    fields = (
        ('m', rows),
        ('n', cols),
        ('pids', table[:, 0]),
        ('needed', extra),
        ('padded', vectors),
        ('index', graph),
    )
    for key, value in fields:
        entry[key] = value
def init(datanames, datafiles):
    """Load every named dataset into the module-level ``indices`` registry.

    datanames -- comma-separated dataset names
    datafiles -- comma-separated data file paths, matched to the names by
                 position

    Raises ValueError when the two lists differ in length, before anything
    is loaded.
    """
    names = datanames.split(',')
    files = datafiles.split(',')
    if len(names) != len(files):
        raise ValueError(
            'got %d dataset name(s) but %d data file(s)'
            % (len(names), len(files)))
    for name, datafile in zip(names, files):
        # Register the entry only after a successful load, so a failure does
        # not leave an empty, unusable entry behind in the registry.
        entry = {}
        load(entry, datafile)
        indices[name] = entry
def main(argv):
    """Parse command-line options, load the datasets and serve until Ctrl-C.

    argv -- argument list without the program name (i.e. ``sys.argv[1:]``)

    Options: -p/--port (default 8071), -d/--datanames, -f/--datafiles, and
    -h to print usage. Exits with status 2 on an unknown option, a
    non-integer port, or when the dataset names or files are missing.
    """
    usage = 'search.py -p <port> -d <datanames> -f <datafiles>'
    datafiles = ''
    datanames = ''
    port = 8071
    try:
        opts, _ = getopt.getopt(argv, "hp:f:d:",
                                ["port=", "datafiles=", "datanames="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-d", "--datanames"):
            datanames = arg
        elif opt in ("-f", "--datafiles"):
            datafiles = arg
        elif opt in ("-p", "--port"):
            # make_server() needs an integer port; getopt yields strings.
            try:
                port = int(arg)
            except ValueError:
                print(usage)
                sys.exit(2)
    if not datanames or not datafiles:
        # Without datasets there is nothing to serve; loading an empty
        # path would only fail later with a less helpful error.
        print(usage)
        sys.exit(2)
    init(datanames, datafiles)
    from wsgiref.simple_server import make_server
    try:
        print('Server started on port ' + str(port))
        make_server('', port, web).serve_forever()
    except KeyboardInterrupt:
        pass
    print('\nThanks!')
if __name__ == "__main__":
    # Script entry point: pass everything after the program name to main().
    cli_args = sys.argv[1:]
    main(cli_args)