def test_string():
    """Index a fixed list of strings in the ``leven`` space and query it.

    Creates a ``small_world_rand`` index sized to ``DATA_STRS``, loads every
    string, builds the index, and prints the position of each query string
    followed by the result of ``nmslib.knnQuery`` with ``k = 2``.  The index
    is freed on exit, including when an error interrupts the run.
    """
    DATA_STRS = [
        "xyz", "beagcfa", "cea", "cb",
        "d", "c", "bdaf", "ddcd",
        "egbfa", "a", "fba", "bcccfe",
        "ab", "bfgbfdc", "bcbbgf", "bfbb",
    ]
    QUERY_STRS = ["abc", "def", "ghik"]

    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    method_param = [
        'NN=17',
        'initIndexAttempts=3',
        'initSearchAttempts=1',
        'indexThreadQty=4',
    ]

    index = nmslib.initIndex(
        len(DATA_STRS),
        space_type,
        space_param,
        method_name,
        method_param,
        nmslib.DataType.STRING,
        nmslib.DistType.INT)
    try:
        for pos, data in enumerate(DATA_STRS):
            nmslib.setData(index, pos, data)
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(QUERY_STRS):
            # One pre-formatted argument prints "idx result" exactly like the
            # Python 2 statement `print idx, result`, and is also valid as a
            # Python 3 print() call.
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        # Always release the index handle, even if indexing or a query raised.
        nmslib.freeIndex(index)
def test_vector():
    """Index up to 4500 vectors from ``sample_dataset.txt`` and query them.

    Creates a ``small_world_rand`` index in the ``cosinesimil`` space, loads
    at most ``n`` items yielded by ``read_data('sample_dataset.txt')``, builds
    the index, and prints the position of each item from
    ``read_data('sample_queryset.txt')`` followed by the result of
    ``nmslib.knnQuery`` with ``k = 2``.  The index is freed on exit, including
    when an error interrupts the run.
    """
    n = 4500
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    method_param = [
        'NN=17',
        'initIndexAttempts=3',
        'initSearchAttempts=1',
        'indexThreadQty=4',
    ]

    index = nmslib.initIndex(
        n,
        space_type,
        space_param,
        method_name,
        method_param,
        nmslib.DataType.VECTOR,
        nmslib.DistType.FLOAT)
    try:
        for pos, data in enumerate(read_data('sample_dataset.txt')):
            # The index was sized for n items; ignore anything beyond that.
            if pos >= n:
                break
            nmslib.setData(index, pos, data)

        print('here')
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(read_data('sample_queryset.txt')):
            # One pre-formatted argument prints "idx result" exactly like the
            # Python 2 statement `print idx, result`, and is also valid as a
            # Python 3 print() call.
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        # Always release the index handle, even if loading or a query raised.
        nmslib.freeIndex(index)
def fit(self, X):
    """Create and build the nmslib index over the rows of ``X``.

    The index is stored on ``self._index``.  ``self._method_param`` is left
    untouched: the parameters handed to nmslib are a per-call copy, so calling
    ``fit`` repeatedly does not accumulate ``bucketSize=...`` entries.

    Args:
        X: indexable collection exposing ``shape[0]`` (the row count) whose
            rows provide ``tolist()`` -- presumably a 2-D NumPy array;
            confirm against callers.
    """
    import nmslib

    # Copy rather than append in place, so neither the instance's parameter
    # list nor the list object the caller may still hold is mutated.
    method_param = list(self._method_param)

    if self._method_name == 'vptree':
        # To avoid this issue:
        #   terminate called after throwing an instance of 'std::runtime_error'
        #   what():  The data size is too small or the bucket size is too big.
        #   Select the parameters so that <total # of records> is NOT less
        #   than <bucket size> * 1000
        #   Aborted (core dumped)
        # NOTE(review): with fewer than 2000 rows this evaluates to
        # bucketSize=0 -- confirm nmslib accepts that value.
        bucket_size = min(int(X.shape[0] * 0.0005), 1000)
        method_param.append('bucketSize=%d' % bucket_size)

    self._index = nmslib.initIndex(
        X.shape[0],
        self._nmslib_metric,
        [],
        self._method_name,
        method_param,
        nmslib.DataType.VECTOR,
        nmslib.DistType.FLOAT)

    for i, x in enumerate(X):
        nmslib.setData(self._index, i, x.tolist())
    nmslib.buildIndex(self._index)
def test_vector():
    """Index up to 4500 vectors from ``sample_dataset.txt`` and query them.

    Creates a ``small_world_rand`` index in the ``cosinesimil`` space, loads
    at most ``n`` items yielded by ``read_data("sample_dataset.txt")``, builds
    the index, and prints the position of each item from
    ``read_data("sample_queryset.txt")`` followed by the result of
    ``nmslib.knnQuery`` with ``k = 2``.  The index is freed on exit, including
    when an error interrupts the run.
    """
    n = 4500
    space_type = "cosinesimil"
    space_param = []
    method_name = "small_world_rand"
    method_param = ["NN=17", "initIndexAttempts=3", "initSearchAttempts=1", "indexThreadQty=4"]

    index = nmslib.initIndex(
        n, space_type, space_param, method_name, method_param, nmslib.DataType.VECTOR, nmslib.DistType.FLOAT
    )
    try:
        for pos, data in enumerate(read_data("sample_dataset.txt")):
            # The index was sized for n items; ignore anything beyond that.
            if pos >= n:
                break
            nmslib.setData(index, pos, data)

        print("here")
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(read_data("sample_queryset.txt")):
            # A single formatted argument keeps the output identical to the
            # Python 2 statement `print idx, result` while also being a valid
            # Python 3 print() call.
            print("%s %s" % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        # Release the index handle whether or not the steps above succeeded.
        nmslib.freeIndex(index)
def test_string():
    """Index a fixed list of strings in the ``leven`` space and query it.

    Creates a ``small_world_rand`` index sized to ``DATA_STRS``, loads every
    string, builds the index, and prints the position of each query string
    followed by the result of ``nmslib.knnQuery`` with ``k = 2``.  The index
    is freed on exit, including when an error interrupts the run.
    """
    DATA_STRS = [
        "xyz",
        "beagcfa",
        "cea",
        "cb",
        "d",
        "c",
        "bdaf",
        "ddcd",
        "egbfa",
        "a",
        "fba",
        "bcccfe",
        "ab",
        "bfgbfdc",
        "bcbbgf",
        "bfbb",
    ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = "leven"
    space_param = []
    method_name = "small_world_rand"
    method_param = ["NN=17", "initIndexAttempts=3", "initSearchAttempts=1", "indexThreadQty=4"]

    index = nmslib.initIndex(
        len(DATA_STRS), space_type, space_param, method_name, method_param, nmslib.DataType.STRING, nmslib.DistType.INT
    )
    try:
        for pos, data in enumerate(DATA_STRS):
            nmslib.setData(index, pos, data)
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(QUERY_STRS):
            # A single formatted argument keeps the output identical to the
            # Python 2 statement `print idx, result` while also being a valid
            # Python 3 print() call.
            print("%s %s" % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        # Release the index handle whether or not the steps above succeeded.
        nmslib.freeIndex(index)
def test_save_and_load(data, init_nn=3, init_index=3, init_search=3):
    """Build and score an index twice: once saving the graph, once loading it.

    Both passes create an ``l2`` ``small_world_rand`` index over ``data`` with
    ``graphFileName=savedGraph.txt``.  The first pass sets ``saveGraphFile=1``
    / ``loadGraphFile=0`` with four index threads; the second sets
    ``saveGraphFile=0`` / ``loadGraphFile=1`` with one thread (presumably
    writing and then re-reading the graph file -- confirm against the nmslib
    documentation).  After each pass the elapsed time and the value returned
    by ``test_method`` are printed, and the index is freed even on error.

    Args:
        data: collection exposing ``shape[0]`` whose rows provide
            ``tolist()`` -- presumably a 2-D NumPy array; confirm with callers.
        init_nn: value substituted into the ``NN=%d`` method parameter.
        init_index: value substituted into ``initIndexAttempts=%d``.
        init_search: value substituted into ``initSearchAttempts=%d``.
    """
    import nmslib
    reload(nmslib)

    n = data.shape[0]
    space_type = 'l2'
    space_param = []
    method_name = 'small_world_rand'
    # Parameters common to both passes; each pass adds thread/graph options.
    shared_param = [
        'NN=%d' % init_nn,
        'initIndexAttempts=%d' % init_index,
        'initSearchAttempts=%d' % init_search,
    ]

    def build_and_score(pass_param, score_label):
        # Create, fill, build and score one index, then release it.
        index = nmslib.initIndex(
            n,
            space_type,
            space_param,
            method_name,
            shared_param + pass_param,
            nmslib.DataType.VECTOR,
            nmslib.DistType.FLOAT)
        try:
            t0 = time.time()
            for pos, d in enumerate(data):
                nmslib.setData(index, pos, d.tolist())
            nmslib.buildIndex(index)
            print('Building %i dataset took %1.4f' % (data.shape[0], time.time() - t0))

            def query(q, k=10, m=3):
                # `m` is accepted but unused; kept so the callable's signature
                # stays what test_method was given before.
                return nmslib.knnQuery(index, k, q.tolist())

            print(score_label)
            print(test_method(query))
        finally:
            # Free the index even when building or scoring raised.
            nmslib.freeIndex(index)

    build_and_score(
        ['indexThreadQty=4',
         'graphFileName=savedGraph.txt',
         'saveGraphFile=1',
         'loadGraphFile=0'],
        'building score: ')
    build_and_score(
        ['indexThreadQty=1',
         'graphFileName=savedGraph.txt',
         'saveGraphFile=0',
         'loadGraphFile=1'],
        'loading score: ')