Пример #1
0
def test_string():
    """Index a handful of strings in nmslib's 'leven' space and query them.

    Every entry of ``DATA_STRS`` is stored in a ``small_world_rand`` index
    (string data, integer distances).  For each entry of ``QUERY_STRS`` the
    query position and the result of a k=2 ``knnQuery`` are printed on one
    line.
    """
    DATA_STRS = [
        "xyz", "beagcfa", "cea", "cb", "d", "c", "bdaf", "ddcd", "egbfa", "a",
        "fba", "bcccfe", "ab", "bfgbfdc", "bcbbgf", "bfbb"
    ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    method_param = [
        'NN=17', 'initIndexAttempts=3', 'initSearchAttempts=1',
        'indexThreadQty=4'
    ]
    index = nmslib.initIndex(len(DATA_STRS), space_type, space_param,
                             method_name, method_param, nmslib.DataType.STRING,
                             nmslib.DistType.INT)
    # freeIndex lives in ``finally`` so the index is released even when
    # filling, building or querying raises.
    try:
        for pos, data in enumerate(DATA_STRS):
            nmslib.setData(index, pos, data)
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(QUERY_STRS):
            # One pre-formatted argument prints exactly what the former
            # ``print idx, result`` statement did, and is valid syntax on
            # both Python 2 and Python 3.
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #2
0
def test_vector():
    """Index vectors from a sample file and print k=2 query results.

    At most ``n`` (4500) items yielded by ``read_data('sample_dataset.txt')``
    are stored in a ``small_world_rand`` index using the ``cosinesimil``
    space.  Each item yielded by ``read_data('sample_queryset.txt')`` is then
    queried, and its position plus the ``knnQuery`` result are printed.
    """
    n = 4500
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    method_param = [
        'NN=17', 'initIndexAttempts=3', 'initSearchAttempts=1',
        'indexThreadQty=4'
    ]
    index = nmslib.initIndex(n, space_type, space_param, method_name,
                             method_param, nmslib.DataType.VECTOR,
                             nmslib.DistType.FLOAT)
    # freeIndex lives in ``finally`` so the index is released even when
    # reading the files, building or querying raises.
    try:
        # NOTE(review): the index is sized for exactly ``n`` entries; if the
        # dataset yields fewer rows the remaining positions are never set.
        # Confirm the sample file has at least ``n`` rows.
        for pos, data in enumerate(read_data('sample_dataset.txt')):
            if pos >= n:
                break
            nmslib.setData(index, pos, data)
        print('here')
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(read_data('sample_queryset.txt')):
            # Pre-formatted single argument: same output as the former
            # ``print idx, result`` statement, valid on Python 2 and 3.
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #3
0
    def fit(self, X):
        """Build an nmslib index over the rows of ``X`` and keep it on ``self._index``.

        ``X`` must provide ``shape[0]`` (the number of records) and iterate
        over rows that have a ``tolist()`` method.  The index is created with
        ``self._nmslib_metric``, ``self._method_name`` and the parameters in
        ``self._method_param``.
        """
        import nmslib
        n_records = X.shape[0]
        # Work on a copy: appending to self._method_param in place added one
        # more 'bucketSize=' entry on every fit() call and modified a list
        # that other code may share.
        method_param = list(self._method_param)
        if self._method_name == 'vptree':
            # Without an explicit bucket size nmslib can abort with
            # std::runtime_error: "The data size is too small or the bucket
            # size is too big. Select the parameters so that <total # of
            # records> is NOT less than <bucket size> * 1000".
            # NOTE(review): this evaluates to 0 when X has fewer than 2000
            # rows - confirm nmslib accepts bucketSize=0.
            method_param.append('bucketSize=%d' % min(int(n_records * 0.0005), 1000))
        self._index = nmslib.initIndex(n_records, self._nmslib_metric, [], self._method_name, method_param, nmslib.DataType.VECTOR, nmslib.DistType.FLOAT)

        for i, x in enumerate(X):
            nmslib.setData(self._index, i, x.tolist())
        nmslib.buildIndex(self._index)
Пример #4
0
    def fit(self, X):
        """Create, fill and build the nmslib index for the rows of ``X``.

        ``X`` must provide ``shape[0]`` (the record count) and iterate over
        rows exposing ``tolist()``.  The resulting index is stored on
        ``self._index``; the space, method and method parameters come from
        ``self._nmslib_metric``, ``self._method_name`` and
        ``self._method_param``.
        """
        import nmslib
        total = X.shape[0]
        # Copy before extending: the previous in-place append grew
        # self._method_param by one 'bucketSize=' entry per fit() call and
        # changed a list that may be shared with other code.
        params = list(self._method_param)
        if self._method_name == 'vptree':
            # Without an explicit bucket size nmslib can abort with
            # std::runtime_error: "The data size is too small or the bucket
            # size is too big. Select the parameters so that <total # of
            # records> is NOT less than <bucket size> * 1000".
            # NOTE(review): the value is 0 for fewer than 2000 rows - confirm
            # nmslib accepts bucketSize=0.
            params.append('bucketSize=%d' %
                          min(int(total * 0.0005), 1000))
        self._index = nmslib.initIndex(total, self._nmslib_metric, [],
                                       self._method_name, params,
                                       nmslib.DataType.VECTOR,
                                       nmslib.DistType.FLOAT)

        for i, x in enumerate(X):
            nmslib.setData(self._index, i, x.tolist())
        nmslib.buildIndex(self._index)
Пример #5
0
def test_vector():
    """Index vectors from a sample file and print k=2 query results.

    At most ``n`` (4500) items yielded by ``read_data("sample_dataset.txt")``
    are stored in a ``small_world_rand`` index using the ``cosinesimil``
    space.  Each item yielded by ``read_data("sample_queryset.txt")`` is then
    queried, and its position plus the ``knnQuery`` result are printed.
    """
    n = 4500
    space_type = "cosinesimil"
    space_param = []
    method_name = "small_world_rand"
    method_param = ["NN=17", "initIndexAttempts=3", "initSearchAttempts=1", "indexThreadQty=4"]
    index = nmslib.initIndex(
        n, space_type, space_param, method_name, method_param, nmslib.DataType.VECTOR, nmslib.DistType.FLOAT
    )
    # Release the index in ``finally`` so a failure while reading, building
    # or querying does not skip freeIndex.
    try:
        # NOTE(review): the index is sized for exactly ``n`` entries; if the
        # dataset yields fewer rows the remaining positions are never set.
        # Confirm the sample file has at least ``n`` rows.
        for pos, data in enumerate(read_data("sample_dataset.txt")):
            if pos >= n:
                break
            nmslib.setData(index, pos, data)
        print("here")
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(read_data("sample_queryset.txt")):
            # Pre-formatted single argument: same output as the former
            # ``print idx, result`` statement, valid on Python 2 and 3.
            print("%s %s" % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #6
0
def test_string():
    """Index a handful of strings in nmslib's "leven" space and query them.

    Every entry of ``DATA_STRS`` is stored in a ``small_world_rand`` index
    (string data, integer distances).  For each entry of ``QUERY_STRS`` the
    query position and the result of a k=2 ``knnQuery`` are printed on one
    line.
    """
    DATA_STRS = [
        "xyz",
        "beagcfa",
        "cea",
        "cb",
        "d",
        "c",
        "bdaf",
        "ddcd",
        "egbfa",
        "a",
        "fba",
        "bcccfe",
        "ab",
        "bfgbfdc",
        "bcbbgf",
        "bfbb",
    ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = "leven"
    space_param = []
    method_name = "small_world_rand"
    method_param = ["NN=17", "initIndexAttempts=3", "initSearchAttempts=1", "indexThreadQty=4"]
    index = nmslib.initIndex(
        len(DATA_STRS), space_type, space_param, method_name, method_param, nmslib.DataType.STRING, nmslib.DistType.INT
    )
    # Release the index in ``finally`` so a failure while filling, building
    # or querying does not skip freeIndex.
    try:
        for pos, data in enumerate(DATA_STRS):
            nmslib.setData(index, pos, data)
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(QUERY_STRS):
            # One pre-formatted argument prints exactly what the former
            # ``print idx, result`` statement did, and is valid syntax on
            # both Python 2 and Python 3.
            print("%s %s" % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #7
0
def test_save_and_load(data, init_nn=3, init_index=3, init_search=3):
    """Build a small_world_rand index that saves its graph, then build one that loads it.

    Two 'l2' indexes are built over ``data`` in sequence.  The first uses
    ``indexThreadQty=4`` with ``saveGraphFile=1``; the second uses
    ``indexThreadQty=1`` with ``loadGraphFile=1``.  Both point at
    ``savedGraph.txt``.  After each build the elapsed time and the value
    returned by ``test_method`` for a k-NN query callback are printed.

    Args:
        data: Object exposing ``shape[0]`` whose rows provide ``tolist()``.
        init_nn: Value used for the ``NN`` method parameter.
        init_index: Value used for ``initIndexAttempts``.
        init_search: Value used for ``initSearchAttempts``.
    """
    import nmslib
    # ``reload`` is a builtin on Python 2 only; fall back to importlib on
    # Python 3.  A distinct local name keeps the builtin lookup intact.
    try:
        reload_module = reload
    except NameError:
        from importlib import reload as reload_module
    reload_module(nmslib)

    n = data.shape[0]
    space_type = 'l2'
    space_param = []
    method_name = 'small_world_rand'

    def build_and_score(thread_qty, save_graph, load_graph, label):
        # Build one index with the given graph-file flags, time it, print
        # ``label`` followed by the test_method score, and always free it.
        method_param = ['NN=%d' % init_nn,
                        'initIndexAttempts=%d' % init_index,
                        'initSearchAttempts=%d' % init_search,
                        'indexThreadQty=%d' % thread_qty,
                        'graphFileName=savedGraph.txt',
                        'saveGraphFile=%d' % save_graph,
                        'loadGraphFile=%d' % load_graph]
        index = nmslib.initIndex(n,
                                 space_type,
                                 space_param,
                                 method_name,
                                 method_param,
                                 nmslib.DataType.VECTOR,
                                 nmslib.DistType.FLOAT)
        # freeIndex in ``finally`` so a failing step cannot skip it.
        try:
            t0 = time.time()
            for pos, d in enumerate(data):
                nmslib.setData(index, pos, d.tolist())

            nmslib.buildIndex(index)
            print('Building %i dataset took %1.4f' % (data.shape[0], time.time() - t0))

            # ``m`` is accepted but unused; the signature is kept as-is
            # because test_method's calling convention is not visible here.
            def query(q, k=10, m=3):
                return nmslib.knnQuery(index, k, q.tolist())

            print(label)
            print(test_method(query))
        finally:
            nmslib.freeIndex(index)

    # First pass writes the graph file, second pass reads it back.
    build_and_score(4, 1, 0, 'building score: ')
    build_and_score(1, 0, 1, 'loading score: ')