Пример #1
1
def test_string_loaded():
    """Query a string index loaded from disk.

    Adds DATA_STRS to a new 'leven' / 'small_world_rand' index, loads the
    index from 'small_world_rand.index' with nmslib.loadIndex, sets the
    query-time parameters and prints the k=2 nmslib.knnQuery result for each
    string in QUERY_STRS.

    NOTE(review): presumably the index file is produced by the matching
    "fresh" test via nmslib.saveIndex -- confirm the expected run order.
    """
    DATA_STRS = ["xyz", "beagcfa", "cea", "cb",
                 "d", "c", "bdaf", "ddcd",
                 "egbfa", "a", "fba", "bcccfe",
                 "ab", "bfgbfdc", "bcbbgf", "bfbb"
                 ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'

    index = nmslib.init(
        space_type,
        space_param,
        method_name,
        nmslib.DataType.OBJECT_AS_STRING,
        nmslib.DistType.INT)

    # Free the index even when one of the steps below raises.
    try:
        for point_id, data in enumerate(DATA_STRS):
            nmslib.addDataPoint(index, point_id, data)

        # Every print below takes exactly one argument, so the output is the
        # same under the Python 2 print statement and the Python 3 function.
        print('Let\'s print a few data entries')
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        query_time_param = ['initSearchAttempts=3']

        nmslib.loadIndex(index, index_name)

        print("The index %s is loaded" % index_name)

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the loaded index:")

        k = 2
        for idx, data in enumerate(QUERY_STRS):
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #2
0
def test_string():
    """Build a string index with the initIndex/setData/buildIndex calls and query it.

    Stores every entry of DATA_STRS at its list position, builds the
    'small_world_rand' index over the 'leven' space and prints the k=2
    nmslib.knnQuery result for each string in QUERY_STRS.
    """
    DATA_STRS = [
        "xyz", "beagcfa", "cea", "cb", "d", "c", "bdaf", "ddcd", "egbfa", "a",
        "fba", "bcccfe", "ab", "bfgbfdc", "bcbbgf", "bfbb"
    ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    method_param = [
        'NN=17', 'initIndexAttempts=3', 'initSearchAttempts=1',
        'indexThreadQty=4'
    ]
    index = nmslib.initIndex(len(DATA_STRS), space_type, space_param,
                             method_name, method_param, nmslib.DataType.STRING,
                             nmslib.DistType.INT)
    # Free the index even when filling, building or querying raises.
    try:
        for pos, data in enumerate(DATA_STRS):
            nmslib.setData(index, pos, data)
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(QUERY_STRS):
            # One pre-formatted argument keeps the output identical on
            # Python 2 and Python 3.
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #3
0
def test_vector():
    """Build a dense-vector index with the initIndex/setData/buildIndex calls.

    Stores at most ``n`` vectors yielded by read_data('sample_dataset.txt'),
    builds the 'small_world_rand' index over the 'cosinesimil' space and
    prints the k=2 nmslib.knnQuery result for every vector yielded by
    read_data('sample_queryset.txt').
    """
    n = 4500
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    method_param = [
        'NN=17', 'initIndexAttempts=3', 'initSearchAttempts=1',
        'indexThreadQty=4'
    ]
    index = nmslib.initIndex(n, space_type, space_param, method_name,
                             method_param, nmslib.DataType.VECTOR,
                             nmslib.DistType.FLOAT)

    # Free the index even when loading, building or querying raises.
    try:
        for pos, data in enumerate(read_data('sample_dataset.txt')):
            # The index was initialised for n entries; ignore any extra rows.
            if pos >= n:
                break
            nmslib.setData(index, pos, data)
        print('here')
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(read_data('sample_queryset.txt')):
            # One pre-formatted argument keeps the output identical on
            # Python 2 and Python 3.
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #4
0
def test_string_fresh(batch=True):
    """Create a string index from scratch, query it and save it.

    Args:
        batch: when true, the data is added with nmslib.addDataPointBatch and
            nmslib.knnQueryBatch is also called; otherwise points are added
            one by one with nmslib.addDataPoint.

    The per-query results are always printed from individual
    nmslib.knnQuery calls. The index is written to 'small_world_rand.index'.
    """
    DATA_STRS = ["xyz", "beagcfa", "cea", "cb",
                 "d", "c", "bdaf", "ddcd",
                 "egbfa", "a", "fba", "bcccfe",
                 "ab", "bfgbfdc", "bcbbgf", "bfbb"
                 ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'

    index = nmslib.init(
        space_type,
        space_param,
        method_name,
        nmslib.DataType.OBJECT_AS_STRING,
        nmslib.DistType.INT)

    # Free the index even when one of the steps below raises.
    try:
        # All prints take a single pre-formatted argument so the output is
        # identical on Python 2 and Python 3.
        if batch:
            print('DATA_STRS %s' % (DATA_STRS,))
            nmslib.addDataPointBatch(
                index, np.arange(len(DATA_STRS), dtype=np.int32), DATA_STRS)
        else:
            for point_id, data in enumerate(DATA_STRS):
                nmslib.addDataPoint(index, point_id, data)

        print('Let\'s print a few data entries')
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        index_param = ['NN=17', 'initIndexAttempts=3', 'indexThreadQty=4']
        query_time_param = ['initSearchAttempts=3']

        nmslib.createIndex(index, index_param)
        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the freshly created index:")

        k = 2
        if batch:
            num_threads = 10
            # Only exercises the batch query call; its result is not
            # printed -- the loop below prints per-query results instead.
            nmslib.knnQueryBatch(index, num_threads, k, QUERY_STRS)
        for idx, data in enumerate(QUERY_STRS):
            res = nmslib.knnQuery(index, k, data)
            print('%s %s %s %s' % (idx, data, res, [DATA_STRS[i] for i in res]))

        nmslib.saveIndex(index, index_name)

        print("The index %s is saved" % index_name)
    finally:
        nmslib.freeIndex(index)
Пример #5
0
def test_string_fresh(batch=True):
    """Create a string index from scratch, query it and save it.

    Args:
        batch: when true, the data is added with nmslib.addDataPointBatch and
            nmslib.knnQueryBatch is also called; otherwise points are added
            one by one with nmslib.addDataPoint.

    The per-query results are always printed from individual
    nmslib.knnQuery calls. The index is written to 'small_world_rand.index'.
    """
    DATA_STRS = [
        "xyz", "beagcfa", "cea", "cb", "d", "c", "bdaf", "ddcd", "egbfa", "a",
        "fba", "bcccfe", "ab", "bfgbfdc", "bcbbgf", "bfbb"
    ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'

    index = nmslib.init(space_type, space_param, method_name,
                        nmslib.DataType.OBJECT_AS_STRING, nmslib.DistType.INT)

    # Free the index even when one of the steps below raises.
    try:
        # All prints take a single pre-formatted argument so the output is
        # identical on Python 2 and Python 3.
        if batch:
            print('DATA_STRS %s' % (DATA_STRS,))
            nmslib.addDataPointBatch(
                index, np.arange(len(DATA_STRS), dtype=np.int32), DATA_STRS)
        else:
            for point_id, data in enumerate(DATA_STRS):
                nmslib.addDataPoint(index, point_id, data)

        print('Let\'s print a few data entries')
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        index_param = ['NN=17', 'initIndexAttempts=3', 'indexThreadQty=4']
        query_time_param = ['initSearchAttempts=3']

        nmslib.createIndex(index, index_param)
        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the freshly created index:")

        k = 2
        if batch:
            num_threads = 10
            # Only exercises the batch query call; its result is not
            # printed -- the loop below prints per-query results instead.
            nmslib.knnQueryBatch(index, num_threads, k, QUERY_STRS)
        for idx, data in enumerate(QUERY_STRS):
            res = nmslib.knnQuery(index, k, data)
            print('%s %s %s %s' % (idx, data, res,
                                   [DATA_STRS[i] for i in res]))

        nmslib.saveIndex(index, index_name)

        print("The index %s is saved" % index_name)
    finally:
        nmslib.freeIndex(index)
Пример #6
0
def test_object_as_string_fresh(batch=True):
    """Index 'sample_dataset.txt' rows passed as strings, query, then save.

    Args:
        batch: add all rows with one nmslib.addDataPointBatch call when
            true, otherwise add them one at a time with nmslib.addDataPoint.

    Any existing 'small_world_rand.index' file is removed first; the new
    index is saved under that name after the k=3 queries are printed.
    """
    method_name = 'small_world_rand'
    index_name = method_name + '.index'
    # Remove an index file left over from a previous run.
    if os.path.isfile(index_name):
        os.remove(index_name)

    index = nmslib.init('cosinesimil', [], method_name,
                        nmslib.DataType.OBJECT_AS_STRING,
                        nmslib.DistType.FLOAT)

    if not batch:
        for row_id, row in enumerate(read_data_as_string('sample_dataset.txt')):
            nmslib.addDataPoint(index, row_id, row)
    else:
        rows = list(read_data_as_string('sample_dataset.txt'))
        nmslib.addDataPointBatch(
            index, np.arange(len(rows), dtype=np.int32), rows)

    print('Let\'s print a few data entries')
    print('We have added %d data points' % nmslib.getDataPointQty(index))

    shown = min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))
    for entry in range(shown):
        print(nmslib.getDataPoint(index, entry))

    print('Let\'s invoke the index-build process')

    nmslib.createIndex(
        index, ['NN=17', 'efConstruction=50', 'indexThreadQty=4'])

    print('The index is created')

    nmslib.setQueryTimeParams(index, ['efSearch=50'])

    print('Query time parameters are set')

    print("Results for the freshly created index:")

    neighbours = 3
    for query_no, query in enumerate(
            read_data_as_string('sample_queryset.txt')):
        print(query_no, nmslib.knnQuery(index, neighbours, query))

    nmslib.saveIndex(index, index_name)

    print("The index %s is saved" % index_name)

    nmslib.freeIndex(index)
Пример #7
0
def test_object_as_string_fresh(batch=True):
    """Index 'sample_dataset.txt' rows passed as strings, query, then save.

    Args:
        batch: add all rows with one nmslib.addDataPointBatch call when
            true, otherwise add them one at a time with nmslib.addDataPoint.

    Any existing 'small_world_rand.index' file is removed first; the new
    index is saved under that name after the k=3 queries are printed.
    """
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(
        space_type,
        space_param,
        method_name,
        nmslib.DataType.OBJECT_AS_STRING,
        nmslib.DistType.FLOAT)

    # Free the index even when one of the steps below raises.
    try:
        if batch:
            data = list(read_data_as_string('sample_dataset.txt'))
            nmslib.addDataPointBatch(
                index, np.arange(len(data), dtype=np.int32), data)
        else:
            for point_id, data in enumerate(
                    read_data_as_string('sample_dataset.txt')):
                nmslib.addDataPoint(index, point_id, data)

        # All prints take a single pre-formatted argument so the output is
        # identical on Python 2 and Python 3.
        print('Let\'s print a few data entries')
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        index_param = ['NN=17', 'initIndexAttempts=3', 'indexThreadQty=4']
        query_time_param = ['initSearchAttempts=3']

        nmslib.createIndex(index, index_param)

        print('The index is created')

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the freshly created index:")

        k = 3

        for idx, data in enumerate(read_data_as_string('sample_queryset.txt')):
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))

        nmslib.saveIndex(index, index_name)

        print("The index %s is saved" % index_name)
    finally:
        nmslib.freeIndex(index)
Пример #8
0
def test_string_loaded():
    """Query a string index loaded from disk.

    Adds DATA_STRS to a new 'leven' / 'small_world_rand' index, loads the
    index from 'small_world_rand.index' with nmslib.loadIndex, sets the
    query-time parameters and prints the k=2 nmslib.knnQuery result for each
    string in QUERY_STRS.

    NOTE(review): presumably the index file is produced by the matching
    "fresh" test via nmslib.saveIndex -- confirm the expected run order.
    """
    DATA_STRS = ["xyz", "beagcfa", "cea", "cb",
                 "d", "c", "bdaf", "ddcd",
                 "egbfa", "a", "fba", "bcccfe",
                 "ab", "bfgbfdc", "bcbbgf", "bfbb"
                 ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = 'leven'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'

    index = nmslib.init(
        space_type,
        space_param,
        method_name,
        nmslib.DataType.OBJECT_AS_STRING,
        nmslib.DistType.INT)

    # Free the index even when one of the steps below raises.
    try:
        for point_id, data in enumerate(DATA_STRS):
            nmslib.addDataPoint(index, point_id, data)

        print('Let\'s print a few data entries')
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        # No index-time parameters are needed: the index is loaded, not built.
        query_time_param = ['efSearch=50']

        nmslib.loadIndex(index, index_name)

        print("The index %s is loaded" % index_name)

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the loaded index:")

        k = 2
        for idx, data in enumerate(QUERY_STRS):
            print(idx, nmslib.knnQuery(index, k, data))
    finally:
        nmslib.freeIndex(index)
Пример #9
0
def test_sparse_vector_fresh():
    """Create a sparse-vector index from scratch, query it and save it.

    Reads points with read_sparse_data('sample_sparse_dataset.txt'), builds a
    'small_world_rand' index over the 'cosinesimil_sparse' space, prints the
    k=3 nmslib.knnQuery result for every query yielded by
    read_sparse_data('sample_sparse_queryset.txt') and saves the index to
    'small_world_rand_sparse.index' (any existing file is removed first).
    """
    space_type = 'cosinesimil_sparse'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '_sparse.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(
        space_type,
        space_param,
        method_name,
        nmslib.DataType.SPARSE_VECTOR,
        nmslib.DistType.FLOAT)

    # Free the index even when one of the steps below raises.
    try:
        for point_id, data in enumerate(
                read_sparse_data('sample_sparse_dataset.txt')):
            nmslib.addDataPoint(index, point_id, data)

        # All prints take a single pre-formatted argument so the output is
        # identical on Python 2 and Python 3.
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        index_param = ['NN=17', 'initIndexAttempts=3', 'indexThreadQty=4']
        query_time_param = ['initSearchAttempts=3']

        nmslib.createIndex(index, index_param)

        print('The index is created')

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the freshly created index:")

        k = 3

        for idx, data in enumerate(
                read_sparse_data('sample_sparse_queryset.txt')):
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))

        nmslib.saveIndex(index, index_name)

        print("The index %s is saved" % index_name)
    finally:
        nmslib.freeIndex(index)
Пример #10
0
def test_sparse_vector_fresh():
    """Build, query and save a sparse-vector index.

    Points come from read_sparse_data('sample_sparse_dataset.txt') and
    queries from read_sparse_data('sample_sparse_queryset.txt'). The index
    uses the 'cosinesimil_sparse_fast' space with the 'small_world_rand'
    method and is saved to 'small_world_rand_sparse.index'; a file of that
    name left from an earlier run is deleted up front.
    """
    method_name = 'small_world_rand'
    index_name = method_name + '_sparse.index'
    # Remove an index file left over from a previous run.
    if os.path.isfile(index_name):
        os.remove(index_name)

    index = nmslib.init('cosinesimil_sparse_fast', [], method_name,
                        nmslib.DataType.SPARSE_VECTOR,
                        nmslib.DistType.FLOAT)

    for row_id, row in enumerate(read_sparse_data('sample_sparse_dataset.txt')):
        nmslib.addDataPoint(index, row_id, row)

    print('We have added %d data points' % nmslib.getDataPointQty(index))

    shown = min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))
    for entry in range(shown):
        print(nmslib.getDataPoint(index, entry))

    print('Let\'s invoke the index-build process')

    nmslib.createIndex(
        index, ['NN=17', 'efConstruction=50', 'indexThreadQty=4'])

    print('The index is created')

    nmslib.setQueryTimeParams(index, ['efSearch=50'])

    print('Query time parameters are set')

    print("Results for the freshly created index:")

    neighbours = 3
    for query_no, query in enumerate(
            read_sparse_data('sample_sparse_queryset.txt')):
        print(query_no, nmslib.knnQuery(index, neighbours, query))

    nmslib.saveIndex(index, index_name)

    print("The index %s is saved" % index_name)

    nmslib.freeIndex(index)
Пример #11
0
def test_vector_loaded():
    """Query a dense-vector index loaded from disk.

    Adds every vector yielded by read_data('sample_dataset.txt'), exiting
    with status 1 if nmslib.addDataPoint returns a position different from
    the id passed in. Then loads 'small_world_rand.index' with
    nmslib.loadIndex and prints the k=2 nmslib.knnQuery result for every
    vector yielded by read_data('sample_queryset.txt').
    """
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'
    index = nmslib.init(
        space_type,
        space_param,
        method_name,
        nmslib.DataType.DENSE_VECTOR,
        nmslib.DistType.FLOAT)

    # Free the index even when a step below raises (sys.exit included).
    try:
        for point_id, data in enumerate(read_data('sample_dataset.txt')):
            pos = nmslib.addDataPoint(index, point_id, data)
            if point_id != pos:
                print('id %s != pos %s' % (point_id, pos))
                sys.exit(1)

        # All prints take a single pre-formatted argument so the output is
        # identical on Python 2 and Python 3.
        print('Let\'s print a few data entries')
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        query_time_param = ['initSearchAttempts=3']

        nmslib.loadIndex(index, index_name)

        print("The index %s is loaded" % index_name)

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the loaded index")

        k = 2
        for idx, data in enumerate(read_data('sample_queryset.txt')):
            print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #12
0
def test_vector_loaded():
    """Load a saved dense-vector index and print k=2 query results.

    Vectors from read_data('sample_dataset.txt') are added one by one; the
    process exits with status 1 as soon as nmslib.addDataPoint returns a
    position that differs from the id it was given. The index is then loaded
    from 'small_world_rand.index' and queried with the vectors from
    read_data('sample_queryset.txt').
    """
    method_name = 'small_world_rand'
    index_name = method_name + '.index'
    index = nmslib.init('cosinesimil', [], method_name,
                        nmslib.DataType.DENSE_VECTOR,
                        nmslib.DistType.FLOAT)

    for row_id, row in enumerate(read_data('sample_dataset.txt')):
        stored_at = nmslib.addDataPoint(index, row_id, row)
        if stored_at == row_id:
            continue
        # Position and id disagree: report and abort.
        print('id %s != pos %s' % (row_id, stored_at))
        sys.exit(1)

    print('Let\'s print a few data entries')
    print('We have added %d data points' % nmslib.getDataPointQty(index))

    shown = min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))
    for entry in range(shown):
        print(nmslib.getDataPoint(index, entry))

    print('Let\'s invoke the index-build process')

    nmslib.loadIndex(index, index_name)

    print("The index %s is loaded" % index_name)

    nmslib.setQueryTimeParams(index, ['efSearch=50'])

    print('Query time parameters are set')

    print("Results for the loaded index")

    neighbours = 2
    for query_no, query in enumerate(read_data('sample_queryset.txt')):
        print(query_no, nmslib.knnQuery(index, neighbours, query))

    nmslib.freeIndex(index)
Пример #13
0
def test_vector():
    """Build a dense-vector index with the initIndex/setData/buildIndex calls.

    Stores at most ``n`` vectors yielded by read_data("sample_dataset.txt"),
    builds the "small_world_rand" index over the "cosinesimil" space and
    prints the k=2 nmslib.knnQuery result for every vector yielded by
    read_data("sample_queryset.txt").
    """
    n = 4500
    space_type = "cosinesimil"
    space_param = []
    method_name = "small_world_rand"
    method_param = ["NN=17", "initIndexAttempts=3", "initSearchAttempts=1", "indexThreadQty=4"]
    index = nmslib.initIndex(
        n, space_type, space_param, method_name, method_param, nmslib.DataType.VECTOR, nmslib.DistType.FLOAT
    )

    # Free the index even when loading, building or querying raises.
    try:
        for pos, data in enumerate(read_data("sample_dataset.txt")):
            # The index was initialised for n entries; ignore any extra rows.
            if pos >= n:
                break
            nmslib.setData(index, pos, data)
        print("here")
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(read_data("sample_queryset.txt")):
            # One pre-formatted argument keeps the output identical on
            # Python 2 and Python 3.
            print("%s %s" % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #14
0
def test_string():
    """Build a string index with the initIndex/setData/buildIndex calls and query it.

    Stores every entry of DATA_STRS at its list position, builds the
    "small_world_rand" index over the "leven" space and prints the k=2
    nmslib.knnQuery result for each string in QUERY_STRS.
    """
    DATA_STRS = [
        "xyz",
        "beagcfa",
        "cea",
        "cb",
        "d",
        "c",
        "bdaf",
        "ddcd",
        "egbfa",
        "a",
        "fba",
        "bcccfe",
        "ab",
        "bfgbfdc",
        "bcbbgf",
        "bfbb",
    ]
    QUERY_STRS = ["abc", "def", "ghik"]
    space_type = "leven"
    space_param = []
    method_name = "small_world_rand"
    method_param = ["NN=17", "initIndexAttempts=3", "initSearchAttempts=1", "indexThreadQty=4"]
    index = nmslib.initIndex(
        len(DATA_STRS), space_type, space_param, method_name, method_param, nmslib.DataType.STRING, nmslib.DistType.INT
    )
    # Free the index even when filling, building or querying raises.
    try:
        for pos, data in enumerate(DATA_STRS):
            nmslib.setData(index, pos, data)
        nmslib.buildIndex(index)

        k = 2
        for idx, data in enumerate(QUERY_STRS):
            # One pre-formatted argument keeps the output identical on
            # Python 2 and Python 3.
            print("%s %s" % (idx, nmslib.knnQuery(index, k, data)))
    finally:
        nmslib.freeIndex(index)
Пример #15
0
def test_vector_load(fast=True, fast_batch=True, seq=True):
    """Time three ways of adding dense vectors from '/tmp/foo.txt' to an index.

    If '/tmp/foo.txt' does not exist it is created with np.savetxt from a
    100000 x 1000 array of np.random.rand values. Each enabled variant runs
    against its own fresh index, inside a TimeIt block.

    Args:
        fast: read everything with read_data_fast and add it with a single
            nmslib.addDataPointBatch call.
        fast_batch: read with read_data_fast_batch(f, 10000) and add each
            batch with nmslib.addDataPointBatch, offsetting the ids by the
            number of rows already added.
        seq: read with read_data and add row by row with nmslib.addDataPoint.
    """
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    f = '/tmp/foo.txt'
    if not os.path.isfile(f):
        print('creating %s' % f)
        np.savetxt(f, np.random.rand(100000, 1000), delimiter="\t")
        print('done')

    def new_index():
        # Every variant starts from an identically configured empty index.
        return nmslib.init(
            space_type,
            space_param,
            method_name,
            nmslib.DataType.DENSE_VECTOR,
            nmslib.DistType.FLOAT)

    if fast:
        index = new_index()
        # try/finally: free the index even when the timed section raises.
        try:
            with TimeIt('fast add data point'):
                data = read_data_fast(f)
                nmslib.addDataPointBatch(
                    index, np.arange(len(data), dtype=np.int32), data)
        finally:
            nmslib.freeIndex(index)

    if fast_batch:
        index = new_index()
        try:
            with TimeIt('fast_batch add data point'):
                offset = 0
                for data in read_data_fast_batch(f, 10000):
                    nmslib.addDataPointBatch(
                        index,
                        np.arange(len(data), dtype=np.int32) + offset,
                        data)
                    offset += data.shape[0]
            print('offset', offset)
        finally:
            nmslib.freeIndex(index)

    if seq:
        index = new_index()
        try:
            with TimeIt('seq add data point'):
                for point_id, data in enumerate(read_data(f)):
                    nmslib.addDataPoint(index, point_id, data)
        finally:
            nmslib.freeIndex(index)
Пример #16
0
def test_vector_load(fast=True, fast_batch=True, seq=True):
    """Time three ways of adding dense vectors from '/tmp/foo.txt' to an index.

    If '/tmp/foo.txt' does not exist it is created with np.savetxt from a
    100000 x 1000 array of np.random.rand values. Each enabled variant runs
    against its own fresh index, inside a TimeIt block.

    Args:
        fast: read everything with read_data_fast and add it with a single
            nmslib.addDataPointBatch call.
        fast_batch: read with read_data_fast_batch(f, 10000) and add each
            batch with nmslib.addDataPointBatch, offsetting the ids by the
            number of rows already added.
        seq: read with read_data and add row by row with nmslib.addDataPoint.
    """
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    f = '/tmp/foo.txt'
    if not os.path.isfile(f):
        # Single-argument prints behave the same on Python 2 and Python 3.
        print('creating %s' % f)
        np.savetxt(f, np.random.rand(100000, 1000), delimiter="\t")
        print('done')

    def new_index():
        # Every variant starts from an identically configured empty index.
        return nmslib.init(
            space_type,
            space_param,
            method_name,
            nmslib.DataType.DENSE_VECTOR,
            nmslib.DistType.FLOAT)

    if fast:
        index = new_index()
        # try/finally: free the index even when the timed section raises.
        try:
            with TimeIt('fast add data point'):
                data = read_data_fast(f)
                nmslib.addDataPointBatch(
                    index, np.arange(len(data), dtype=np.int32), data)
        finally:
            nmslib.freeIndex(index)

    if fast_batch:
        index = new_index()
        try:
            with TimeIt('fast_batch add data point'):
                offset = 0
                for data in read_data_fast_batch(f, 10000):
                    nmslib.addDataPointBatch(
                        index,
                        np.arange(len(data), dtype=np.int32) + offset,
                        data)
                    offset += data.shape[0]
            print('offset %s' % offset)
        finally:
            nmslib.freeIndex(index)

    if seq:
        index = new_index()
        try:
            with TimeIt('seq add data point'):
                for point_id, data in enumerate(read_data(f)):
                    nmslib.addDataPoint(index, point_id, data)
        finally:
            nmslib.freeIndex(index)
Пример #17
0
def test_vector_fresh(fast=True):
    """Create a dense-vector index from scratch, query it and save it.

    Args:
        fast: when true, data and queries are read with read_data_fast and
            handled with nmslib.addDataPointBatch / nmslib.knnQueryBatch;
            otherwise they are read with read_data and handled one at a time
            (exiting with status 1 if a returned position differs from the
            id passed to nmslib.addDataPoint).

    Elapsed wall-clock time is printed for the add phase and the query
    phase. Any existing 'small_world_rand.index' is removed first and the
    new index is saved under that name.
    """
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(
        space_type,
        space_param,
        method_name,
        nmslib.DataType.DENSE_VECTOR,
        nmslib.DistType.FLOAT)

    # Free the index even when a step below raises (sys.exit included).
    try:
        # All prints take a single pre-formatted argument so the output is
        # identical on Python 2 and Python 3.
        start = time.time()
        if fast:
            data = read_data_fast('sample_dataset.txt')
            # data.shape is wrapped in a 1-tuple so "%" formats it as one value.
            print('data.shape %s' % (data.shape,))
            nmslib.addDataPointBatch(
                index, np.arange(len(data), dtype=np.int32), data)
        else:
            for point_id, data in enumerate(read_data('sample_dataset.txt')):
                pos = nmslib.addDataPoint(index, point_id, data)
                if point_id != pos:
                    print('id %s != pos %s' % (point_id, pos))
                    sys.exit(1)
        end = time.time()
        print('added data in %s secs' % (end - start))

        print('Let\'s print a few data entries')
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        index_param = ['NN=17', 'initIndexAttempts=3', 'indexThreadQty=4']
        query_time_param = ['initSearchAttempts=3']

        nmslib.createIndex(index, index_param)

        print('The index is created')

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the freshly created index:")

        k = 3

        start = time.time()
        if fast:
            num_threads = 10
            query = read_data_fast('sample_queryset.txt')
            res = nmslib.knnQueryBatch(index, num_threads, k, query)
            for idx, v in enumerate(res):
                print('%s %s' % (idx, v))
        else:
            for idx, data in enumerate(read_data('sample_queryset.txt')):
                print('%s %s' % (idx, nmslib.knnQuery(index, k, data)))
        end = time.time()
        print('querying done in %s secs' % (end - start))

        nmslib.saveIndex(index, index_name)

        print("The index %s is saved" % index_name)
    finally:
        nmslib.freeIndex(index)
Пример #18
0
 def freeIndex(self):
     """Pass this object's ``_index`` attribute to ``nmslib.freeIndex``."""
     handle = self._index
     nmslib.freeIndex(handle)
Пример #19
0
 def clr_mem(self):
     """Pass ``self.index`` to ``nmslib.freeIndex``, then set ``self.created`` to False."""
     handle = self.index
     nmslib.freeIndex(handle)
     # Cleared only after the free call has returned.
     self.created = False
Пример #20
0
def test_vector_fresh(fast=True):
    """Create a dense-vector index from scratch, query it and save it.

    Args:
        fast: when true, data and queries are read with read_data_fast and
            handled with nmslib.addDataPointBatch / nmslib.knnQueryBatch;
            otherwise they are read with read_data and handled one at a time
            (exiting with status 1 if a returned position differs from the
            id passed to nmslib.addDataPoint).

    Also prints nmslib.getDistance for the point pairs (0,0), (1,1), (0,1)
    and (1,0), and the elapsed wall-clock time of the add and query phases.
    Any existing 'small_world_rand.index' is removed first and the new index
    is saved under that name.
    """
    space_type = 'cosinesimil'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(space_type, space_param, method_name,
                        nmslib.DataType.DENSE_VECTOR, nmslib.DistType.FLOAT)

    # Free the index even when a step below raises (sys.exit included).
    try:
        start = time.time()
        if fast:
            data = read_data_fast('sample_dataset.txt')
            print('data.shape', data.shape)
            nmslib.addDataPointBatch(
                index, np.arange(len(data), dtype=np.int32), data)
        else:
            for point_id, data in enumerate(read_data('sample_dataset.txt')):
                pos = nmslib.addDataPoint(index, point_id, data)
                if point_id != pos:
                    print('id %s != pos %s' % (point_id, pos))
                    sys.exit(1)
        end = time.time()
        print('added data in %s secs' % (end - start))

        print('Let\'s print a few data entries')
        print('We have added %d data points' % nmslib.getDataPointQty(index))

        print("Distance between points (0,0) " +
              str(nmslib.getDistance(index, 0, 0)))
        print("Distance between points (1,1) " +
              str(nmslib.getDistance(index, 1, 1)))
        print("Distance between points (0,1) " +
              str(nmslib.getDistance(index, 0, 1)))
        print("Distance between points (1,0) " +
              str(nmslib.getDistance(index, 1, 0)))

        for i in range(min(MAX_PRINT_QTY, nmslib.getDataPointQty(index))):
            print(nmslib.getDataPoint(index, i))

        print('Let\'s invoke the index-build process')

        index_param = ['NN=17', 'initIndexAttempts=3', 'indexThreadQty=4']
        query_time_param = ['initSearchAttempts=3']

        nmslib.createIndex(index, index_param)

        print('The index is created')

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the freshly created index:")

        k = 3

        start = time.time()
        if fast:
            num_threads = 10
            query = read_data_fast('sample_queryset.txt')
            res = nmslib.knnQueryBatch(index, num_threads, k, query)
            for idx, v in enumerate(res):
                print(idx, v)
        else:
            for idx, data in enumerate(read_data('sample_queryset.txt')):
                print(idx, nmslib.knnQuery(index, k, data))
        end = time.time()
        print('querying done in %s secs' % (end - start))

        nmslib.saveIndex(index, index_name)

        print("The index %s is saved" % index_name)
    finally:
        nmslib.freeIndex(index)
Пример #21
0
def bench_sparse_vector(batch=True):
    """Benchmark sparse k-NN search on random binary data.

    Generates a 40000 x 20000 dataset and a 1000 x 20000 query set with
    np.random.binomial, prints the k=3 closest rows to the first query by
    exhaustive distance.cosine comparison, times snn.MultiClusterIndex
    build/search, and then times an nmslib 'small_world_rand' index over the
    'cosinesimil_sparse_fast' space. The nmslib index is saved to
    'small_world_rand_sparse.index' (any existing file is removed first).

    Args:
        batch: when true, pass the CSR matrices to nmslib.addDataPointBatch
            and nmslib.knnQueryBatch; otherwise convert every row to a list
            of [column, value] pairs for its non-zero entries and use
            nmslib.addDataPoint / nmslib.knnQuery per row.
    """
    dim = 20000
    dataset = np.random.binomial(1, 0.01, size=(40000, dim))
    queryset = np.random.binomial(1, 0.009, size=(1000, dim))

    # All prints take a single pre-formatted argument so the output is
    # identical on Python 2 and Python 3.
    print('dataset[0]: %s'
          % ([[i, v] for i, v in enumerate(dataset[0]) if v > 0],))

    k = 3

    # Exhaustive reference: cosine distance from the first query to every row.
    q0 = queryset[0]
    res = [[i, distance.cosine(q0, dataset[i, :])]
           for i in range(dataset.shape[0])]
    res.sort(key=lambda x: x[1])
    print('q0 res %s' % (res[:k],))

    data_matrix = csr_matrix(dataset, dtype=np.float32)
    query_matrix = csr_matrix(queryset, dtype=np.float32)

    data_to_return = range(dataset.shape[0])
    with TimeIt('building MultiClusterIndex'):
        cp = snn.MultiClusterIndex(data_matrix, data_to_return)

    with TimeIt('knn search'):
        res = cp.search(query_matrix, k=k, return_distance=False)

    print(res[:5])
    for i in res[0]:
        print('%s %s' % (int(i), distance.cosine(q0, dataset[int(i), :])))

    space_type = 'cosinesimil_sparse_fast'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '_sparse.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(space_type,
                        space_param,
                        method_name,
                        nmslib.DataType.SPARSE_VECTOR,
                        nmslib.DistType.FLOAT)

    # Free the index even when one of the steps below raises.
    try:
        if batch:
            with TimeIt('batch add'):
                positions = nmslib.addDataPointBatch(
                    index, np.arange(len(dataset), dtype=np.int32),
                    data_matrix)
            print('positions %s' % (positions,))
        else:
            with TimeIt('preparing'):
                # Keep only the non-zero entries of each row as
                # [column, value] pairs.
                d = [[[i, v] for i, v in enumerate(data) if v > 0]
                     for data in dataset]
                q = [[[i, v] for i, v in enumerate(data) if v > 0]
                     for data in queryset]
            with TimeIt('adding points'):
                for point_id, data in enumerate(d):
                    nmslib.addDataPoint(index, point_id, data)

        print('Let\'s invoke the index-build process')

        index_param = ['NN=17', 'initIndexAttempts=3', 'indexThreadQty=4']
        query_time_param = ['initSearchAttempts=3']

        with TimeIt('building index'):
            nmslib.createIndex(index, index_param)

        print('The index is created')

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the freshly created index:")

        with TimeIt('knn query'):
            if batch:
                num_threads = 10
                res = nmslib.knnQueryBatch(index, num_threads, k, query_matrix)
                for idx, v in enumerate(res):
                    if idx < 5:
                        print('%s %s' % (idx, v))
                    if idx == 0:
                        # Show the distances of the first query's neighbours.
                        for i in v:
                            print('q0 %s %s'
                                  % (i, distance.cosine(q0, dataset[i, :])))
            else:
                for idx, data in enumerate(q):
                    res = nmslib.knnQuery(index, k, data)
                    if idx < 5:
                        print('%s %s' % (idx, res))

        nmslib.saveIndex(index, index_name)

        print("The index %s is saved" % index_name)
    finally:
        nmslib.freeIndex(index)
Пример #22
0
def bench_sparse_vector(batch=True):
    """Benchmark k-NN search over random sparse binary vectors.

    The routine generates a random binary dataset and query set, prints an
    exhaustive cosine-distance ranking for the first query, runs the same
    search through pysparnn's ``MultiClusterIndex`` and then through an
    nmslib ``small_world_rand`` index in the ``cosinesimil_sparse_fast``
    space. The nmslib index is finally saved to
    ``small_world_rand_sparse.index`` (any existing file of that name is
    removed first).

    Args:
        batch: when true, points are added with ``addDataPointBatch`` and
            queried with ``knnQueryBatch`` using CSR matrices; otherwise
            every point is converted to ``[index, value]`` pairs and
            added / queried one at a time.
    """
    # delay importing these so CI can import module
    from scipy.sparse import csr_matrix
    from scipy.spatial import distance
    from pysparnn.cluster_index import MultiClusterIndex

    def to_sparse_row(row):
        # [column, value] pairs for the strictly positive entries of a row.
        return [[col, val] for col, val in enumerate(row) if val > 0]

    dim = 20000
    dataset = np.random.binomial(1, 0.01, size=(40000, dim))
    queryset = np.random.binomial(1, 0.009, size=(1000, dim))

    print('dataset[0]:', to_sparse_row(dataset[0]))

    k = 3

    # Exhaustive scan for the first query: gives reference neighbours to
    # compare against the approximate results printed further down.
    q0 = queryset[0]
    res = [[i, distance.cosine(q0, dataset[i, :])]
           for i in range(dataset.shape[0])]
    res.sort(key=lambda x: x[1])
    print('q0 res', res[:k])

    data_matrix = csr_matrix(dataset, dtype=np.float32)
    query_matrix = csr_matrix(queryset, dtype=np.float32)

    data_to_return = range(dataset.shape[0])

    with TimeIt('building MultiClusterIndex'):
        cp = MultiClusterIndex(data_matrix, data_to_return)

    with TimeIt('knn search'):
        res = cp.search(query_matrix, k=k, return_distance=False)

    print(res[:5])
    for i in res[0]:
        print(int(i), distance.cosine(q0, dataset[int(i), :]))

    #space_type = 'cosinesimil_sparse'
    space_type = 'cosinesimil_sparse_fast'
    space_param = []
    method_name = 'small_world_rand'
    index_name = method_name + '_sparse.index'
    if os.path.isfile(index_name):
        os.remove(index_name)
    index = nmslib.init(space_type, space_param, method_name,
                        nmslib.DataType.SPARSE_VECTOR, nmslib.DistType.FLOAT)

    # Everything after init runs under try/finally so the index is released
    # even when adding, building, querying or saving fails.
    try:
        if batch:
            with TimeIt('batch add'):
                positions = nmslib.addDataPointBatch(
                    index, np.arange(len(dataset), dtype=np.int32),
                    data_matrix)
            print('positions', positions)
        else:
            with TimeIt('preparing'):
                d = [to_sparse_row(row) for row in dataset]
                q = [to_sparse_row(row) for row in queryset]
            with TimeIt('adding points'):
                for point_id, point in enumerate(d):
                    nmslib.addDataPoint(index, point_id, point)

        print('Let\'s invoke the index-build process')

        index_param = ['NN=17', 'efConstruction=50', 'indexThreadQty=4']
        query_time_param = ['efSearch=50']

        with TimeIt('building index'):
            nmslib.createIndex(index, index_param)

        print('The index is created')

        nmslib.setQueryTimeParams(index, query_time_param)

        print('Query time parameters are set')

        print("Results for the freshly created index:")

        with TimeIt('knn query'):
            if batch:
                num_threads = 10
                res = nmslib.knnQueryBatch(index, num_threads, k,
                                           query_matrix)
                for idx, v in enumerate(res):
                    if idx < 5:
                        print(idx, v)
                    if idx == 0:
                        # Distances of the neighbours returned for the
                        # first query, for comparison with the scan above.
                        for i in v:
                            print('q0', i,
                                  distance.cosine(q0, dataset[i, :]))
            else:
                for idx, data in enumerate(q):
                    res = nmslib.knnQuery(index, k, data)
                    if idx < 5:
                        print(idx, res)

        nmslib.saveIndex(index, index_name)

        print("The index %s is saved" % index_name)
    finally:
        nmslib.freeIndex(index)
Пример #23
0
def test_save_and_load(data, init_nn=3, init_index=3, init_search=3):
    """Build an index while saving its graph, then rebuild it from that graph.

    Two ``small_world_rand`` indexes in the ``l2`` space are created one
    after the other over the same ``data``. The first is built with
    ``saveGraphFile=1`` (graph written to ``savedGraph.txt``); the second
    is built with ``loadGraphFile=1`` from that file. After each build the
    elapsed time and the value returned by ``test_method`` for a k-NN query
    callback are printed.

    Args:
        data: dataset to index; the code reads ``data.shape[0]`` and calls
            ``.tolist()`` on each item, so presumably a 2-D numpy array.
        init_nn: value passed as the ``NN`` method parameter.
        init_index: value passed as ``initIndexAttempts``.
        init_search: value passed as ``initSearchAttempts``.
    """
    import nmslib
    # ``reload`` is a builtin only on Python 2; use importlib elsewhere.
    try:
        reload_module = reload
    except NameError:
        from importlib import reload as reload_module
    reload_module(nmslib)

    n = data.shape[0]
    space_type = 'l2'
    space_param = []
    method_name = 'small_world_rand'

    def run_phase(thread_qty, save_graph, load_graph, label):
        # Create, fill, build, score and free one index with the given
        # graph save/load flags.
        method_param = ['NN=%d' % init_nn,
                        'initIndexAttempts=%d' % init_index,
                        'initSearchAttempts=%d' % init_search,
                        'indexThreadQty=%d' % thread_qty,
                        'graphFileName=savedGraph.txt',
                        'saveGraphFile=%d' % save_graph,
                        'loadGraphFile=%d' % load_graph]
        index = nmslib.initIndex(n,
                                 space_type,
                                 space_param,
                                 method_name,
                                 method_param,
                                 nmslib.DataType.VECTOR,
                                 nmslib.DistType.FLOAT)
        # Free the index even if filling, building or scoring raises.
        try:
            t0 = time.time()
            for pos, d in enumerate(data):
                nmslib.setData(index, pos, d.tolist())

            nmslib.buildIndex(index)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('Building %i dataset took %1.4f' % (n, time.time() - t0))

            def query(q, k=10, m=3):
                # ``m`` is accepted but not used by this callback.
                return nmslib.knnQuery(index, k, q.tolist())

            print(label)
            print(test_method(query))
        finally:
            nmslib.freeIndex(index)

    # Phase 1: multi-threaded build that writes the graph file.
    run_phase(4, 1, 0, 'building score: ')
    # Phase 2: single-threaded build that loads the graph written above.
    run_phase(1, 0, 1, 'loading score: ')
Пример #24
0
 def freeIndex(self):
     """Hand the index stored in ``self._index`` to ``nmslib.freeIndex``."""
     handle = self._index
     nmslib.freeIndex(handle)
Пример #25
0
 def freeIndex(self):
     """Hand the index stored in ``self._index`` to ``nmslib.freeIndex``."""
     # nmslib is imported at call time rather than at module import time.
     import nmslib
     release = nmslib.freeIndex
     release(self._index)
Пример #26
0
 def freeIndex(self):
     """Pass ``self._index`` to ``nmslib.freeIndex``."""
     # Function-scope import: nmslib is only needed when this is called.
     import nmslib
     release = nmslib.freeIndex
     release(self._index)