Example no. 1
def test_serialization_mnist():

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05),
                                         (5, 0.3),
                                         (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        tree.fit(X_train)

        # Serialize and deserialize
        tree = pickle.loads(pickle.dumps(tree))

        precision = 0.0
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]
            assert (nns < X_train.shape[0]).all()

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
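
Every test in this listing calls a _get_mnist_data helper that is not shown in the snippets. A rough sketch of what such a loader could look like, assuming scikit-learn's digits dataset and unit-normalised float64 rows (the original test suite may load and split the data differently):

import numpy as np
from sklearn.datasets import load_digits


def _get_mnist_data(test_size=500, seed=10):
    # Hypothetical loader: shuffle the digits data and split off a test block.
    X = load_digits().data.astype(np.float64)
    rng = np.random.RandomState(seed)
    rng.shuffle(X)
    X_train, X_test = X[test_size:], X[:test_size]
    # rpforest ranks by cosine similarity, so normalise rows to unit length.
    X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
    X_test /= np.linalg.norm(X_test, axis=1)[:, np.newaxis]
    return X_train, X_test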
Example no. 2
def test_find_self():

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        tree.fit(X_train)

        nodes = {k: set(v) for k, v in tree.get_leaf_nodes()}
        for i, x_train in enumerate(X_train):
            nns = tree.query(x_train, 10)[:10]
            assert nns[0] == i

            point_codes = tree.encode(x_train)

            for code in point_codes:
                assert i in nodes[code]

        tree = pickle.loads(pickle.dumps(tree))

        nodes = {k: set(v) for k, v in tree.get_leaf_nodes()}
        for i, x_train in enumerate(X_train):
            nns = tree.query(x_train, 10)[:10]
            assert nns[0] == i

            point_codes = tree.encode(x_train)

            for code in point_codes:
                assert i in nodes[code]
Example no. 3
def test_candidates_mnist():

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05),
                                         (5, 0.12),
                                         (10, 0.2),
                                         (50, 0.5),
                                         (80, 0.6)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        tree.fit(X_train)

        precision = 0.0
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            check_nns = tree.get_candidates(x_test, 100000)
            assert len(check_nns) == len(set(check_nns))
            assert -1 not in check_nns
            assert (check_nns < X_train.shape[0]).all()
            nns = tree.get_candidates(x_test, 10)[:10]
            assert (nns < X_train.shape[0]).all()

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
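
In the test above the top candidates reach noticeably lower precision than query does in the serialization test, which is consistent with get_candidates returning the points that share leaf nodes with the query without ranking them by exact similarity. A minimal re-ranking sketch under that assumption (rerank_candidates is illustrative, not part of rpforest; X_train is row-normalised as in the tests):

import numpy as np


def rerank_candidates(tree, X_train, x_query, k=10, max_candidates=1000):
    # Unscored candidate indices gathered from the forest's leaf nodes.
    candidates = np.asarray(tree.get_candidates(x_query, max_candidates))
    # Exact cosine scores against the candidate rows only (rows are unit-norm).
    scores = np.dot(X_train[candidates], x_query)
    # Return the k candidates with the highest exact similarity.
    return candidates[np.argsort(-scores)[:k]]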
Example no. 4
def test_sample_training():

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
Example no. 5
import numpy as np
from heapq import nsmallest

from rpforest import RPForest


def lvnn(fp, nt=3, k=5, iter=5, leaves=50):

    # nn[i, j] holds (neighbour index, distance to point i) for the j-th neighbour.
    nn = np.zeros((fp.shape[0], k, 2)) - 1

    print(' start Tree build')
    model = RPForest(leaf_size=leaves, no_trees=nt)
    model.fit(fp)
    for i in range(fp.shape[0]):
        nn[i, :, 0] = model.query(fp[i], k)

    # Refine the neighbour lists by exploring neighbours of neighbours.
    t = 0
    while t < iter:
        t += 1
        old_nn = nn.copy()  # snapshot, so updates within a pass do not feed back
        for i in range(fp.shape[0]):
            h = set()
            for j in range(k):
                ji = int(old_nn[i, j, 0])
                for l in range(k):
                    li = int(old_nn[ji, l, 0])
                    d = np.linalg.norm(fp[i, :] - fp[li, :])
                    h.add((li, d))
            # Keep the k candidates closest to point i.
            nn[i, :, :] = np.array(nsmallest(k, h, key=lambda c: c[1]))

    # Flatten to (row, column, distance) triplets.
    csr = np.zeros((fp.shape[0] * k, 3))
    l = 0
    for i in range(fp.shape[0]):
        for j in range(k):
            csr[l, 0] = i
            csr[l, 1] = nn[i, j, 0]
            csr[l, 2] = nn[i, j, 1]
            l += 1
    return csr
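
The array returned by lvnn is a list of (row, column, distance) triplets rather than an actual CSR matrix. A short usage sketch that assembles a sparse k-NN graph from it, assuming scipy is available and using random data purely for illustration:

import numpy as np
from scipy.sparse import coo_matrix

fp = np.random.rand(1000, 32)              # illustrative fingerprint matrix
triplets = lvnn(fp, nt=3, k=5, leaves=50)

# Build a sparse k-NN graph from the (row, column, distance) triplets.
rows = triplets[:, 0].astype(int)
cols = triplets[:, 1].astype(int)
knn_graph = coo_matrix((triplets[:, 2], (rows, cols)),
                       shape=(fp.shape[0], fp.shape[0])).tocsr()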
Example no. 6
def test_max_size():

    X_train, X_test = _get_mnist_data()

    tree = RPForest(leaf_size=10, no_trees=10)
    tree.fit(X_train)

    for leaf_code, leaf_indices in tree.get_leaf_nodes():
        assert len(leaf_indices) < 10
Example no. 7
class RPForest(BaseANN):
    def __init__(self, leaf_size, n_trees):
        from rpforest import RPForest
        self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
        self._model = RPForest(leaf_size=leaf_size, no_trees=n_trees)

    def fit(self, X):
        self._model.fit(X)

    def query(self, v, n):
        return self._model.query(v, n)
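
A minimal usage sketch for this wrapper, with random data purely for illustration (rpforest expects C-contiguous float64 rows, which np.random.rand already produces):

import numpy as np

index = RPForest(leaf_size=10, n_trees=20)   # the wrapper class above, not rpforest.RPForest
X = np.random.rand(10000, 64)
index.fit(X)

query = np.random.rand(64)
neighbours = index.query(query, 10)          # indices of ~10 approximate nearest rows of X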
Example no. 8
def test_multiple_fit_calls():

    X_train, X_test = _get_mnist_data()

    tree = RPForest(leaf_size=10, no_trees=10)
    tree.fit(X_train)

    assert len(tree.trees) == 10

    tree.fit(X_train)

    assert len(tree.trees) == 10
Example no. 9
def test_encoding_mnist():

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05),
                                         (5, 0.3),
                                         (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        tree.fit(X_train)

        for x_train in X_train:
            encodings_0 = tree.encode(x_train)
            encodings_1 = tree.encode(x_train)
            assert encodings_0 == encodings_1

        tree = pickle.loads(pickle.dumps(tree))

        for x_train in X_train:
            encodings_0 = tree.encode(x_train)
            encodings_1 = tree.encode(x_train)
            assert encodings_0 == encodings_1
Example no. 10
def get_algos(m, save_index):
    algos = {
        'lshf': [
            LSHF(m, 5, 10),
            LSHF(m, 5, 20),
            LSHF(m, 10, 20),
            LSHF(m, 10, 50),
            LSHF(m, 20, 100)
        ],
        'flann': [
            FLANN(m, 0.2),
            FLANN(m, 0.5),
            FLANN(m, 0.7),
            FLANN(m, 0.8),
            FLANN(m, 0.9),
            FLANN(m, 0.95),
            FLANN(m, 0.97),
            FLANN(m, 0.98),
            FLANN(m, 0.99),
            FLANN(m, 0.995)
        ],
        'panns': [
            PANNS(m, 5, 20),
            PANNS(m, 10, 10),
            PANNS(m, 10, 50),
            PANNS(m, 10, 100),
            PANNS(m, 20, 100),
            PANNS(m, 40, 100)
        ],
        'annoy': [
            Annoy(m, n_trees, search_k) for n_trees in [100, 200, 400]
            for search_k in [
                100, 200, 400, 1000, 2000, 4000, 10000, 20000, 40000, 100000,
                200000, 400000
            ]
        ],
        'nearpy': [
            NearPy(m, 10, 5),
            NearPy(m, 10, 10),
            NearPy(m, 10, 20),
            NearPy(m, 10, 40),  # NearPy(m, 10, 100),
            NearPy(m, 12, 5),
            NearPy(m, 12, 10),
            NearPy(m, 12, 20),
            NearPy(m, 12, 40),  # NearPy(m, 12, 100),
            NearPy(m, 14, 5),
            NearPy(m, 14, 10),
            NearPy(m, 14, 20),
            NearPy(m, 14, 40),  # NearPy(m, 14, 100),
            NearPy(m, 16, 5),
            NearPy(m, 16, 10),
            NearPy(m, 16, 15),
            NearPy(m, 16, 20),
            NearPy(m, 16, 25),
            NearPy(m, 16, 30),
            NearPy(m, 16, 40)
        ],  #, NearPy(m, 16, 50), NearPy(m, 16, 70), NearPy(m, 16, 90), NearPy(m, 16, 120), NearPy(m, 16, 150)],
        'bruteforce': [BruteForce(m)],
        'bruteforce-blas': [BruteForceBLAS(m)],
        'ball': [
            BallTree(m, 10),
            BallTree(m, 20),
            BallTree(m, 40),
            BallTree(m, 100),
            BallTree(m, 200),
            BallTree(m, 400),
            BallTree(m, 1000)
        ],
        'kd': [
            KDTree(m, 10),
            KDTree(m, 20),
            KDTree(m, 40),
            KDTree(m, 100),
            KDTree(m, 200),
            KDTree(m, 400),
            KDTree(m, 1000)
        ],

        # START: Non-Metric Space Library (nmslib) entries
        'bruteforce0(nmslib)':
        [NmslibNewIndex(m, 'seq_search', ['copyMem=0'])],
        # We don't need copyMem=1 now, because the new Python wrapper already re-creates data points.
        #'bruteforce1(nmslib)': [NmslibNewIndex(m, 'seq_search', ['copyMem=1'])],
        'BallTree(nmslib)': [],
        'hnsw(nmslib)': [],
        'SW-graph(nmslib)': [],
        'faiss': [
            Faiss(m, l, p) for l in [5, 10, 20, 50, 100, 200, 400, 800, 1600]
            for p in [1, 2, 3, 4, 5, 8, 10, 20, 50, 100, 200] if l >= p
        ]
    }

    for r in [
            0.99, 0.97, 0.95, 0.9, 0.85, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1
    ]:
        algos['BallTree(nmslib)'].append(
            NmslibNewIndex(m, 'vptree',
                           ['tuneK=10', 'desiredRecall=%f' % r]))

    if m == 'euclidean':
        # kgraph
        kgraph_preset = {
            'reverse': -1
        }
        kgraph_Ps = [1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        algos['kgraph'] = [
            KGraph(m, P, kgraph_preset, save_index) for P in kgraph_Ps
        ]

        # nmslib algorithms
        # Only works for euclidean distance
        MsPostsEfs = [(32, 2, [
            20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160, 200, 300, 400
        ]), (20, 2, [2, 5, 10, 15, 20, 30, 40, 50, 70, 80, 120, 200, 400]),
                      (12, 0, [1, 2, 5, 10, 15, 20, 30, 40, 50, 70, 80, 120]),
                      (4, 0, [1, 2, 5, 10, 20, 30, 50, 70, 90, 120]),
                      (8, 0, [1, 2, 5, 10, 20, 30, 50, 70, 90, 120, 160])]
        for oneCase in MsPostsEfs:
            for ef in oneCase[2]:
                algos['hnsw(nmslib)'].append(
                    NmslibReuseIndex(m, 'hnsw', [
                        'M=%d' % oneCase[0],
                        'post=%d' % oneCase[1], 'efConstruction=400'
                    ], save_index, ['ef=%d' % ef]))

        algos['MP-lsh(lshkit)'] = []
        for r in [
                0.99, 0.97, 0.95, 0.9, 0.85, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2,
                0.1
        ]:
            algos['MP-lsh(lshkit)'].append(
                NmslibNewIndex(m, 'lsh_multiprobe', [
                    'desiredRecall=%f' % r, 'H=1200001', 'T=10', 'L=50',
                    'tuneK=10'
                ]))

        NNsAndEfs = [(10, [800, 400, 200, 100, 50, 30, 20, 15, 10]),
                     (5, [30, 25, 20, 15, 10, 5, 4, 3, 2, 1])]
        for oneCase in NNsAndEfs:
            for ef in oneCase[1]:
                algos['SW-graph(nmslib)'].append(
                    NmslibReuseIndex(
                        m, 'sw-graph', [
                            'NN=%d' % oneCase[0], 'efConstruction=400',
                            'initIndexAttempts=1'
                        ], save_index,
                        ['efSearch=%d' % ef, 'initSearchAttempts=1']))

    # END: Non-Metric Space Library (nmslib) entries

    if m == 'angular':
        # kgraph
        kgraph_preset = {
            'reverse': -1,
            'K': 200,
            'L': 300,
            'S': 20
        }
        kgraph_Ps = [1, 2, 3, 4, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        algos['kgraph'] = [
            KGraph(m, P, kgraph_preset, save_index) for P in kgraph_Ps
        ]

        # nmslib algorithms
        MsPostsEfs = [(48, 2, [
            50, 70, 90, 120, 160, 200, 400, 600, 700, 800, 1000, 1400, 1600,
            2000
        ]),
                      (32, 2, [
                          10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140,
                          160, 200, 300, 400, 600, 700, 800, 1000, 1200, 1400,
                          1600, 2000
                      ]), (20, 0, [2, 5, 10, 15, 20, 30, 40, 50, 70, 80]),
                      (12, 0, [1, 2, 5, 10, 15, 20, 30, 40, 50, 70, 80])]

        for oneCase in MsPostsEfs:
            for ef in oneCase[2]:
                algos['hnsw(nmslib)'].append(
                    NmslibReuseIndex(m, 'hnsw', [
                        'M=%d' % oneCase[0],
                        'post=%d' % oneCase[1], 'efConstruction=800'
                    ], save_index, ['ef=%d' % ef]))

        NNsAndEfs = [
            (30, [700, 650, 550, 450, 350, 275, 200, 150, 120, 80, 50, 30]),
            (15, [80, 50, 30, 20]), (3, [120, 80, 60, 40, 20, 10, 8, 4, 2])
        ]

        for oneCase in NNsAndEfs:
            for ef in oneCase[1]:
                algos['SW-graph(nmslib)'].append(
                    NmslibReuseIndex(
                        m, 'sw-graph', [
                            'NN=%d' % oneCase[0], 'efConstruction=800',
                            'initIndexAttempts=1'
                        ], save_index,
                        ['efSearch=%d' % ef, 'initSearchAttempts=1']))

        # END: Non-Metric Space Library (nmslib) entries
        # RPForest only works for cosine
        algos['rpforest'] = [
            RPForest(leaf_size, n_trees)
            for n_trees in [3, 5, 10, 20, 40, 100, 200, 400]
            for leaf_size in [3, 5, 10, 20, 40, 100, 200, 400]
        ]
        L = []
        x = 1
        while True:
            L.append(x)
            if x >= 1400:
                break
            x = int(math.ceil(x * 1.1))
        algos['falconn'] = [FALCONN(m, 16, l, l) for l in L]

    return algos
Example no. 11
def get_algos(m):
    algos = {
        'lshf': [
            LSHF(m, 5, 10),
            LSHF(m, 5, 20),
            LSHF(m, 10, 20),
            LSHF(m, 10, 50),
            LSHF(m, 20, 100)
        ],
        'flann': [
            FLANN(m, 0.2),
            FLANN(m, 0.5),
            FLANN(m, 0.7),
            FLANN(m, 0.8),
            FLANN(m, 0.9),
            FLANN(m, 0.95),
            FLANN(m, 0.97),
            FLANN(m, 0.98),
            FLANN(m, 0.99),
            FLANN(m, 0.995)
        ],
        'panns': [
            PANNS(m, 5, 20),
            PANNS(m, 10, 10),
            PANNS(m, 10, 50),
            PANNS(m, 10, 100),
            PANNS(m, 20, 100),
            PANNS(m, 40, 100)
        ],
        'annoy': [
            Annoy(m, n_trees, search_k) for n_trees in [100, 200, 400]
            for search_k in [
                100, 200, 400, 1000, 2000, 4000, 10000, 20000, 40000, 100000,
                200000, 400000
            ]
        ],
        'nearpy': [
            NearPy(m, 10, 5),
            NearPy(m, 10, 10),
            NearPy(m, 10, 20),
            NearPy(m, 10, 40),  # NearPy(m, 10, 100),
            NearPy(m, 12, 5),
            NearPy(m, 12, 10),
            NearPy(m, 12, 20),
            NearPy(m, 12, 40),  # NearPy(m, 12, 100),
            NearPy(m, 14, 5),
            NearPy(m, 14, 10),
            NearPy(m, 14, 20),
            NearPy(m, 14, 40),  # NearPy(m, 14, 100),
            NearPy(m, 16, 5),
            NearPy(m, 16, 10),
            NearPy(m, 16, 15),
            NearPy(m, 16, 20),
            NearPy(m, 16, 25),
            NearPy(m, 16, 30),
            NearPy(m, 16, 40)
        ],  #, NearPy(m, 16, 50), NearPy(m, 16, 70), NearPy(m, 16, 90), NearPy(m, 16, 120), NearPy(m, 16, 150)],
        'kgraph': [
            KGraph(m, 20),
            KGraph(m, 50),
            KGraph(m, 100),
            KGraph(m, 200),
            KGraph(m, 500),
            KGraph(m, 1000),
            KGraph(m, 2000),
            KGraph(m, 4000),
            KGraph(m, 10000)
        ],
        'bruteforce': [BruteForce(m)],
        'ball': [
            BallTree(m, 10),
            BallTree(m, 20),
            BallTree(m, 40),
            BallTree(m, 100),
            BallTree(m, 200),
            BallTree(m, 400),
            BallTree(m, 1000)
        ],
        'kd': [
            KDTree(m, 10),
            KDTree(m, 20),
            KDTree(m, 40),
            KDTree(m, 100),
            KDTree(m, 200),
            KDTree(m, 400),
            KDTree(m, 1000)
        ],

        # START: Non-Metric Space Library (nmslib) entries
        'bruteforce0(nmslib)': [Nmslib(m, 'seq_search', ['copyMem=0'])],
        'bruteforce1(nmslib)': [Nmslib(m, 'seq_search', ['copyMem=1'])],
        'BallTree(nmslib)': [
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.99']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.95']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.90']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.85']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.8']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.7']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.6']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.5']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.4']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.3']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.2']),
            Nmslib(m, 'vptree', ['tuneK=10', 'desiredRecall=0.1']),
        ],
        'SW-graph(nmslib)': [
            Nmslib(m, 'small_world_rand',
                   ['NN=20', 'initIndexAttempts=4', 'initSearchAttempts=48']),
            Nmslib(m, 'small_world_rand',
                   ['NN=20', 'initIndexAttempts=4', 'initSearchAttempts=32']),
            Nmslib(m, 'small_world_rand',
                   ['NN=20', 'initIndexAttempts=4', 'initSearchAttempts=16']),
            Nmslib(m, 'small_world_rand',
                   ['NN=20', 'initIndexAttempts=4', 'initSearchAttempts=8']),
            Nmslib(m, 'small_world_rand',
                   ['NN=20', 'initIndexAttempts=4', 'initSearchAttempts=4']),
            Nmslib(m, 'small_world_rand',
                   ['NN=20', 'initIndexAttempts=4', 'initSearchAttempts=2']),
            Nmslib(m, 'small_world_rand',
                   ['NN=17', 'initIndexAttempts=4', 'initSearchAttempts=2']),
            Nmslib(m, 'small_world_rand',
                   ['NN=14', 'initIndexAttempts=4', 'initSearchAttempts=2']),
            Nmslib(m, 'small_world_rand',
                   ['NN=11', 'initIndexAttempts=5', 'initSearchAttempts=2']),
            Nmslib(m, 'small_world_rand',
                   ['NN=8', 'initIndexAttempts=5', 'initSearchAttempts=2']),
            Nmslib(m, 'small_world_rand',
                   ['NN=5', 'initIndexAttempts=5', 'initSearchAttempts=2']),
            Nmslib(m, 'small_world_rand',
                   ['NN=3', 'initIndexAttempts=5', 'initSearchAttempts=2']),
        ]
    }

    if m == 'euclidean':
        # Only works for euclidean distance
        algos['MP-lsh(lshkit)'] = [
            Nmslib(m, 'lsh_multiprobe', [
                'desiredRecall=0.99', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
            ]),
            Nmslib(m, 'lsh_multiprobe', [
                'desiredRecall=0.97', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
            ]),
            Nmslib(m, 'lsh_multiprobe', [
                'desiredRecall=0.95', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
            ]),
            Nmslib(m, 'lsh_multiprobe', [
                'desiredRecall=0.90', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
            ]),
            Nmslib(m, 'lsh_multiprobe', [
                'desiredRecall=0.85', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
            ]),
            Nmslib(m, 'lsh_multiprobe', [
                'desiredRecall=0.80', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
            ]),
            Nmslib(
                m, 'lsh_multiprobe',
                ['desiredRecall=0.7', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
                 ]),
            Nmslib(
                m, 'lsh_multiprobe',
                ['desiredRecall=0.6', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
                 ]),
            Nmslib(
                m, 'lsh_multiprobe',
                ['desiredRecall=0.5', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
                 ]),
            Nmslib(
                m, 'lsh_multiprobe',
                ['desiredRecall=0.4', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
                 ]),
            Nmslib(
                m, 'lsh_multiprobe',
                ['desiredRecall=0.3', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
                 ]),
            Nmslib(
                m, 'lsh_multiprobe',
                ['desiredRecall=0.2', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
                 ]),
            Nmslib(
                m, 'lsh_multiprobe',
                ['desiredRecall=0.1', 'H=1200001', 'T=10', 'L=50', 'tuneK=10'
                 ]),
        ]

    # END: Non-Metric Space Library (nmslib) entries

    if m == 'angular':
        # RPForest only works for cosine
        algos['rpforest'] = [
            RPForest(leaf_size, n_trees)
            for n_trees in [3, 5, 10, 20, 40, 100, 200, 400]
            for leaf_size in [3, 5, 10, 20, 40, 100, 200, 400]
        ]
        L = []
        x = 1
        while True:
            L.append(x)
            if x >= 1400:
                break
            x = int(math.ceil(x * 1.1))
        algos['falconn'] = [FALCONN(m, 16, l, l) for l in L]

    return algos
Example no. 12
                  sep="")
            fq.close()

            print('time query:', end_query - start_query)
            print('accuracy:', accuracy / len(xq) / k)

    quit()
    a = [350]
    b = [350]

    for leaf_size in a:
        for no_trees in b:
            fq = open('fq_RPForest.txt', 'a')
            if X.dtype != np.double:
                X = np.array(X).astype(np.double)
            t = RPForest(leaf_size, no_trees)
            t.fit(X)
            start_query = time.time()
            accuracy = 0
            for i in range(len(xq)):
                v = xq[i]
                if v.dtype != np.double:
                    v = np.array(v).astype(np.double)
                ans = t.query(v, k)
                for x in ans:
                    if x in gt[i]:
                        accuracy += 1

            end_query = time.time()
            print(leaf_size, no_trees)
            print(round(accuracy / len(xq) / k, 4),
Example no. 13
for k in range(hash_counts):
    redis_storage.store_hash_configuration(lshash[k])


## RPFOREST TEST
from rpforest import RPForest

leaf_size = 5
n_trees = 20
name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
model = RPForest(leaf_size=leaf_size, no_trees=n_trees)

# Fitting: ensure a C-contiguous array (works around a Cython-level error otherwise)
features = features.copy(order='C')
model.fit(features)
model.clear()

# Indexing: re-insert every vector under its own identifier
for i, x in enumerate(features):
    t = Timer()
    with t:
        model.index(dict_feat[i], x.tolist())

# Querying
for i in range(features.shape[0]):
    t = Timer()
    with t:
        results = model.get_candidates(features[i])
    print('queried', dict_feat[i], 'results', results)
Example no. 14
def __init__(self, leaf_size, n_trees):
    from rpforest import RPForest
    self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
    self._model = RPForest(leaf_size=leaf_size, no_trees=n_trees)
Example no. 15
def get_algos(m):
    algos = {
        'lshf': [LSHF(m, 5, 10), LSHF(m, 5, 20), LSHF(m, 10, 20), LSHF(m, 10, 50), LSHF(m, 20, 100)],
        'flann': [FLANN(m, 0.2), FLANN(m, 0.5), FLANN(m, 0.7), FLANN(m, 0.8), FLANN(m, 0.9), FLANN(m, 0.95), FLANN(m, 0.97), FLANN(m, 0.98), FLANN(m, 0.99), FLANN(m, 0.995)],
        'panns': [PANNS(m, 5, 20), PANNS(m, 10, 10), PANNS(m, 10, 50), PANNS(m, 10, 100), PANNS(m, 20, 100), PANNS(m, 40, 100)],
        'annoy': [Annoy(m, n_trees, search_k) for n_trees in [100, 200, 400] for search_k in [100, 200, 400, 1000, 2000, 4000, 10000, 20000, 40000, 100000, 200000, 400000]],
        'nearpy': [NearPy(m, 10, 5), NearPy(m, 10, 10), NearPy(m, 10, 20), NearPy(m, 10, 40), # NearPy(m, 10, 100),
                   NearPy(m, 12, 5), NearPy(m, 12, 10), NearPy(m, 12, 20), NearPy(m, 12, 40), # NearPy(m, 12, 100),
                   NearPy(m, 14, 5), NearPy(m, 14, 10), NearPy(m, 14, 20), NearPy(m, 14, 40), # NearPy(m, 14, 100),
                   NearPy(m, 16, 5), NearPy(m, 16, 10), NearPy(m, 16, 15), NearPy(m, 16, 20), NearPy(m, 16, 25), NearPy(m, 16, 30), NearPy(m, 16, 40)], #, NearPy(m, 16, 50), NearPy(m, 16, 70), NearPy(m, 16, 90), NearPy(m, 16, 120), NearPy(m, 16, 150)],
        'bruteforce': [BruteForce(m)],
        'ball': [BallTree(m, 10), BallTree(m, 20), BallTree(m, 40), BallTree(m, 100), BallTree(m, 200), BallTree(m, 400), BallTree(m, 1000)],
        'kd': [KDTree(m, 10), KDTree(m, 20), KDTree(m, 40), KDTree(m, 100), KDTree(m, 200), KDTree(m, 400), KDTree(m, 1000)],

        # START: Non-Metric Space Library (nmslib) entries
        'bruteforce0(nmslib)': [NmslibNewIndex(m, 'seq_search', ['copyMem=0'])],
        # We don't need copyMem=1 now, because the new Python wrapper already re-creates data points.
        #'bruteforce1(nmslib)': [NmslibNewIndex(m, 'seq_search', ['copyMem=1'])],

        'BallTree(nmslib)': [
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.99']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.95']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.90']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.85']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.8']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.7']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.6']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.5']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.4']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.3']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.2']),
            NmslibNewIndex(m, 'vptree', ['tuneK=10', 'desiredRecall=0.1']),
        ],

        'hnsw(nmslib)': [],

        'SW-graph(nmslib)' :[]
    }

    if m == 'euclidean':
        # kgraph 
        kgraph_preset = {'reverse': -1}
        kgraph_Ps = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        algos['kgraph'] = [KGraph(m, P, kgraph_preset) for P in kgraph_Ps]

        # nmslib algorithms
        # Only works for euclidean distance
        MsAndEfs=[
                [32,[10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160, 200, 300, 400]],
                [4,[1, 2, 5, 10, 20, 30,  50,  70,  90,  120]],
                [8,[1,2,5,10,20, 30, 50, 70, 90, 120, 160, ]],
                [20, [2, 5, 10, 15, 20, 30, 40, 50, 70, 80,120,200,400]],
                [12, [1, 2, 5, 10, 15, 20, 30, 40, 50, 70, 80,120]]]
        for MsAndEf in MsAndEfs:
            for ef in MsAndEf[1]:
                algos['hnsw(nmslib)'].append(NmslibReuseIndex(m, 'hnsw', ['M='+str(MsAndEf[0]), 'efConstruction=400'], ['ef=' + str(ef), 'searchMethod=3']))
        
        algos['MP-lsh(lshkit)'] = [
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.99','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.97','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.95','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.90','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.85','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.80','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.7','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.6','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.5','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.4','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.3','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.2','H=1200001','T=10','L=50','tuneK=10']),
            NmslibNewIndex(m, 'lsh_multiprobe', ['desiredRecall=0.1','H=1200001','T=10','L=50','tuneK=10']),
        ]

        algos['SW-graph(nmslib)'] = [
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=800',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=400',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=200',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=100',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=50',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=30',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=20',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=15',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=10', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=10',   'initSearchAttempts=1']),


            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=30',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=25',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=20',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=15',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=10',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=5',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=4',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=3',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=2',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=5', 'efConstruction=400', 'initIndexAttempts=1'], ['efSearch=1',   'initSearchAttempts=1']),
        ]



    # END: Non-Metric Space Library (nmslib) entries

    if m == 'angular':
        # kgraph 
        kgraph_preset = {'reverse': -1, 'K': 200, 'L': 300, 'S': 20}
        kgraph_Ps = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
        algos['kgraph'] = [KGraph(m, P, kgraph_preset) for P in kgraph_Ps]

        # nmslib algorithms
        MsAndEfs=[
                [32,[10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 140, 160, 200, 300, 400, 600, 700, 800, 1000, 1200, 1400,1600, 2000]],
                [64,[10,  30,  50,  70,  90,  120,  160,  200, 400, 600, 700, 800, 1000, 1400, 1600, 2000]],
                [96,[10, 30, 50, 70, 90, 120, 160, 200, 400, 700, 1000, 1400,1600, 2000]],
                [20, [2, 5, 10, 15, 20, 30, 40, 50, 70, 80]],
                [12, [1, 2, 5, 10, 15, 20, 30, 40, 50, 70, 80]]]
        for MsAndEf in MsAndEfs:
            for ef in MsAndEf[1]:
                algos['hnsw(nmslib)'].append(NmslibReuseIndex(m, 'hnsw', ['M='+str(MsAndEf[0]), 'efConstruction=1600'], ['ef=' + str(ef), 'searchMethod=4']))
        # RPForest only works for cosine
        algos['rpforest'] = [RPForest(leaf_size, n_trees) for n_trees in [3, 5, 10, 20, 40, 100, 200, 400] for leaf_size in [3, 5, 10, 20, 40, 100, 200, 400]]
        L = []
        x = 1
        while True:
            L.append(x)
            if x >= 1400:
                break
            x = int(math.ceil(x * 1.1))
        algos['falconn'] = [FALCONN(m, 16, l, l) for l in L]

        # START: Non-Metric Space Library (nmslib) entries
        algos['SW-graph(nmslib)'] = [
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=700',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=650',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=550',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=450',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=350',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=275',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=200',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=150',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=120',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=80',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=50',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=30', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=30',   'initSearchAttempts=1']),

            NmslibReuseIndex(m, 'sw-graph', ['NN=15', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=80',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=15', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=50',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=15', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=30',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=15', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=20',   'initSearchAttempts=1']),

            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=120',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=80',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=60',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=40',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=20',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=10',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=8',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=4',   'initSearchAttempts=1']),
            NmslibReuseIndex(m, 'sw-graph', ['NN=3', 'efConstruction=1600', 'initIndexAttempts=1'], ['efSearch=2',   'initSearchAttempts=1']),
        ]


        # END: Non-Metric Space Library (nmslib) entries

    return algos
Example no. 16
def _get_random_projection_forest(self, leaf_size=20, no_trees=10):
    self.embed_feat = self.pca.transform(self.feat)
    rpf = RPForest(leaf_size=leaf_size, no_trees=no_trees)
    rpf.fit(self.embed_feat)
    return rpf
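
Because the forest above is fitted on PCA-transformed features, query vectors have to be passed through the same pca.transform before calling rpf.query. A hedged sketch of a matching lookup method (_query_forest is not part of the original class; attribute names mirror the snippet):

def _query_forest(self, rpf, x, k=10):
    # Project the raw feature vector into the PCA space the forest was fitted in.
    x_embedded = self.pca.transform(x.reshape(1, -1))[0]
    return rpf.query(x_embedded, k)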