Exemplo n.º 1
0
def test_sample_training():
    """Check query precision when the forest is fit on a sample, then re-indexed.

    For each ``(no_trees, expected_precision)`` pair a forest with
    ``leaf_size=10`` is fit on the first quarter of the training rows,
    cleared, and then every training row is indexed individually.  The
    mean overlap between the 10 neighbours returned by ``tree.query`` and
    the 10 rows with the largest dot product against each test point must
    reach ``expected_precision``.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data.  Floor division keeps the slice bound an
        # int; true division yields a float on Python 3, which is rejected
        # as a slice index.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        # NOTE(review): pokes a private attribute so the forest refers to the
        # full training matrix rather than the sample - confirm this is still
        # what RPForest expects.
        tree._X = X_train

        precision = 0.0
        # Row-normalise in place (assuming X_train is a NumPy array); the
        # same array object is reused by later passes of the outer loop.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            # Reference answer: the 10 rows with the largest dot product.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
Exemplo n.º 2
0
def test_candidates_mnist():
    """Sanity-check ``get_candidates`` and its precision on the MNIST data.

    For every forest size, the candidates returned with a very large limit
    must be unique, must not contain ``-1`` and must be valid row numbers.
    The first 10 candidates returned with a limit of 10 are then compared
    against the 10 rows with the largest dot product, and the mean overlap
    must reach the threshold paired with that forest size.
    """
    X_train, X_test = _get_mnist_data()
    cases = ((1, 0.05), (5, 0.12), (10, 0.2), (50, 0.5), (80, 0.6))

    for no_trees, min_precision in cases:
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)

        # In-place row normalisation (assuming a NumPy array); later passes
        # of this loop therefore see already-normalised rows.
        X_train /= np.linalg.norm(X_train, axis=1)[:, None]

        overlap_total = 0.0
        for query in X_test:
            scores = np.dot(X_train, query)
            exact = np.argsort(-scores)[:10]

            everything = forest.get_candidates(query, 100000)
            assert len(set(everything)) == len(everything)
            assert -1 not in everything
            assert (everything < X_train.shape[0]).all()

            found = forest.get_candidates(query, 10)[:10]
            assert (found < X_train.shape[0]).all()

            overlap_total += len(set(found) & set(exact)) / 10.0

        mean_precision = overlap_total / X_test.shape[0]
        assert mean_precision >= min_precision
Exemplo n.º 3
0
def test_serialization_mnist():
    """Verify that a pickled-and-restored forest still answers queries well.

    For each forest size a forest is fit on the training data, round-tripped
    through ``pickle``, and queried with every test point.  Returned indices
    must be valid row numbers, and the mean overlap between the 10 returned
    neighbours and the 10 rows with the largest dot product must reach the
    threshold paired with that forest size.
    """
    X_train, X_test = _get_mnist_data()
    cases = ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9))

    for no_trees, min_precision in cases:
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)

        # Only the restored copy is queried below.
        forest = pickle.loads(pickle.dumps(forest))

        # In-place row normalisation (assuming a NumPy array); later passes
        # of this loop therefore see already-normalised rows.
        X_train /= np.linalg.norm(X_train, axis=1)[:, None]

        overlap_total = 0.0
        for query in X_test:
            scores = np.dot(X_train, query)
            exact = np.argsort(-scores)[:10]
            found = forest.query(query, 10)[:10]
            assert (found < X_train.shape[0]).all()

            overlap_total += len(set(found) & set(exact)) / 10.0

        mean_precision = overlap_total / X_test.shape[0]
        assert mean_precision >= min_precision
Exemplo n.º 4
0
def lvnn(fp, nt=3, k=5, iter=5, leaves=50):
    """Build an approximate k-nearest-neighbour list and refine it.

    An ``RPForest`` is fit on ``fp`` and queried once per row to seed each
    row's neighbour list.  Then, ``iter`` times, every row's candidates are
    replaced by the best ``k`` among its neighbours' neighbours.

    Parameters
    ----------
    fp : array indexed as ``fp[i, :]`` with ``fp.shape[0]`` rows
        The points, one per row.
    nt : number of trees passed to ``RPForest`` as ``no_trees``.
    k : number of neighbours kept per row.
    iter : number of refinement passes.
    leaves : value passed to ``RPForest`` as ``leaf_size``.

    Returns
    -------
    numpy array of shape ``(fp.shape[0] * k, 3)`` whose rows are
    ``(row index, neighbour index, negated Euclidean distance)``.  The third
    column stays ``-1`` for entries never touched by a refinement pass.
    """

    n_points = fp.shape[0]
    # [:, :, 0] holds neighbour indices, [:, :, 1] the negated distance;
    # everything starts at the -1 sentinel.
    nn = np.zeros((n_points, k, 2)) - 1

    print(' start Tree build')
    model = RPForest(leaf_size=leaves, no_trees=nt)
    model.fit(fp)
    for i in range(0, n_points):
        # model.query is expected to hand back k indices for this row.
        nn[i, :, 0] = model.query(fp[i, ], k)

    t = 0
    while t < iter:
        t += 1
        # Snapshot the previous pass: a plain assignment would alias ``nn``
        # and let rows updated earlier in this pass leak into later rows.
        old_nn = nn.copy()
        for i in range(0, n_points):
            h = set()
            for j in range(0, k):
                # Indices are stored in a float array; cast back to int
                # because NumPy rejects float indices.
                ji = int(old_nn[i, j, 0])
                for l in range(0, k):
                    li = int(old_nn[ji, l, 0])
                    d = -np.linalg.norm(fp[i, :] - fp[li, :])
                    h.add((li, d))
            # Keep the k closest candidates.  ``d`` is a negated distance,
            # so the closest have the largest ``d``; the index is only a
            # tie-breaker.  (Ordering the raw tuples would rank by index.)
            best = nsmallest(k, h, key=lambda cand: (-cand[1], cand[0]))
            if best:
                # There may be fewer than k distinct candidates; overwrite
                # only as many slots as were found.
                nn[i, :len(best), :] = np.array(best)

    # Flatten to one (row, neighbour, value) triple per stored neighbour.
    csr = np.zeros((n_points * k, 3))
    l = 0
    for i in range(n_points):
        for j in range(k):
            csr[l, 0] = i
            csr[l, 1] = nn[i, j, 0]
            csr[l, 2] = nn[i, j, 1]
            l = l + 1
    return csr
Exemplo n.º 5
0
def test_candidates_mnist():
    """Exercise ``get_candidates`` on MNIST for several forest sizes.

    With a very large candidate limit the result must contain no
    duplicates, no ``-1`` and only valid row numbers.  With a limit of 10,
    the first 10 candidates are scored against the 10 rows with the largest
    dot product; the mean overlap must meet the threshold for that forest
    size.
    """
    X_train, X_test = _get_mnist_data()

    size_and_threshold = (
        (1, 0.05),
        (5, 0.12),
        (10, 0.2),
        (50, 0.5),
        (80, 0.6),
    )

    for no_trees, threshold in size_and_threshold:
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)

        # Normalise rows in place (assuming a NumPy array), so the same
        # array is already normalised on later passes of this loop.
        row_norms = np.linalg.norm(X_train, axis=1)
        X_train /= row_norms[:, np.newaxis]

        running = 0.0
        for point in X_test:
            exact = np.argsort(-np.dot(X_train, point))[:10]

            wide = forest.get_candidates(point, 100000)
            assert len(wide) == len(set(wide))
            assert -1 not in wide
            assert (wide < X_train.shape[0]).all()

            narrow = forest.get_candidates(point, 10)[:10]
            assert (narrow < X_train.shape[0]).all()

            shared = set(narrow) & set(exact)
            running += len(shared) / 10.0

        running /= X_test.shape[0]
        assert running >= threshold
Exemplo n.º 6
0
def test_find_self():
    """Every training point must find itself, before and after pickling.

    For each forest size, each training row is queried and must come back
    as its own first neighbour.  Each code returned by ``encode`` for that
    row must also name a leaf whose index set contains the row.  The same
    checks are repeated on a pickle round-tripped copy of the forest.
    """
    X_train, X_test = _get_mnist_data()

    def _assert_rows_find_themselves(forest):
        # Map each leaf code to the set of row indices stored in it.
        leaves = dict((code, set(members))
                      for code, members in forest.get_leaf_nodes())
        for row, point in enumerate(X_train):
            neighbours = forest.query(point, 10)[:10]
            assert neighbours[0] == row

            for code in forest.encode(point):
                assert row in leaves[code]

    # The second tuple element is not used by this test.
    for no_trees, _ in ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9)):
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)
        _assert_rows_find_themselves(forest)

        restored = pickle.loads(pickle.dumps(forest))
        _assert_rows_find_themselves(restored)
Exemplo n.º 7
0
def test_sample_training():
    """Check query precision for a forest fit on a sample and then re-indexed.

    For each ``(no_trees, expected_precision)`` pair a forest with
    ``leaf_size=10`` is fit on the first quarter of the training rows,
    cleared, and every training row is then indexed one by one.  The mean
    overlap between the 10 neighbours from ``tree.query`` and the 10 rows
    with the largest dot product against each test point must be at least
    ``expected_precision``.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05),
                                         (5, 0.3),
                                         (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data.  ``//`` keeps the slice bound an integer;
        # ``/`` produces a float on Python 3, which cannot be used to slice.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        # NOTE(review): sets a private attribute so the forest refers to the
        # full training matrix instead of the sample - confirm against the
        # library version in use.
        tree._X = X_train

        precision = 0.0
        # Row-normalise in place (assuming X_train is a NumPy array); later
        # passes of the outer loop reuse the same, already-normalised array.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            # Reference answer: the 10 rows with the largest dot product.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
Exemplo n.º 8
0
def test_find_self():
    """Check that each indexed row is its own nearest neighbour.

    For each forest size the fitted forest is checked twice: once directly
    and once after a ``pickle`` round trip.  Each check requires that
    querying a training row returns that row's index first, and that every
    code from ``encode`` refers to a leaf containing that row.
    """
    X_train, X_test = _get_mnist_data()
    forest_sizes = ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9))

    def _verify(forest):
        # leaf code -> set of row indices held by that leaf
        members_by_code = {}
        for code, members in forest.get_leaf_nodes():
            members_by_code[code] = set(members)

        for idx, sample in enumerate(X_train):
            top = forest.query(sample, 10)[:10]
            assert top[0] == idx

            codes = forest.encode(sample)
            for code in codes:
                assert idx in members_by_code[code]

    # Only the forest size is used; the paired number is ignored here.
    for no_trees, _ignored in forest_sizes:
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)
        _verify(forest)

        forest = pickle.loads(pickle.dumps(forest))
        _verify(forest)
Exemplo n.º 9
0
def test_serialization_mnist():
    """Query precision must survive a pickle round trip of the forest.

    Each forest is fit, serialised with ``pickle.dumps`` and restored with
    ``pickle.loads`` before any query is made.  Every returned index must be
    a valid training row number, and the mean overlap with the 10 rows of
    largest dot product must meet the threshold for that forest size.
    """
    X_train, X_test = _get_mnist_data()

    size_and_threshold = ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9))
    for no_trees, threshold in size_and_threshold:
        fitted = RPForest(leaf_size=10, no_trees=no_trees)
        fitted.fit(X_train)

        # Serialize and deserialize; queries go to the restored object.
        blob = pickle.dumps(fitted)
        forest = pickle.loads(blob)

        # Normalise rows in place (assuming a NumPy array), so the same
        # array is already normalised on later passes of this loop.
        row_norms = np.linalg.norm(X_train, axis=1)
        X_train /= row_norms[:, np.newaxis]

        running = 0.0
        for point in X_test:
            exact = np.argsort(-np.dot(X_train, point))[:10]
            returned = forest.query(point, 10)[:10]
            assert (returned < X_train.shape[0]).all()

            shared = set(returned) & set(exact)
            running += len(shared) / 10.0

        running /= X_test.shape[0]
        assert running >= threshold
Exemplo n.º 10
0
def test_max_size():
    """Assert that every leaf of a fitted forest holds fewer than 10 indices.

    The forest is built with ``leaf_size=10`` and ``no_trees=10`` on the
    training split; the test split is not used.
    """
    X_train, _ = _get_mnist_data()

    forest = RPForest(leaf_size=10, no_trees=10)
    forest.fit(X_train)

    # Each item is a (leaf code, indices) pair; only the indices matter.
    for _code, members in forest.get_leaf_nodes():
        assert len(members) < 10
Exemplo n.º 11
0
def test_max_size():
    """Check the size of every leaf in a forest fit on the training data.

    A forest with ``leaf_size=10`` and ``no_trees=10`` is fit, and each
    leaf returned by ``get_leaf_nodes`` must contain fewer than 10 indices.
    """
    train_rows, _unused_test_rows = _get_mnist_data()

    forest = RPForest(leaf_size=10, no_trees=10)
    forest.fit(train_rows)

    for leaf in forest.get_leaf_nodes():
        # Unpack the (code, indices) pair; the code itself is not checked.
        _code, indices = leaf
        assert len(indices) < 10
Exemplo n.º 12
0
class RPForest(BaseANN):
    """Thin ``BaseANN`` wrapper that delegates to ``rpforest.RPForest``."""

    def __init__(self, leaf_size, n_trees):
        """Create the underlying forest and record a descriptive name.

        ``n_trees`` is forwarded to the library under its ``no_trees``
        keyword.
        """
        # Imported lazily and aliased so the library class is not confused
        # with this wrapper, which has the same name.
        from rpforest import RPForest as _LibraryForest
        self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
        self._model = _LibraryForest(no_trees=n_trees, leaf_size=leaf_size)

    def fit(self, X):
        """Fit the wrapped forest on ``X``."""
        forest = self._model
        forest.fit(X)

    def query(self, v, n):
        """Return whatever the wrapped forest's ``query(v, n)`` returns."""
        forest = self._model
        return forest.query(v, n)
Exemplo n.º 13
0
class RPForest(BaseANN):
    """Adapter exposing ``rpforest.RPForest`` through ``fit`` and ``query``."""

    def __init__(self, leaf_size, n_trees):
        """Build the display name, then construct the library forest.

        The library takes the tree count as ``no_trees``; this wrapper
        accepts it as ``n_trees``.
        """
        # Local, aliased import: the library's class shares this wrapper's
        # name, so the alias keeps the two apart inside this method.
        from rpforest import RPForest as _Impl
        self.name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
        settings = {'leaf_size': leaf_size, 'no_trees': n_trees}
        self._model = _Impl(**settings)

    def fit(self, X):
        """Delegate fitting to the wrapped model."""
        self._model.fit(X)

    def query(self, v, n):
        """Delegate the query for ``n`` results near ``v`` to the model."""
        answer = self._model.query(v, n)
        return answer
Exemplo n.º 14
0
def test_multiple_fit_calls():
    """Fitting the same forest twice must leave it with exactly 10 trees.

    The tree count is checked after the first ``fit`` call and again after
    the second.
    """
    X_train, _ = _get_mnist_data()

    forest = RPForest(leaf_size=10, no_trees=10)

    # Fit, check, fit again, check again.
    for _attempt in range(2):
        forest.fit(X_train)
        assert len(forest.trees) == 10
Exemplo n.º 15
0
def test_multiple_fit_calls():
    """Check that repeated ``fit`` calls keep the tree count at 10.

    A forest configured with ``no_trees=10`` is fit twice on the same
    training data; ``len(forest.trees)`` must equal 10 after each call.
    """
    train_rows, _unused_test_rows = _get_mnist_data()
    expected_trees = 10

    forest = RPForest(leaf_size=10, no_trees=10)

    forest.fit(train_rows)
    after_first = len(forest.trees)
    assert after_first == expected_trees

    forest.fit(train_rows)
    after_second = len(forest.trees)
    assert after_second == expected_trees
Exemplo n.º 16
0
def test_encoding_mnist():
    """``encode`` must give equal results when called twice on one point.

    The check runs over every training row for each forest size, first on
    the freshly fitted forest and then on a pickle round-tripped copy.
    """
    X_train, X_test = _get_mnist_data()

    def _assert_repeatable(forest):
        for point in X_train:
            first = forest.encode(point)
            second = forest.encode(point)
            assert first == second

    # Only the forest size is used; the paired number is ignored here.
    for no_trees, _ in ((1, 0.05), (5, 0.3), (10, 0.5), (50, 0.9)):
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)
        _assert_repeatable(forest)

        restored = pickle.loads(pickle.dumps(forest))
        _assert_repeatable(restored)
Exemplo n.º 17
0
def test_encoding_mnist():
    """Check that encoding a training point is repeatable.

    For each forest size, two consecutive ``encode`` calls on the same
    training row must compare equal.  The same is required of a forest
    restored from ``pickle``.
    """
    X_train, X_test = _get_mnist_data()
    forest_sizes = (
        (1, 0.05),
        (5, 0.3),
        (10, 0.5),
        (50, 0.9),
    )

    def _encodes_consistently(forest):
        for sample in X_train:
            assert forest.encode(sample) == forest.encode(sample)

    # The second element of each pair is not used by this test.
    for no_trees, _ignored in forest_sizes:
        forest = RPForest(leaf_size=10, no_trees=no_trees)
        forest.fit(X_train)
        _encodes_consistently(forest)

        forest = pickle.loads(pickle.dumps(forest))
        _encodes_consistently(forest)
Exemplo n.º 18
0
 def _get_random_projection_forest(self, leaf_size=20, no_trees=10):
     """Fit and return an ``RPForest`` on the PCA-transformed features.

     The result of ``self.pca.transform(self.feat)`` is stored on
     ``self.embed_feat`` as a side effect, and the forest is fit on it.
     """
     projected = self.pca.transform(self.feat)
     self.embed_feat = projected
     forest = RPForest(no_trees=no_trees, leaf_size=leaf_size)
     forest.fit(self.embed_feat)
     return forest
Exemplo n.º 19
0






## RPFOREST TEST
# Fits an RPForest on ``features``, clears it, re-indexes every row under its
# ``dict_feat`` identifier, then prints the candidates found for each row.
# ``features``, ``dict_feat`` and ``Timer`` must already be defined by the
# surrounding script.
from rpforest import RPForest
leaf_size = 5
n_trees = 20
name = 'RPForest(leaf_size=%d, n_trees=%d)' % (leaf_size, n_trees)
model = RPForest(leaf_size=leaf_size, no_trees=n_trees)
# fitting
# C-ordered copy; original note: something related to a Cython error.
features = features.copy(order='C')
model.fit(features)
model.clear()
# indexing: each row is stored under its identifier from dict_feat
for i, x in enumerate(features):
    t = Timer()
    with t:
        model.index(dict_feat[i], x.tolist())
# querying
for i in range(features.shape[0]):
    t = Timer()
    with t:
        results = model.get_candidates(features[i])
    # Single pre-formatted argument: same output as the old print statement,
    # and valid syntax on both Python 2 and Python 3.
    print('queried %s results %s' % (dict_feat[i], results))


Exemplo n.º 20
0
            fq.close()

            print('time query:', end_query - start_query)
            print('accuracy:', accuracy / len(xq) / k)

    quit()
    a = [350]
    b = [350]

    for leaf_size in a:
        for no_trees in b:
            fq = open('fq_RPForest.txt', 'a')
            if X.dtype != np.double:
                X = np.array(X).astype(np.double)
            t = RPForest(leaf_size, no_trees)
            t.fit(X)
            start_query = time.time()
            accuracy = 0
            for i in range(len(xq)):
                v = xq[i]
                if v.dtype != np.double:
                    v = np.array(v).astype(np.double)
                ans = t.query(v, k)
                for x in ans:
                    if x in gt[i]:
                        accuracy += 1

            end_query = time.time()
            print(leaf_size, no_trees)
            print(round(accuracy / len(xq) / k, 4),
                  ": ",