Exemplo n.º 1
0
def test_sample_training():
    """Check query precision of an RPForest fitted on a sample of the data.

    For each ``(no_trees, expected_precision)`` pair the forest is fitted on
    the first quarter of the training rows, cleared, and then every training
    row is indexed under its row position.  The ten results returned by
    ``tree.query`` for each test point are compared with the ten training
    rows that have the largest dot product against that point, and the mean
    overlap fraction must reach ``expected_precision``.

    Raises:
        AssertionError: if the mean precision for any forest size is below
            its expected threshold.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05),
                                         (5, 0.3),
                                         (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data.  Floor division keeps the slice bound an
        # integer; true division yields a float, which is not a valid slice
        # index on Python 3.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0
        # NOTE(review): this scales the rows in place, so the array just
        # assigned to tree._X is modified as well (assuming X_train is a
        # NumPy array, the two names refer to the same object).  It also
        # runs again on every pass of the outer loop, where the rows are
        # already unit length.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            # Negating the scores makes argsort list the largest dot
            # products first; keep the top ten as the ground truth.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            # Fraction of the ten true neighbours that the forest returned.
            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision
Exemplo n.º 2
0
def test_sample_training():
    """Verify RPForest precision when fitting on a sample, then indexing all rows.

    The forest is built for several tree counts.  Each one is fitted on the
    leading quarter of the training rows, cleared, and re-populated by
    indexing every training row under its position.  For every test point,
    the first ten results of ``tree.query`` are compared against the ten
    training rows with the highest dot product; the averaged overlap must be
    at least the threshold paired with that tree count.

    Raises:
        AssertionError: when the averaged precision is below the expected
            value for any tree count.
    """

    X_train, X_test = _get_mnist_data()

    for no_trees, expected_precision in ((1, 0.05), (5, 0.3), (10, 0.5),
                                         (50, 0.9)):

        tree = RPForest(leaf_size=10, no_trees=no_trees)
        # Fit on quarter of data.  Use ``//`` so the slice bound is an int:
        # ``/`` produces a float on Python 3 and slicing with it fails.
        X_sample = X_train[:X_train.shape[0] // 4]
        tree.fit(X_sample)
        # Clear and index everything
        tree.clear()
        for i, x in enumerate(X_train):
            tree.index(i, x)
        tree._X = X_train

        precision = 0.0
        # NOTE(review): in-place division; tree._X was bound to this same
        # object above, so it is rescaled too (assuming a NumPy array).
        # Repeated on each outer iteration, by which point the rows are
        # already unit length.
        X_train /= np.linalg.norm(X_train, axis=1)[:, np.newaxis]
        for x_test in X_test:
            # argsort on the negated scores orders rows by descending dot
            # product; the first ten are the reference neighbours.
            true_nns = np.argsort(-np.dot(X_train, x_test))[:10]
            nns = tree.query(x_test, 10)[:10]

            # Share of the ten reference neighbours found by the forest.
            precision += len(set(nns) & set(true_nns)) / 10.0

        precision /= X_test.shape[0]

        assert precision >= expected_precision