示例#1
0
def test_distance_matrix():
    """Check `distance_matrix` against element-wise calls to `dist`.

    For several integer and floating-point dtypes, the matrix returned by
    `dipymetric.distance_matrix` is compared entry by entry with
    `dipymetric.dist`, both when a single dataset is paired with itself
    and when two different datasets are paired.
    """
    metric = dipymetric.SumPointwiseEuclideanMetric()

    for dtype in [np.int32, np.int64, np.float32, np.float64]:
        # --- `data` paired with itself -------------------------------
        data = (np.random.rand(4, 10, 3) * 10).astype(dtype)
        n_items = len(data)

        D = dipymetric.distance_matrix(metric, data)
        assert_equal(D.shape, (n_items, n_items))
        # Every element is at distance zero from itself.
        assert_array_equal(np.diag(D), np.zeros(n_items))

        if metric.is_order_invariant:
            # An order-invariant metric must give a symmetric matrix.
            assert_array_equal(D, D.T)

        for i, first in enumerate(data):
            for j, second in enumerate(data):
                expected = dipymetric.dist(metric, first, second)
                assert_equal(D[i, j], expected)

        # --- `data` paired with a second dataset `data2` -------------
        data2 = (np.random.rand(3, 10, 3) * 10).astype(dtype)
        n_others = len(data2)

        D = dipymetric.distance_matrix(metric, data, data2)
        assert_equal(D.shape, (n_items, n_others))

        for i, first in enumerate(data):
            for j, second in enumerate(data2):
                expected = dipymetric.dist(metric, first, second)
                assert_equal(D[i, j], expected)
示例#2
0
def test_quickbundles_empty_data():
    """Clustering an empty dataset yields no clusters and no centroids.

    The check is run twice: once with the default ordering and once with
    an explicitly empty `ordering` list.
    """
    threshold = 10
    metric = dipymetric.SumPointwiseEuclideanMetric()

    # First variant relies on the default ordering, second passes an
    # explicit empty one.
    for extra_kwargs in ({}, {"ordering": []}):
        clusters = quickbundles([], metric, threshold, **extra_kwargs)
        assert_equal(len(clusters), 0)
        assert_equal(len(clusters.centroids), 0)
示例#3
0
def test_quickbundles_2D():
    """Test quickbundles clustering on 2D points with the Euclidean metric.

    Five groups of 1, 2, 3, 4 and 5 points are drawn around well-separated
    centers. The test asserts that the groups are recovered for 100 random
    orderings, that re-clustering each cluster with a zero threshold
    isolates every point, and that infinite / zero thresholds give one
    cluster / one cluster per point respectively.
    """
    rng = np.random.RandomState(42)

    # (center, number of points) for each group, in index order.
    groups = [
        ([0, 0], 1),
        ([10, 10], 2),
        ([-10, 10], 3),
        ([10, -10], 4),
        ([-10, -10], 5),
    ]
    data = []
    for center, nb_points in groups:
        offset = np.array(center)
        for _ in range(nb_points):
            data.append(rng.randn(1, 2) + offset)
    data = np.array(data, dtype=dtype)

    # Indices of the points belonging to each group above.
    clusters_truth = [
        [0],
        [1, 2],
        [3, 4, 5],
        [6, 7, 8, 9],
        [10, 11, 12, 13, 14],
    ]

    # # Uncomment the following to visualize this test
    # import pylab as plt
    # plt.plot(*zip(*data[0:1, 0]), linestyle='None', marker='s')
    # plt.plot(*zip(*data[1:3, 0]), linestyle='None', marker='o')
    # plt.plot(*zip(*data[3:6, 0]), linestyle='None', marker='+')
    # plt.plot(*zip(*data[6:10, 0]), linestyle='None', marker='.')
    # plt.plot(*zip(*data[10:, 0]), linestyle='None', marker='*')
    # plt.show()

    # Theoretically, using a threshold above the following value will not
    # produce the expected results.
    threshold = np.sqrt(2 * (10**2)) - np.sqrt(2)
    metric = dipymetric.SumPointwiseEuclideanMetric()
    ordering = np.arange(len(data))

    for _ in range(100):
        rng.shuffle(ordering)
        clusters = quickbundles(data, metric, threshold, ordering=ordering)

        # Each found cluster must match the ground-truth group whose first
        # index it contains.
        for cluster in clusters:
            for cluster_truth in clusters_truth:
                if cluster_truth[0] not in cluster.indices:
                    continue
                assert_equal(sorted(cluster.indices),
                             sorted(cluster_truth))

    # Re-cluster the members of each cluster (from the last ordering) with
    # a zero threshold: every point should end up alone.
    for cluster in clusters:
        subclusters = quickbundles(data,
                                   metric,
                                   threshold=0,
                                   ordering=cluster.indices)
        assert_equal(len(subclusters), len(cluster))
        flattened = sorted(itertools.chain.from_iterable(subclusters))
        assert_equal(flattened, sorted(cluster.indices))

    all_indices = range(len(data))

    # A very large threshold should produce only 1 cluster
    clusters = quickbundles(data, metric, threshold=np.inf)
    assert_equal(len(clusters), 1)
    assert_equal(len(clusters[0]), len(data))
    assert_array_equal(clusters[0].indices, all_indices)

    # A very small threshold should produce N clusters where N=len(data)
    clusters = quickbundles(data, metric, threshold=0)
    assert_equal(len(clusters), len(data))
    assert_array_equal([len(cluster) for cluster in clusters],
                       np.ones(len(data)))
    assert_array_equal(
        list(itertools.chain.from_iterable(
            cluster.indices for cluster in clusters)),
        all_indices)