def test_distance_matrix():
    """Check `distance_matrix` against element-wise calls to `dist`.

    For several integer and floating point dtypes, the matrix returned by
    `dipymetric.distance_matrix` is compared entry by entry with
    `dipymetric.dist`, first for `data` against itself and then for `data`
    against a second dataset of a different length.
    """
    metric = dipymetric.SumPointwiseEuclideanMetric()

    # Use a seeded, local generator (instead of the global `np.random` state)
    # so that any failure can be reproduced exactly.
    rng = np.random.RandomState(42)

    for dtype in [np.int32, np.int64, np.float32, np.float64]:
        # Compute distances of all tuples spanned by the Cartesian product
        # of `data` with itself.
        data = (rng.rand(4, 10, 3) * 10).astype(dtype)
        D = dipymetric.distance_matrix(metric, data)
        assert_equal(D.shape, (len(data), len(data)))
        # The distance between an element and itself must be zero.
        assert_array_equal(np.diag(D), np.zeros(len(data)))

        if metric.is_order_invariant:
            # Distance matrix should be symmetric
            assert_array_equal(D, D.T)

        for i, first in enumerate(data):
            for j, second in enumerate(data):
                assert_equal(D[i, j], dipymetric.dist(metric, first, second))

        # Compute distances of all tuples spanned by the Cartesian product
        # of `data` with `data2`.
        data2 = (rng.rand(3, 10, 3) * 10).astype(dtype)
        D = dipymetric.distance_matrix(metric, data, data2)
        assert_equal(D.shape, (len(data), len(data2)))

        for i, first in enumerate(data):
            for j, second in enumerate(data2):
                assert_equal(D[i, j], dipymetric.dist(metric, first, second))
def test_quickbundles_empty_data():
    """Clustering an empty dataset must yield no clusters and no centroids."""
    threshold = 10
    metric = dipymetric.SumPointwiseEuclideanMetric()

    # Run once without an ordering and once with an explicit empty ordering.
    for extra_kwargs in ({}, {"ordering": []}):
        clusters = quickbundles([], metric, threshold, **extra_kwargs)
        assert_equal(len(clusters), 0)
        assert_equal(len(clusters.centroids), 0)
def test_quickbundles_2D():
    """Test quickbundles clustering using 2D points and the Euclidean metric.

    Five groups of noisy 2D points are generated around well-separated
    centres. The clustering must recover exactly those groups for many random
    processing orders, and must behave as expected for the two extreme
    thresholds (zero and infinity).
    """
    rng = np.random.RandomState(42)

    # One group per centre; the k-th centre (counting from 1) receives k
    # points, each stored as a (1, 2) array of Gaussian noise plus the centre.
    centres = [(0, 0), (10, 10), (-10, 10), (10, -10), (-10, -10)]
    points = []
    for group_size, centre in enumerate(centres, start=1):
        offset = np.array(centre)
        points.extend(rng.randn(1, 2) + offset for _ in range(group_size))

    # NOTE: `dtype` is not defined in this function; it is looked up in the
    # enclosing (module) scope.
    data = np.array(points, dtype=dtype)
    n_points = len(data)

    # Indices of the points belonging to each generated group.
    clusters_truth = [
        [0],
        [1, 2],
        [3, 4, 5],
        [6, 7, 8, 9],
        [10, 11, 12, 13, 14],
    ]

    # # Uncomment the following to visualize this test
    # import pylab as plt
    # plt.plot(*zip(*data[0:1, 0]), linestyle='None', marker='s')
    # plt.plot(*zip(*data[1:3, 0]), linestyle='None', marker='o')
    # plt.plot(*zip(*data[3:6, 0]), linestyle='None', marker='+')
    # plt.plot(*zip(*data[6:10, 0]), linestyle='None', marker='.')
    # plt.plot(*zip(*data[10:, 0]), linestyle='None', marker='*')
    # plt.show()

    # Theoretically, using a threshold above the following value will not
    # produce the expected results.
    threshold = np.sqrt(2 * (10**2)) - np.sqrt(2)
    metric = dipymetric.SumPointwiseEuclideanMetric()

    ordering = np.arange(n_points)
    for _ in range(100):
        # Process the points in a new random order on every pass.
        rng.shuffle(ordering)
        clusters = quickbundles(data, metric, threshold, ordering=ordering)

        # Each cluster holding the first index of a ground-truth group must
        # contain exactly the indices of that group.
        for found in clusters:
            for expected in clusters_truth:
                if expected[0] not in found.indices:
                    continue
                assert_equal(sorted(found.indices), sorted(expected))

        # Re-clustering the members of a cluster with a zero threshold must
        # split it into one subcluster per member.
        for found in clusters:
            subclusters = quickbundles(
                data, metric, threshold=0, ordering=found.indices
            )
            assert_equal(len(subclusters), len(found))
            members = itertools.chain.from_iterable(subclusters)
            assert_equal(sorted(members), sorted(found.indices))

    # A very large threshold should produce only 1 cluster
    merged = quickbundles(data, metric, threshold=np.inf)
    assert_equal(len(merged), 1)
    assert_equal(len(merged[0]), n_points)
    assert_array_equal(merged[0].indices, range(n_points))

    # A very small threshold should produce only N clusters where N=len(data)
    singletons = quickbundles(data, metric, threshold=0)
    assert_equal(len(singletons), n_points)
    assert_array_equal([len(single) for single in singletons],
                       np.ones(n_points))
    flat_indices = list(
        itertools.chain.from_iterable(single.indices for single in singletons)
    )
    assert_array_equal(flat_indices, range(n_points))