def test_sDBICAN_initial_parent_support_calculation(self):
    """
    Test for method sDBICAN.

    Checks that the support of initial (top level) clusters is calculated
    correctly.

    Cluster support is a measure of how far epsilon can be reduced while
    still forming a cluster: the sum of points retained within the cluster
    at each value of epsilon below the maximum value of epsilon of that
    cluster.

    A top level cluster (one found at the initial value of epsilon,
    max_eps) that does not exist at max_eps - 1 should have a calculated
    support of 0. It is still a valid cluster (and would be found with
    non-hierarchical clustering), but if it has child clusters then the
    child clusters are always selected.

    In this test, when (max) epsilon = 3 the parent cluster is detected by
    the non-hierarchical version and also by the hierarchical version, but
    because it has 0 support the hierarchical version discards it in
    favour of the child clusters.
    """
    points = np.array([1, 3, 4, 5, 7, 8, 9, 11], dtype=int)
    parent = np.fromiter([(0, 8)], dtype=SDBICAN._DTYPE_SLICE)
    children = np.fromiter([(1, 4), (4, 7)], dtype=SDBICAN._DTYPE_SLICE)

    # Non-hierarchical: the parent survives until epsilon drops to 2.
    flat_cases = ((4, parent), (3, parent), (2, children))
    for epsilon, expected in flat_cases:
        npt.assert_array_equal(DBICAN.dbican(points, 3, epsilon), expected)

    # Hierarchical: at epsilon 3 the zero-support parent already gives way
    # to its children.
    hierarchical_cases = ((4, parent), (3, children), (2, children))
    for epsilon, expected in hierarchical_cases:
        npt.assert_array_equal(SDBICAN.sdbican(points, 3, epsilon), expected)
def test_labels(self):
    """
    Test for method labels.

    Fits a DBICAN(4, 5) model and compares the per-point labels with the
    expected ones: 0, 1 and 2 for the three expected clusters (in order)
    and -1 for every point outside them.
    """
    points = np.array([
        1, 2, 21, 22, 22, 22, 24, 38, 54, 54, 55, 56, 65, 65, 66, 67, 68, 90
    ], dtype=int)
    expected_labels = np.array(
        [-1, -1, 0, 0, 0, 0, 0, -1, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1],
        dtype=int)

    model = DBICAN(4, 5)
    model.fit(points)

    npt.assert_array_equal(model.labels(), expected_labels)
def test_cluster_extremities(self):
    """
    Test for method cluster_extremities.

    Fits a DBICAN(4, 5) model and checks that cluster_extremities yields
    exactly the expected pairs, in order. Each expected pair is the
    smallest and largest point of one expected cluster.
    """
    model = DBICAN(4, 5)
    input_array = np.array([
        1, 2, 21, 22, 22, 22, 24, 38, 54, 54, 55, 56, 65, 65, 66, 67, 68, 90
    ], dtype=int)
    cluster_extremities = [(21, 24), (54, 56), (65, 68)]
    model.fit(input_array)

    # Materialise the result before comparing: zip() stops at the shorter
    # input, so without the length check a model returning no clusters (or
    # too few / too many) would pass silently.
    results = list(model.cluster_extremities())
    assert len(results) == len(cluster_extremities)
    for result, answer in zip(results, cluster_extremities):
        assert result == answer
def test_DBICAN_few_reads(self, array, slices):
    """
    Test for method DBICAN with small arrays.

    Method dbican should correctly handle an array of length 0 or greater.
    If the length of the array is less than 'n' then an empty array will
    always be returned.

    The 'array' input and the expected 'slices' are supplied by the caller
    (presumably a parametrization defined outside this block).
    """
    found = DBICAN.dbican(array, 5, 5)
    npt.assert_array_equal(found, slices)
def test_melt_slices(self, slices, melted_slices):
    """
    Test for hidden method _melt_slices.

    Test includes the following edge cases:
     * A long slice that completely overlaps shorter slices absorbs them:
       (15, 25) & (16, 17) & (19, 20) --> (15, 25)
     * Adjacent slices do not get merged:
       (7, 9) & (9, 12) --> (*, 9) & (9, *)
     * A slice may span a single value:
       (13, 14) --> (13, 14)
    """
    melted = DBICAN._melt_slices(slices)
    npt.assert_array_equal(melted, melted_slices)
def test_clusters(self):
    """
    Test for method clusters.

    Fits a DBICAN(4, 5) model and checks that clusters yields exactly the
    expected arrays of points, in order.
    """
    model = DBICAN(4, 5)
    input_array = np.array([
        1, 2, 21, 22, 22, 22, 24, 38, 54, 54, 55, 56, 65, 65, 66, 67, 68, 90
    ], dtype=int)
    cluster_arrays = [
        np.array([21, 22, 22, 22, 24], dtype=int),
        np.array([54, 54, 55, 56], dtype=int),
        np.array([65, 65, 66, 67, 68], dtype=int),
    ]
    model.fit(input_array)

    # Materialise the result before comparing: zip() stops at the shorter
    # input, so without the length check a model returning no clusters (or
    # too few / too many) would pass silently.
    results = list(model.clusters())
    assert len(results) == len(cluster_arrays)
    for result, answer in zip(results, cluster_arrays):
        npt.assert_array_equal(result, answer)
def test_subcluster(self):
    """
    Test for hidden method _subcluster.

    The expected slices are overlapping windows over the input points;
    they are not merged at this stage.
    """
    points = np.array([
        1, 2, 21, 22, 22, 22, 24, 38, 54, 54, 55, 56, 65, 65, 66, 67, 68, 90
    ], dtype=int)
    expected = np.array(
        [(2, 6), (3, 7), (8, 12), (12, 16), (13, 17)],
        dtype=DBICAN._DTYPE_SLICE)

    found = DBICAN._subcluster(points, 4, 5)

    npt.assert_array_equal(found, expected)
def test_flat_cluster(self):
    """
    Test for hidden method _cluster.

    Most edge cases should be caught in tests for component methods.
    """
    # The input is the array of points (not slices).
    points = np.array([
        1, 2, 21, 22, 22, 22, 24, 38, 54, 54, 55, 56, 65, 65, 66, 67, 68, 90
    ], dtype=int)
    expected = np.array(
        [(2, 7), (8, 12), (12, 17)],
        dtype=DBICAN._DTYPE_SLICE)

    found = DBICAN._cluster(points, 4, 5)

    npt.assert_array_equal(found, expected)
def test_fit(self):
    """
    Test to run class via the method fit.

    Points passed to the fit method should be copied into a new array, and
    the new copy of points should be sorted.

    Most edge cases should be caught in tests for component methods.
    """
    # NOTE(review): the input below is already in ascending order, so this
    # test checks the copy but does not actually exercise the sorting --
    # confirm whether an unsorted input should be used here.
    values = [
        1, 2, 21, 22, 22, 22, 24, 38, 54, 54, 55, 56, 65, 65, 66, 67, 68, 90
    ]
    given = np.array(values, dtype=int)
    expected_points = np.array(values, dtype=int)
    expected_slices = np.array(
        [(2, 7), (8, 12), (12, 17)],
        dtype=DBICAN._DTYPE_SLICE)

    model = DBICAN(4, 5)
    model.fit(given)

    # The model must hold its own array, not a reference to the caller's.
    assert model.input_array is not given
    npt.assert_array_equal(model.input_array, expected_points)
    npt.assert_array_equal(model.slices, expected_slices)
15, 16, 18, 23, 25, 26, 27, 27, 28, 29, 30, 31, 33 ]) # variables mpts = 10 eps = 5 # DBSCAN labels cores, dbscan_labels = dbscan(points.reshape(-1, 1), eps=eps, min_samples=mpts) # DBSCAN* labels are same for core points but otherwise -1 dbscanx_labels = np.zeros(len(points), dtype=np.int) - 1 dbscanx_labels[cores] = dbscan_labels[cores] # DBICAN labels dbican = DBICAN(min_points=mpts, epsilon=eps) dbican.fit(points) dbican_labels = dbican.labels() # plotting labels_list = [dbscan_labels, dbscanx_labels, dbican_labels] name_list = [r'DBSCAN', r'DBSCAN*', 'DBICAN'] title = r'Comparison of Algorithms with $m_{pts}=10$ and $\varepsilon=5$' legend_labels = ['.', '1', '2'] x_max_ofset = 2 height = 2.5 width = 6 n_row = 3 n_col = 1 rcParams['figure.figsize'] = width, height
def test_sorted_ascending(self, array, answer):
    """
    Test for hidden method _sorted_ascending.

    The 'array' input and the expected 'answer' are supplied by the caller
    (presumably a parametrization defined outside this block).
    """
    outcome = DBICAN._sorted_ascending(array)
    assert outcome == answer