示例#1
0
    def test_sDBICAN_initial_parent_support_calculation(self):
        """
        Test for method sDBICAN.
        Checks that the support of initial (top level) clusters is
        calculated correctly.

        Cluster support measures how far epsilon can be reduced while a
        cluster is still formed: it is the sum of the points retained within
        the cluster at each value of epsilon below the maximum value of
        epsilon for that cluster.

        A top level cluster (one found at the initial value of epsilon,
        max_eps) that does not exist at max_eps - 1 should have a calculated
        support of 0. It is still a valid cluster (and would be found by
        non-hierarchical clustering) but if it has child clusters then the
        child clusters are always selected.

        In this test, when (max) epsilon = 3 the parent cluster is found by
        the non-hierarchical version. The hierarchical version also detects
        it but, because it has 0 support, discards it in favour of the child
        clusters.
        """
        points = np.array([1, 3, 4, 5, 7, 8, 9, 11], dtype=int)
        parent = np.fromiter([(0, 8)], dtype=SDBICAN._DTYPE_SLICE)
        children = np.fromiter([(1, 4), (4, 7)],
                               dtype=SDBICAN._DTYPE_SLICE)

        # Each pair is (value of the final positional argument, expected
        # slices); the middle argument is 3 throughout.
        flat_cases = ((4, parent), (3, parent), (2, children))
        hierarchical_cases = ((4, parent), (3, children), (2, children))

        # Non-hierarchical version keeps the parent down to a value of 3.
        for last_arg, expected in flat_cases:
            found = DBICAN.dbican(points, 3, last_arg)
            npt.assert_array_equal(found, expected)

        # Hierarchical version already prefers the children at a value of 3.
        for last_arg, expected in hierarchical_cases:
            found = SDBICAN.sdbican(points, 3, last_arg)
            npt.assert_array_equal(found, expected)
示例#2
0
 def test_labels(self):
     """
     Test for method labels.
     Fits a DBICAN(4, 5) model and compares the labels it reports with the
     expected ones: -1 for the points 1, 2, 38 and 90, and 0, 1 and 2 for
     the three remaining runs of points.
     """
     model = DBICAN(4, 5)
     points = np.array(
         [1, 2, 21, 22, 22, 22, 24, 38, 54,
          54, 55, 56, 65, 65, 66, 67, 68, 90],
         dtype=int)
     expected = np.array(
         [-1, -1, 0, 0, 0, 0, 0, -1, 1,
          1, 1, 1, 2, 2, 2, 2, 2, -1],
         dtype=int)
     model.fit(points)
     found = model.labels()
     npt.assert_array_equal(found, expected)
示例#3
0
 def test_cluster_extremities(self):
     """
     Test for method cluster_extremities.
     Fits a DBICAN(4, 5) model and checks that the extremities it reports
     match the expected pairs (e.g. (21, 24)) both in number and in
     content.
     """
     model = DBICAN(4, 5)
     input_array = np.array([
         1, 2, 21, 22, 22, 22, 24, 38, 54, 54, 55, 56, 65, 65, 66, 67, 68,
         90
     ],
                            dtype=int)
     cluster_extremities = [(21, 24), (54, 56), (65, 68)]
     model.fit(input_array)
     # Materialise the results so their count can be checked: zip() stops
     # at the shorter input, so a missing or extra cluster would otherwise
     # go unnoticed (an empty result would even pass trivially).
     results = list(model.cluster_extremities())
     assert len(results) == len(cluster_extremities)
     for result, answer in zip(results, cluster_extremities):
         assert result == answer
示例#4
0
 def test_DBICAN_few_reads(self, array, slices):
     """
     Test for method DBICAN with small arrays.
     DBICAN.dbican should correctly handle an array of length 0 or
     greater. If the length of the array is less than 'n' then an empty
     array is always expected back.
     """
     found = DBICAN.dbican(array, 5, 5)
     npt.assert_array_equal(found, slices)
示例#5
0
 def test_melt_slices(self, slices, melted_slices):
     """
     Test for hidden method _melt_slices.
     Edge cases covered:
      * A long slice that completely overlaps shorter ones absorbs them:
        (15, 25) & (16, 17) & (19, 20) --> (15, 25)
      * Adjacent slices do not get merged:
        (7, 9) & (9, 12) --> (*, 9) & (9, *)
      * A slice may span a single value:
        (13, 14) --> (13, 14)
     """
     melted = DBICAN._melt_slices(slices)
     npt.assert_array_equal(melted, melted_slices)
示例#6
0
 def test_clusters(self):
     """
     Test for method clusters.
     Fits a DBICAN(4, 5) model and checks that the clusters it reports
     match the expected arrays of points both in number and in content.
     """
     model = DBICAN(4, 5)
     input_array = np.array([
         1, 2, 21, 22, 22, 22, 24, 38, 54, 54, 55, 56, 65, 65, 66, 67, 68,
         90
     ],
                            dtype=int)
     cluster_arrays = [
         np.array([21, 22, 22, 22, 24], dtype=int),
         np.array([54, 54, 55, 56], dtype=int),
         np.array([65, 65, 66, 67, 68], dtype=int)
     ]
     model.fit(input_array)
     # Materialise the results so their count can be checked: zip() stops
     # at the shorter input, so a missing or extra cluster would otherwise
     # go unnoticed (an empty result would even pass trivially).
     results = list(model.clusters())
     assert len(results) == len(cluster_arrays)
     for result, answer in zip(results, cluster_arrays):
         npt.assert_array_equal(result, answer)
示例#7
0
 def test_subcluster(self):
     """
     Test for hidden method _subcluster.
     Calls DBICAN._subcluster(points, 4, 5) and compares the returned
     slices with the five expected (start, stop) pairs.
     """
     points = np.array(
         [1, 2, 21, 22, 22, 22, 24, 38, 54,
          54, 55, 56, 65, 65, 66, 67, 68, 90],
         dtype=int)
     expected = np.array(
         [(2, 6), (3, 7), (8, 12), (12, 16), (13, 17)],
         dtype=DBICAN._DTYPE_SLICE)
     found = DBICAN._subcluster(points, 4, 5)
     npt.assert_array_equal(found, expected)
示例#8
0
 def test_flat_cluster(self):
     """
     Test for hidden method _cluster.
     Calls DBICAN._cluster(points, 4, 5) and compares the returned slices
     with the three expected (start, stop) pairs.
     Most edge cases should be caught in tests for component methods.
     """
     points = np.array(
         [1, 2, 21, 22, 22, 22, 24, 38, 54,
          54, 55, 56, 65, 65, 66, 67, 68, 90],
         dtype=int)
     expected = np.array([(2, 7), (8, 12), (12, 17)],
                         dtype=DBICAN._DTYPE_SLICE)
     found = DBICAN._cluster(points, 4, 5)
     npt.assert_array_equal(found, expected)
示例#9
0
 def test_fit(self):
     """
     Test to run class via the method fit.
     The points handed to fit should be copied into a new array, and that
     copy should be sorted.
     Most edge cases should be caught in tests for component methods.
     """
     values = [1, 2, 21, 22, 22, 22, 24, 38, 54,
               54, 55, 56, 65, 65, 66, 67, 68, 90]
     # Two independent arrays with the same content: one is handed to the
     # model, the other is the reference the stored copy is compared with.
     input_array = np.array(values, dtype=int)
     expected_points = np.array(values, dtype=int)
     expected_slices = np.array([(2, 7), (8, 12), (12, 17)],
                                dtype=DBICAN._DTYPE_SLICE)

     model = DBICAN(4, 5)
     model.fit(input_array)

     # The model must hold its own array object, not the one passed in.
     assert model.input_array is not input_array
     npt.assert_array_equal(model.input_array, expected_points)
     npt.assert_array_equal(model.slices, expected_slices)
示例#10
0
    15, 16, 18, 23, 25, 26, 27, 27, 28, 29, 30, 31, 33
])

# variables
mpts = 10
eps = 5

# DBSCAN labels
cores, dbscan_labels = dbscan(points.reshape(-1, 1), eps=eps, min_samples=mpts)

# DBSCAN* labels are same for core points but otherwise -1
# (the builtin ``int`` is used as dtype: the ``np.int`` alias was deprecated
# in NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError)
dbscanx_labels = np.full(len(points), -1, dtype=int)
dbscanx_labels[cores] = dbscan_labels[cores]

# DBICAN labels
dbican = DBICAN(min_points=mpts, epsilon=eps)
dbican.fit(points)
dbican_labels = dbican.labels()

# plotting
labels_list = [dbscan_labels, dbscanx_labels, dbican_labels]
name_list = [r'DBSCAN', r'DBSCAN*', 'DBICAN']
# NOTE: the values 10 and 5 are hard-coded in the title; keep them in sync
# with mpts and eps above.
title = r'Comparison of Algorithms with $m_{pts}=10$ and $\varepsilon=5$'
legend_labels = ['.', '1', '2']
x_max_ofset = 2
height = 2.5
width = 6
n_row = 3
n_col = 1

# figure size is given as (width, height)
rcParams['figure.figsize'] = width, height
示例#11
0
 def test_sorted_ascending(self, array, answer):
     """
     Test for hidden method _sorted_ascending.
     The value returned for ``array`` must equal ``answer``.
     """
     outcome = DBICAN._sorted_ascending(array)
     assert outcome == answer