def test_relabel_B_clusters_each_with_one_point(self):
        """Relabel two single-point B clusters and check the returned indices.

        Both clusters hold one point, so neither is strictly larger; the
        expectations below pin that the first argument comes back first and
        the second argument is the one treated as the largest.
        """
        matrix = matching_matrix(10)

        smallest, largest = matrix.relabel_B(0, 1, 0)

        self.assertEqual(0, smallest)
        self.assertEqual(1, largest)

        # Clusters 0-9 exist before the merge, so the newly created cluster
        # is labelled 10. Cluster 0 is merged into cluster 1 (the second
        # cluster is arbitrarily taken to be the largest), hence the new
        # label 10 must be recorded against index 1 in update_B.
        self.assertDictEqual({10: 1}, matrix.update_B)
    def test_relabel_B_clusters_first_with_2_second_with_1(self):
        """Relabel a two-point B cluster against a one-point B cluster.

        The bigger cluster is passed first; the test expects relabel_B to
        hand back the smaller cluster first and the larger one second.
        """
        matrix = matching_matrix(10)

        # Step 0: merging 0 and 1 creates cluster 10, which is kept in
        # column 1.
        matrix.merge_columns(0, 1, 0)

        # Step 1: cluster 10 (living in column 1) and cluster 2 are about to
        # form cluster 11.
        smallest, largest = matrix.relabel_B(10, 2, 1)

        # Cluster 2 is the smallest of the pair.
        self.assertEqual(2, smallest)
        # Cluster 10 is the largest and is stored in column 1.
        self.assertEqual(1, largest)

        self.assertDictEqual({11: 1}, matrix.update_B)
    def test_relabel_A_clusters_first_with_1_second_with_2(self):
        """Relabel a two-point A cluster against a one-point A cluster.

        Mirror image of the equivalent relabel_B test, exercising
        merge_rows / relabel_A / update_A instead.
        """
        matrix = matching_matrix(10)

        # Step 0: merging 0 and 1 creates cluster 10, which is kept at
        # index 1.
        matrix.merge_rows(0, 1, 0)

        # Step 1: cluster 10 (living at index 1) and cluster 2 are about to
        # form cluster 11.
        smallest, largest = matrix.relabel_A(10, 2, 1)

        # Cluster 2 is the smallest of the pair.
        self.assertEqual(2, smallest)
        # Cluster 10 is the largest and is stored at index 1.
        self.assertEqual(1, largest)

        self.assertDictEqual({11: 1}, matrix.update_A)
    def test_init_correct_defaults(self):
        """A freshly built matching matrix of size 4 starts as an identity.

        Checks the sparse row/column dictionaries, the row/column totals,
        the stored size, the zeroed T/P/Q counters and the empty relabel
        maps.
        """
        # Arrange / Act
        matrix = matching_matrix(4)

        # Assert: rows and columns each map index i to {i: 1}.
        identity_rows = {i: {i: 1} for i in range(4)}
        self.assertDictEqual(identity_rows, matrix.rows)

        identity_columns = {j: {j: 1} for j in range(4)}
        self.assertDictEqual(identity_columns, matrix.columns)

        # Every row and column total starts at one.
        unit_totals = {i: 1 for i in range(4)}
        self.assertDictEqual(unit_totals, matrix.rtot)
        self.assertDictEqual(dict(unit_totals), matrix.ctot)
        self.assertEqual(4, matrix.n)

        # The three counters all start at zero.
        for counter in (matrix.T, matrix.P, matrix.Q):
            self.assertEqual(0, counter)

        # No relabelling has happened yet.
        self.assertDictEqual({}, matrix.update_A)
        self.assertDictEqual({}, matrix.update_B)
Пример #5
0
    def TPQ_linkages(self, A, B):
        """
        Compute pair-count statistics for two hierarchical clusterings.

        The two linkages describe clusterings of the same set of objects.
        The statistics are stored on the instance (nothing is returned) and
        can then be used to calculate indices comparing the clusterings.

        Parameters
        ----------
        A : array_like
            An :math:`(n-1)` by 4 matrix encoding the first linkage
            (hierarchical clustering).  See ``linkage`` documentation
            for more information on its form.
        B : array_like
            A second :math:`(n-1)` by 4 matrix encoding a linkage of the
            same objects.

        Raises
        ------
        ValueError
            If ``A`` and ``B`` do not have the same number of rows.
            ``is_valid_linkage`` is called with ``throw=True`` and may raise
            for malformed input as well.

        Notes
        -----
        The following attributes are (re)assigned:

        ``self.T`` : ndarray of length :math:`n-2`
            Element ``k`` is the first value returned by the ``k``'th merge:
            the number of pairs of objects placed into the same cluster by
            both clusterings after the ``k``'th rows have been merged.
        ``self.P`` : ndarray of length :math:`n-2`
            As above, counting pairs sharing a cluster in clustering A only.
        ``self.Q`` : ndarray of length :math:`n-2`
            As above, counting pairs sharing a cluster in clustering B only.
        ``self.n`` : int
            Number of rows in ``A`` plus one.

        The final row of each linkage is not merged, which is why the
        vectors hold :math:`n-2` rather than :math:`n-1` entries.
        """
        # Work on float arrays regardless of what the caller handed in.
        A = np.array(A, dtype='double')
        B = np.array(B, dtype='double')

        # Both inputs must be well-formed linkage matrices.
        is_valid_linkage(A, throw=True)
        is_valid_linkage(B, throw=True)

        n = len(A) + 1
        if len(B) + 1 != n:
            raise ValueError(
                "The hierarchical clusterings must be of the same size")

        self.T = np.zeros(n - 2)
        self.P = np.zeros(n - 2)
        self.Q = np.zeros(n - 2)
        self.n = n

        # Start from a fresh matching matrix of size n.
        matrix = matching_matrix(n)

        # Replay the merges of both linkages in lockstep, leaving out the
        # last row of each.
        for step, (merge_a, merge_b) in enumerate(zip(A[:-1], B[:-1])):
            t_k, p_k, q_k = matrix.merge(
                merge_a[0], merge_a[1], merge_b[0], merge_b[1], step)
            self.T[step] = t_k
            self.P[step] = p_k
            self.Q[step] = q_k
    def test_worked_example(self):
        """Walk a five-point example through every merge and check T, P, Q.

        Linkage A (i1, i2):          Linkage B (j1, j2, j_size):
            [ 3, 4 ]                     [ 3, 4, 2 ]
            [ 1, 5 ]                     [ 0, 2, 2 ]
            [ 0, 2 ]                     [ 1, 5, 3 ]
            [ 6, 7 ]                     [ 6, 7, 5 ]

        Row merges (A) and column merges (B) are applied alternately, and
        the running counters on the matching matrix are verified after each
        one. The small tables in the comments show the matching matrix that
        each merge is expected to produce, with row/column totals on the
        right and bottom edges.
        """
        # Arrange
        matrix = matching_matrix(5)

        def check_counts(expected_T, expected_P, expected_Q):
            # Compare the three running counters with their expected values.
            self.assertEqual(expected_T, matrix.T)
            self.assertEqual(expected_P, matrix.P)
            self.assertEqual(expected_Q, matrix.Q)

        # Merge 0 in A
        #         0, 1, 2, 3, 4
        #     0 [ 1, 0, 0, 0, 0] 1
        #     1 [ 0, 1, 0, 0, 0] 1
        #     2 [ 0, 0, 1, 0, 0] 1
        #     5 [ 0, 0, 0, 1, 1] 2
        #         1  1  1  1  1
        matrix.merge_rows(i_1=3, i_2=4, k=0)
        check_counts(0, 1, 0)

        # Merge 0 in B
        #         0, 1, 2, 5
        #     0 [ 1, 0, 0, 0 ] 1
        #     1 [ 0, 1, 0, 0 ] 1
        #     2 [ 0, 0, 1, 0 ] 1
        #     5 [ 0, 0, 0, 2 ] 2
        #         1, 1, 1, 2
        matrix.merge_columns(j_1=3, j_2=4, k=0)
        check_counts(1, 1, 1)

        # Merge 1 in A
        #         0, 1, 2, 5
        #     0 [ 1, 0, 0, 0 ] 1
        #     2 [ 0, 0, 1, 0 ] 1
        #     6 [ 0, 1, 0, 2 ] 3
        #         1, 1, 1, 2
        matrix.merge_rows(i_1=1, i_2=5, k=1)
        check_counts(1, 3, 1)

        # Merge 1 in B
        #         6  1  5
        #     0 [ 1, 0, 0 ] 1
        #     2 [ 1, 0, 0 ] 1
        #     6 [ 0, 1, 2 ] 3
        #         2, 1, 2
        matrix.merge_columns(j_1=0, j_2=2, k=1)
        check_counts(1, 3, 2)

        # Merge 2 in A
        #         6  1  5
        #     7 [ 2, 0, 0 ] 2
        #     6 [ 0, 1, 2 ] 3
        #         2, 1, 2
        matrix.merge_rows(i_1=0, i_2=2, k=2)
        check_counts(2, 4, 2)

        # Merge 2 in B
        #         6  7
        #     7 [ 2, 0 ] 2
        #     6 [ 0, 3 ] 3
        #         2, 3
        matrix.merge_columns(j_1=1, j_2=5, k=2)
        check_counts(4, 4, 4)

        # Merge 3 in A
        #         6  7
        #     8 [ 2, 3 ] 5
        #         2, 3
        matrix.merge_rows(i_1=6, i_2=7, k=3)
        check_counts(4, 10, 4)

        # Merge 3 in B
        #         8
        #     8 [ 5 ] 5
        #         5
        matrix.merge_columns(j_1=6, j_2=7, k=3)
        check_counts(10, 10, 10)