    def test_update_linkage_matrix(self):
        """ create a consensus matrix by sampling a synthesized set of clusters
            assert that the clustering is equivalent
        """
        n_samples = 11
        n_clusters = 3
        cluster_set = np.int_(np.ones(n_samples))
        for r in range(0, n_samples):
            cluster_set[r] = int(np.random.randint(n_clusters))

        n_repeats = 100
        n_test_perm = 5
        n_test_rows = n_samples
        I = np.zeros((n_test_rows, n_test_rows))
        M = np.zeros((n_test_rows, n_test_rows))

        for r in range(0, n_repeats):
            f_perm = np.random.permutation(n_test_rows)
            f_perm = f_perm[0:n_test_perm]
            cluster_p = cluster_set[f_perm]
            I = kn.update_indicator_matrix(f_perm, I)
            M = kn.update_linkage_matrix(cluster_p, f_perm, M)

        CC = M / np.maximum(I, 1e-15)

        for s in range(0, n_clusters):
            s_dex = cluster_set == s
            c_c = CC[s_dex, :]
            c_c = c_c[:, s_dex]
            n_check = c_c - 1
            self.assertEqual(n_check.sum(),
                             0,
                             msg='cluster grouping exception')
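# A minimal sketch (an assumption, not the knpackage implementation) of what the
# test above relies on from kn.update_indicator_matrix and kn.update_linkage_matrix:
# the indicator matrix counts how often each pair of samples was drawn together,
# the linkage matrix counts how often a drawn pair landed in the same cluster,
# and CC = M / I therefore estimates the pairwise co-clustering probability.
import numpy as np

def update_indicator_matrix_sketch(sample_perm, indicator_matrix):
    """ add 1 to every pair of rows/columns drawn in this permutation. """
    indicator_matrix[np.ix_(sample_perm, sample_perm)] += 1
    return indicator_matrix

def update_linkage_matrix_sketch(cluster_labels, sample_perm, linkage_matrix):
    """ add 1 to every drawn pair that received the same cluster label. """
    same_cluster = (cluster_labels[:, None] == cluster_labels[None, :]).astype(float)
    linkage_matrix[np.ix_(sample_perm, sample_perm)] += same_cluster
    return linkage_matrix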
def get_linkage_matrix(run_parameters, linkage_matrix, indicator_matrix):
    """ read bootstrap temp_h* and temp_p* files, compute and add the linkage_matrix.

    Args:
        run_parameters: parameter set dictionary.
        linkage_matrix: connectivity matrix from initialization or previous call.

    Returns:
        linkage_matrix: summed with "temp_h*" files in run_parameters["tmp_directory"].
    """
    if run_parameters['processing_method'] == 'distribute':
        tmp_dir = os.path.join(
            run_parameters['cluster_shared_volumn'],
            os.path.basename(os.path.normpath(
                run_parameters['tmp_directory'])))
    else:
        tmp_dir = run_parameters["tmp_directory"]

    dir_list = os.listdir(tmp_dir)
    for tmp_f in dir_list:
        if tmp_f.startswith('tmp_p_'):
            pname = os.path.join(tmp_dir, tmp_f)
            hname = os.path.join(tmp_dir, 'tmp_h_' + tmp_f[6:])

            sample_permutation = np.load(pname)
            h_mat = np.load(hname)

            linkage_matrix = kn.update_linkage_matrix(h_mat,
                                                      sample_permutation,
                                                      linkage_matrix)
            indicator_matrix = kn.update_indicator_matrix(
                sample_permutation, indicator_matrix)

    return linkage_matrix, indicator_matrix
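# A hedged usage sketch for get_linkage_matrix above: the function name
# build_consensus_matrix_sketch and the n_samples argument are illustrative
# assumptions, not taken from the source. It shows zero-initialized matrices
# being accumulated over the bootstrap tmp files and then combined the same
# way the tests do (linkage / indicator).
import numpy as np

def build_consensus_matrix_sketch(run_parameters, n_samples):
    linkage_matrix = np.zeros((n_samples, n_samples))
    indicator_matrix = np.zeros((n_samples, n_samples))
    linkage_matrix, indicator_matrix = get_linkage_matrix(
        run_parameters, linkage_matrix, indicator_matrix)
    # guard against sample pairs that were never drawn together
    return linkage_matrix / np.maximum(indicator_matrix, 1e-15)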
    def test_perform_kmeans(self):
        """ assert that the kmeans sets of a known cluster as consensus matrix is the
            same as the known cluster
        """
        n_samples = 11
        n_clusters = 3
        cluster_set = np.int_(np.ones(n_samples))
        for r in range(0, n_samples):
            cluster_set[r] = int(np.random.randint(n_clusters))

        n_repeats = 33
        n_test_perm = 5
        n_test_rows = n_samples
        I = np.zeros((n_test_rows, n_test_rows))
        M = np.zeros((n_test_rows, n_test_rows))

        for r in range(0, n_repeats):
            f_perm = np.random.permutation(n_test_rows)
            f_perm = f_perm[0:n_test_perm]
            cluster_p = cluster_set[f_perm]
            I = kn.update_indicator_matrix(f_perm, I)
            M = kn.update_linkage_matrix(cluster_p, f_perm, M)

        CC = M / np.maximum(I, 1e-15)

        label_set = kn.perform_kmeans(CC, n_clusters)

        self.assertTrue(sets_a_eq_b(cluster_set, label_set),
                        msg='kmeans sets differ from cluster')
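# sets_a_eq_b is used above but not defined in this excerpt; a plausible sketch
# (an assumption, not the project's helper) is that two labelings count as equal
# when they induce the same partition of the samples, i.e. equal up to a
# renaming of the cluster ids.
import numpy as np

def sets_a_eq_b_sketch(a, b):
    """ True if label arrays a and b group the samples identically. """
    a, b = np.asarray(a), np.asarray(b)
    if a.shape != b.shape:
        return False
    groups_a = {tuple(np.where(a == lbl)[0]) for lbl in np.unique(a)}
    groups_b = {tuple(np.where(b == lbl)[0]) for lbl in np.unique(b)}
    return groups_a == groups_b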
    def test_update_indicator_matrix(self):
        """ assert that the indicator matrix is not loosing any digits
            Note: correctness test considered as part of linkage matrix test
        """
        n_repeats = 10
        n_test_perm = 11
        n_test_rows = 77
        A = np.zeros((n_test_rows, n_test_rows))
        running_sum = 0
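        # each repeat below adds 1 to an n_test_perm x n_test_perm block of A,
        # so the grand total over all repeats is n_repeats * n_test_perm**2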
        for r in range(0, n_repeats):
            running_sum += n_test_perm**2
            f_perm = np.random.permutation(n_test_rows)
            f_perm = f_perm[0:n_test_perm]
            A = kn.update_indicator_matrix(f_perm, A)

        self.assertEqual(A.sum(), running_sum, msg='sum of elements exception')