def run_cc_net_nmf_clusters_worker(network_mat, spreadsheet_mat, lap_dag,
                                   lap_val, run_parameters, sample):
    """Run one consensus-clustering sample of network-based NMF.

    The numpy random generator is seeded with ``sample`` so each sample is
    reproducible.  The spreadsheet is randomly sub-sampled, smoothed against
    the network with ``kn.smooth_matrix_with_rwr``, quantile-normalized,
    factored with ``kn.perform_net_nmf`` and the result is handed to
    ``save_a_clustering_to_tmp`` together with the sampling permutation.

    Args:
        network_mat: genes x genes symmetric matrix.
        spreadsheet_mat: genes x samples matrix.
        lap_dag: laplacian matrix component, L = lap_dag - lap_val.
        lap_val: laplacian matrix component, L = lap_dag - lap_val.
        run_parameters: dictionary of run-time parameters; must contain
            "rows_sampling_fraction" and "cols_sampling_fraction".
        sample: index of this sampling iteration (also used as random seed).

    Returns:
        None
    """
    np.random.seed(sample)

    row_fraction = run_parameters["rows_sampling_fraction"]
    col_fraction = run_parameters["cols_sampling_fraction"]
    sampled_mat, sample_permutation = kn.sample_a_matrix(
        spreadsheet_mat, row_fraction, col_fraction)

    # The second value returned by the smoother is not needed here.
    smoothed_mat, _ = kn.smooth_matrix_with_rwr(
        sampled_mat, network_mat, run_parameters)
    normalized_mat = kn.get_quantile_norm_matrix(smoothed_mat)

    h_matrix = kn.perform_net_nmf(
        normalized_mat, lap_val, lap_dag, run_parameters)

    save_a_clustering_to_tmp(
        h_matrix, sample_permutation, run_parameters, sample)
# Example #2
# 0
def run_cc_kmeans_clusters_worker(spreadsheet_mat, run_parameters, sample):
    """Run one consensus-clustering sample of k-means.

    The numpy random generator is seeded with ``sample``, the spreadsheet is
    randomly sub-sampled, its transpose is clustered with
    ``kn.perform_kmeans``, the labels are converted with ``labels_to_hmat``
    and the result is stored via ``kn.save_a_clustering_to_tmp``.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters; must contain
            "rows_sampling_fraction", "cols_sampling_fraction" and
            "number_of_clusters".
        sample: index of this sampling iteration (also used as random seed).

    Returns:
        None
    """
    import knpackage.toolbox as kn
    import numpy as np

    np.random.seed(sample)

    row_fraction = run_parameters["rows_sampling_fraction"]
    col_fraction = run_parameters["cols_sampling_fraction"]
    n_clusters = run_parameters["number_of_clusters"]

    sampled_mat, sample_permutation = kn.sample_a_matrix(
        spreadsheet_mat, row_fraction, col_fraction)

    # k-means is run on the transposed sample (columns become rows).
    labels = kn.perform_kmeans(sampled_mat.T, n_clusters)
    h_matrix = labels_to_hmat(labels, n_clusters)

    kn.save_a_clustering_to_tmp(
        h_matrix, sample_permutation, run_parameters, sample)
def run_cc_nmf_clusters_worker(spreadsheet_mat, run_parameters, sample):
    """Run one consensus-clustering sample of NMF.

    The numpy random generator is seeded with ``sample``, the spreadsheet is
    randomly sub-sampled, factored with ``kn.perform_nmf`` and the result is
    handed to ``save_a_clustering_to_tmp`` with the sampling permutation.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters; must contain
            "rows_sampling_fraction" and "cols_sampling_fraction".
        sample: index of this sampling iteration (also used as random seed).

    Returns:
        None
    """
    np.random.seed(sample)

    row_fraction = run_parameters["rows_sampling_fraction"]
    col_fraction = run_parameters["cols_sampling_fraction"]
    sampled_mat, sample_permutation = kn.sample_a_matrix(
        spreadsheet_mat, row_fraction, col_fraction)

    h_matrix = kn.perform_nmf(sampled_mat, run_parameters)

    save_a_clustering_to_tmp(
        h_matrix, sample_permutation, run_parameters, sample)
# Example #4
# 0
def run_cc_hclust_clusters_worker(spreadsheet_mat, run_parameters, sample):
    """Run one consensus-clustering sample of hierarchical clustering.

    The numpy random generator is seeded with ``sample``, the spreadsheet is
    randomly sub-sampled, clustered with ``perform_hclust``, the labels are
    converted with ``labels_to_hmat`` and the result is stored via
    ``kn.save_a_clustering_to_tmp``.

    Args:
        spreadsheet_mat: genes x samples matrix.
        run_parameters: dictionary of run-time parameters; must contain
            "rows_sampling_fraction", "cols_sampling_fraction",
            "number_of_clusters", "affinity_metric" and "linkage_criterion".
        sample: index of this sampling iteration (also used as random seed).

    Returns:
        None
    """
    import knpackage.toolbox as kn
    import numpy as np

    np.random.seed(sample)

    row_fraction = run_parameters["rows_sampling_fraction"]
    col_fraction = run_parameters["cols_sampling_fraction"]
    n_clusters = run_parameters["number_of_clusters"]
    affinity = run_parameters['affinity_metric']
    linkage = run_parameters['linkage_criterion']

    sampled_mat, sample_permutation = kn.sample_a_matrix(
        spreadsheet_mat, row_fraction, col_fraction)

    # Only the labels are used; the second returned value is discarded.
    labels, _unused = perform_hclust(
        sampled_mat, n_clusters, affinity, linkage)
    h_matrix = labels_to_hmat(labels, n_clusters)

    kn.save_a_clustering_to_tmp(
        h_matrix, sample_permutation, run_parameters, sample)
    def test_sample_a_matrix(self):
        """Check the output of kn.sample_a_matrix.

        Asserts that the random sample has the expected number of columns,
        that the permutation points to the correct source columns, and that
        the number of rows set to zero in every sampled column is correct.
        """
        n_test_rows = 11
        n_test_cols = 5
        pct_smpl = 0.6
        n_zero_rows = int(np.round(n_test_rows * (1 - pct_smpl)))
        n_smpl_cols = int(np.round(n_test_cols * pct_smpl))
        epsilon_sum = max(n_test_rows, n_test_cols) * 1e-15

        # np.random.rand draws from [0, 1); the offset keeps every source
        # entry strictly positive, so an exact zero in the sample cannot
        # come from the source data itself.
        A = np.random.rand(n_test_rows, n_test_cols) + epsilon_sum
        B, P = kn.sample_a_matrix(A, pct_smpl, pct_smpl)

        self.assertEqual(B.shape[1],
                         P.size,
                         msg='permutation size not equal columns')
        self.assertEqual(P.size,
                         n_smpl_cols,
                         msg='number of sample columns exception')

        perm_err_sum = 0
        n_zero_err_sum = 0
        for B_col, A_col in enumerate(P):
            zeroed_rows = B[:, B_col] == 0
            if np.count_nonzero(zeroed_rows) != n_zero_rows:
                n_zero_err_sum += 1

            # Compare the sampled column with its source column, ignoring
            # the rows that were zeroed out by the sampling.
            C = A[:, A_col] - B[:, B_col]
            C[zeroed_rows] = 0
            # Use absolute differences: a signed sum lets negative or
            # mutually cancelling differences hide a wrong permutation.
            if np.abs(C).sum() > epsilon_sum:
                perm_err_sum += 1

        self.assertEqual(n_zero_err_sum,
                         0,
                         msg='number of zero columns exception')
        self.assertEqual(perm_err_sum, 0, msg='permutation index exception')