Exemplo n.º 1
0
def qm_result_matrices(matrices, tmp_mean, multiprocessing=True):
    """Replace the values of each matrix by the entries of tmp_mean that
    sit at the rank positions of the original values.

    With multiprocessing enabled, one (values, row_names, column_names,
    tmp_mean) tuple per matrix is mapped over rank_fun on a worker pool and
    the mapped list is returned unchanged. Otherwise the matrices are
    processed one after another in the calling process and a list of
    DataMatrix objects is returned."""
    if not multiprocessing:
        # serial path
        remapped = []
        for source in matrices:
            original = source.values
            height, width = original.shape
            ranks = util.rrank_matrix(original)
            ranked_means = np.reshape(tmp_mean[ranks], (height, width))
            remapped.append(DataMatrix(height, width,
                                       source.row_names,
                                       source.column_names,
                                       values=ranked_means))
        return remapped

    # parallel path: one work item per input matrix
    with util.get_mp_pool() as pool:
        work_items = [(source.values, source.row_names,
                       source.column_names, tmp_mean)
                      for source in matrices]
        ranked = pool.map(rank_fun, work_items)
    return ranked
Exemplo n.º 2
0
    def __compute_network_cluster_scores(self, network):
        """Compute the cluster scores for the given network.

        The network, the set of gene names and the membership object are
        published through module-level globals while the scores are computed
        and are always reset to None afterwards, even if scoring fails.

        Returns a dictionary that maps each cluster number (1 to
        num_clusters()) to the value compute_network_scores returned for it.
        """
        global COMPUTE_NETWORK, ALL_GENES, NETWORK_SCORE_MEMBERSHIP
        use_multiprocessing = self.config_params[
            scoring.KEY_MULTIPROCESSING]
        clusters = xrange(1, self.num_clusters() + 1)
        # Set the huge memory objects into globals
        # These are readonly anyways, but using Manager.list() or something
        # similar brings this down to a crawl
        # NOTE(review): presumably compute_network_scores reads these
        # globals - confirm against its definition
        COMPUTE_NETWORK = network
        ALL_GENES = set(self.gene_names())  # optimization: O(1) lookup
        NETWORK_SCORE_MEMBERSHIP = self.membership

        try:
            if use_multiprocessing:
                with util.get_mp_pool(self.config_params) as pool:
                    scores = pool.map(compute_network_scores, clusters)
            else:
                scores = [compute_network_scores(cluster)
                          for cluster in clusters]
            # scores are in cluster order, pair them up with their cluster
            return dict(zip(clusters, scores))
        finally:
            # cleanup, must also happen on errors so the large objects
            # do not stay referenced by the module
            COMPUTE_NETWORK = None
            ALL_GENES = None
            NETWORK_SCORE_MEMBERSHIP = None
Exemplo n.º 3
0
def qm_result_matrices(matrices, tmp_mean, multiprocessing=True):
    """Build the result matrices: every value is looked up in tmp_mean at
    the position given by the rank of the original value.

    The parallel variant maps rank_fun over the matrices on a worker pool
    and returns whatever the pool yields; the serial variant returns a
    list of DataMatrix objects."""

    def rank_to_means(source):
        """Serial counterpart of the pool job for a single matrix."""
        data = source.values
        num_rows, num_cols = data.shape
        positions = util.rrank_matrix(data)
        return DataMatrix(num_rows,
                          num_cols,
                          source.row_names,
                          source.column_names,
                          values=np.reshape(tmp_mean[positions],
                                            (num_rows, num_cols)))

    if multiprocessing:
        # parallelized ranking
        with util.get_mp_pool() as pool:
            jobs = [(m.values, m.row_names, m.column_names, tmp_mean)
                    for m in matrices]
            parallel_result = pool.map(rank_fun, jobs)
        return parallel_result

    # non-parallelized
    return [rank_to_means(m) for m in matrices]
Exemplo n.º 4
0
def __compute_row_scores_for_clusters(membership, matrix, num_clusters,
                                      config_params):
    """Compute the pure row scores for the specified clusters
    without normalization.

    matrix and membership are stored in module-level globals before the
    work is started and are reset to None afterwards, also when scoring
    raises. config_params['multiprocessing'] selects between a worker pool
    and a plain loop.

    Returns a list holding the result of compute_row_scores_for_cluster for
    the clusters 1 to num_clusters, in that order.
    """
    # note that we set the data into globals before we fork it off
    # to save memory and pickling time
    global ROW_SCORE_MATRIX, ROW_SCORE_MEMBERSHIP
    ROW_SCORE_MATRIX = matrix
    ROW_SCORE_MEMBERSHIP = membership
    clusters = xrange(1, num_clusters + 1)

    try:
        if config_params['multiprocessing']:
            with util.get_mp_pool(config_params) as pool:
                return pool.map(compute_row_scores_for_cluster, clusters)
        return [compute_row_scores_for_cluster(cluster)
                for cluster in clusters]
    finally:
        # cleanup, runs on success and on failure
        ROW_SCORE_MATRIX = None
        ROW_SCORE_MEMBERSHIP = None
Exemplo n.º 5
0
    def do_compute(self, iteration_result, ref_matrix):
        """Compute the set enrichment scores for all clusters.

        For every configured set type, compute_cluster_score is evaluated
        for each cluster (on a worker pool if multiprocessing is enabled)
        and the scores, multiplied by the set type's weight, are added into
        a gene x cluster DataMatrix. The best set and its p-value per
        cluster are appended as one row per set type to
        'setEnrichment_set.csv' and 'setEnrichment_pvalue.csv' in the
        configured output directory.

        iteration_result -- mapping, only its 'iteration' entry is read
        ref_matrix -- object whose min() is passed on as reference minimum

        Returns the accumulated DataMatrix.

        NOTE(review): the earlier docstring mentioned returning None or a
        previous result; this method does neither, presumably the caller
        handles that - confirm.
        """
        global SET_MATRIX, SET_MEMBERSHIP, SET_SET_TYPE, SET_SYNONYMS, CANONICAL_ROWNAMES, CANONICAL_ROW_INDEXES
        logging.info("Compute scores for set enrichment...")
        start_time = util.current_millis()
        num_genes = len(self.gene_names())
        num_clusters = self.num_clusters()
        matrix = dm.DataMatrix(num_genes, num_clusters, self.gene_names())
        use_multiprocessing = self.config_params[scoring.KEY_MULTIPROCESSING]

        try:
            # published as globals before any pool is created
            SET_MATRIX = self.ratios
            SET_MEMBERSHIP = self.membership
            SET_SYNONYMS = self.organism.thesaurus()

            # the canonical names/indexes are computed once and then kept
            if CANONICAL_ROWNAMES is None:
                CANONICAL_ROWNAMES = set(
                    SET_SYNONYMS[name] if name in SET_SYNONYMS else name
                    for name in self.ratios.row_names)

            if CANONICAL_ROW_INDEXES is None:
                # build locally first so a failure can not leave a
                # half-filled cache behind
                row_indexes = {}
                for index, row in enumerate(self.ratios.row_names):
                    key = SET_SYNONYMS[row] if row in SET_SYNONYMS else row
                    row_indexes[key] = index
                CANONICAL_ROW_INDEXES = row_indexes

            ref_min_score = ref_matrix.min()
            logging.info('REF_MIN_SCORE: %f', ref_min_score)

            output_dir = self.config_params['output_dir']
            set_filepath = os.path.join(output_dir, 'setEnrichment_set.csv')
            pval_filepath = os.path.join(output_dir,
                                         'setEnrichment_pvalue.csv')

            for set_type in self.__set_types:
                SET_SET_TYPE = set_type
                logging.info("PROCESSING SET TYPE '%s'", set_type.name)
                start1 = util.current_millis()
                score_args = [(cluster, self.bonferroni_cutoff(),
                               ref_min_score)
                              for cluster in xrange(1, num_clusters + 1)]
                if use_multiprocessing:
                    with util.get_mp_pool(self.config_params) as pool:
                        results = pool.map(compute_cluster_score, score_args)
                else:
                    results = [compute_cluster_score(args)
                               for args in score_args]

                elapsed1 = util.current_millis() - start1
                logging.info("ENRICHMENT SCORES COMPUTED in %f s, STORING...",
                             elapsed1 / 1000.0)

                min_sets = []
                p_values = []
                for cluster_index in xrange(num_clusters):
                    # store the best enriched set determined
                    scores, min_set, min_pvalue = results[cluster_index]
                    min_sets.append(min_set)
                    p_values.append(min_pvalue)

                    for row in xrange(num_genes):
                        matrix.values[row][cluster_index] += (
                            scores[row] * set_type.weight)

                # the header line is only written when the set file is new;
                # the context managers close both files even on errors
                write_header = not os.path.exists(set_filepath)
                mode = 'w' if write_header else 'a'
                row_prefix = '\n' + str(iteration_result['iteration']) + ','
                with open(set_filepath, mode) as set_file:
                    with open(pval_filepath, mode) as pv_file:
                        if write_header:
                            header = ',' + ','.join(
                                [str(i)
                                 for i in xrange(1, num_clusters + 1)])
                            set_file.write(header)
                            pv_file.write(header)
                        set_file.write(
                            row_prefix + ','.join([str(i) for i in min_sets]))
                        pv_file.write(
                            row_prefix + ','.join([str(i) for i in p_values]))

            logging.info("SET ENRICHMENT FINISHED IN %f s.\n",
                         (util.current_millis() - start_time) / 1000.0)
        finally:
            # cleanup, also on errors
            SET_SET_TYPE = None
            SET_MATRIX = None
            SET_MEMBERSHIP = None
            SET_SYNONYMS = None

        return matrix
Exemplo n.º 6
0
    def compute_pvalues(self, iteration_result, num_motifs, force):
        """Compute motif scores.
        The result is a dictionary from cluster -> (feature_id, pvalue)
        containing a sparse gene-to-pvalue mapping for each cluster

        In order to influence the sequences
        that go into meme, the user can specify a list of sequence filter
        functions that have the signature
        (seqs, feature_ids, distance) -> seqs
        These filters are applied in the order they appear in the list.

        iteration_result -- mapping; 'iteration' is read, and for every
            cluster the entries 'motif-info' and 'pvalues' are written
        num_motifs -- passed through to ComputeScoreParams
        force -- if true, every cluster is recomputed; otherwise clusters
            whose feature ids equal those of the previous run reuse the
            previous result

        Returns a dictionary cluster -> pvalues.
        """
        global SEQUENCE_FILTERS, ORGANISM, MEMBERSHIP

        cluster_pvalues = {}
        min_cluster_rows_allowed = self.config_params['memb.min_cluster_rows_allowed']
        max_cluster_rows_allowed = self.config_params['memb.max_cluster_rows_allowed']
        use_multiprocessing = self.config_params[scoring.KEY_MULTIPROCESSING]
        clusters = xrange(1, self.num_clusters() + 1)

        # extract the sequences for each cluster, slow
        start_time = util.current_millis()
        SEQUENCE_FILTERS = self.__sequence_filters
        ORGANISM = self.organism
        MEMBERSHIP = self.membership

        try:
            cluster_seqs_params = [(cluster, self.seqtype)
                                   for cluster in clusters]
            if use_multiprocessing:
                with util.get_mp_pool(self.config_params) as pool:
                    seqs_list = pool.map(cluster_seqs, cluster_seqs_params)
            else:
                seqs_list = [cluster_seqs(p) for p in cluster_seqs_params]
        finally:
            # reset the globals even if the extraction failed
            SEQUENCE_FILTERS = None
            ORGANISM = None
            MEMBERSHIP = None
        logging.debug("prepared sequences in %d ms.", util.current_millis() - start_time)

        # Make the parameters, this is fast enough
        start_time = util.current_millis()
        params = {}
        for cluster in clusters:
            # Pass the previous run's seed if possible
            if self.__last_motif_infos is not None:
                previous_motif_infos = self.__last_motif_infos.get(cluster)
            else:
                previous_motif_infos = None

            seqs, feature_ids = seqs_list[cluster - 1]
            params[cluster] = ComputeScoreParams(iteration_result['iteration'], cluster,
                                                 feature_ids,
                                                 seqs,
                                                 self.used_seqs,
                                                 self.meme_runner(),
                                                 min_cluster_rows_allowed,
                                                 max_cluster_rows_allowed,
                                                 num_motifs,
                                                 previous_motif_infos,
                                                 self.config_params['output_dir'],
                                                 self.config_params['num_iterations'],
                                                 self.config_params['debug'])

        logging.debug("prepared MEME parameters in %d ms.",
                      util.current_millis() - start_time)

        # create motif result map if necessary
        for cluster in clusters:
            if cluster not in iteration_result:
                iteration_result[cluster] = {}

        # Optimization:
        # if the cluster hasn't changed since last time, reuse the last results
        # we do this by filtering out the parameters of the clusters that did not
        # change
        # A cluster without a stored result (e.g. after a failed previous run)
        # is always kept, otherwise the lookup would raise a KeyError
        if not force and self.__last_results is not None:
            oldlen = len(params)
            params = {cluster: params[cluster]
                      for cluster in clusters
                      if cluster not in self.__last_results or
                      params[cluster].feature_ids != self.__last_results[cluster][0]}
            newlen = len(params)
            if oldlen - newlen > 0:
                logging.debug("%d clusters did not change !!!", oldlen - newlen)

        # compute and store motif results
        self.__last_motif_infos = {}
        if self.__last_results is None:
            self.__last_results = {}

        # results: cluster -> (pvalues, run_result) for the recomputed clusters
        if use_multiprocessing:
            with util.get_mp_pool(self.config_params) as pool:
                raw_results = pool.map(compute_cluster_score,
                                       list(params.values()))
            results = {r[0]: r[1:] for r in raw_results}  # indexed by cluster
        else:
            results = {cluster: compute_cluster_score(params[cluster])[1:]
                       for cluster in clusters if cluster in params}

        for cluster in clusters:
            if cluster in results:
                pvalues, run_result = results[cluster]
                self.__last_results[cluster] = (params[cluster].feature_ids,
                                                pvalues, run_result)
            else:
                # unchanged cluster, fall back to the previous run
                _, pvalues, run_result = self.__last_results[cluster]

            cluster_pvalues[cluster] = pvalues
            if run_result:
                self.__last_motif_infos[cluster] = run_result.motif_infos
            iteration_result[cluster]['motif-info'] = meme_json(run_result)
            iteration_result[cluster]['pvalues'] = pvalues

        return cluster_pvalues
Exemplo n.º 7
0
    def compute_pvalues(self, iteration_result, num_motifs, force):
        """Compute motif scores.
        The result is a dictionary from cluster -> (feature_id, pvalue)
        containing a sparse gene-to-pvalue mapping for each cluster

        In order to influence the sequences
        that go into meme, the user can specify a list of sequence filter
        functions that have the signature
        (seqs, feature_ids, distance) -> seqs
        These filters are applied in the order they appear in the list.

        iteration_result -- mapping; its 'iteration' entry is read and, for
            every cluster, the entries 'motif-info' and 'pvalues' are set
        num_motifs -- handed to ComputeScoreParams unchanged
        force -- when true the reuse of previous results is skipped and
            every cluster is recomputed

        Returns the dictionary cluster -> pvalues. As a side effect the
        cached results and motif infos of this object are replaced/updated.
        """
        global SEQUENCE_FILTERS, ORGANISM, MEMBERSHIP

        cluster_pvalues = {}
        min_cluster_rows_allowed = self.config_params[
            'memb.min_cluster_rows_allowed']
        max_cluster_rows_allowed = self.config_params[
            'memb.max_cluster_rows_allowed']
        use_multiprocessing = self.config_params[scoring.KEY_MULTIPROCESSING]

        # extract the sequences for each cluster, slow
        start_time = util.current_millis()
        # the globals are set before the pool is created
        # NOTE(review): presumably cluster_seqs reads them - confirm
        SEQUENCE_FILTERS = self.__sequence_filters
        ORGANISM = self.organism
        MEMBERSHIP = self.membership

        cluster_seqs_params = [(cluster, self.seqtype)
                               for cluster in xrange(1,
                                                     self.num_clusters() + 1)]
        if use_multiprocessing:
            with util.get_mp_pool(self.config_params) as pool:
                seqs_list = pool.map(cluster_seqs, cluster_seqs_params)
        else:
            seqs_list = [cluster_seqs(p) for p in cluster_seqs_params]

        # the globals are only needed for the sequence extraction
        # NOTE(review): they are not reset if the extraction raises
        SEQUENCE_FILTERS = None
        ORGANISM = None
        MEMBERSHIP = None
        logging.debug("prepared sequences in %d ms.",
                      util.current_millis() - start_time)

        # Make the parameters, this is fast enough
        start_time = util.current_millis()
        params = {}
        for cluster in xrange(1, self.num_clusters() + 1):
            # Pass the previous run's seed if possible
            if self.__last_motif_infos is not None:
                previous_motif_infos = self.__last_motif_infos.get(
                    cluster, None)
            else:
                previous_motif_infos = None

            # seqs_list is in cluster order, cluster numbers start at 1
            seqs, feature_ids = seqs_list[cluster - 1]
            params[cluster] = ComputeScoreParams(
                iteration_result['iteration'], cluster, feature_ids, seqs,
                self.used_seqs, self.meme_runner(), min_cluster_rows_allowed,
                max_cluster_rows_allowed, num_motifs, previous_motif_infos,
                self.config_params['output_dir'],
                self.config_params['num_iterations'],
                self.config_params['debug'])

        logging.debug("prepared MEME parameters in %d ms.",
                      util.current_millis() - start_time)

        # create motif result map if necessary
        for cluster in xrange(1, self.num_clusters() + 1):
            if not cluster in iteration_result:
                iteration_result[cluster] = {}

        # Optimization:
        # if the cluster hasn't changed since last time, reuse the last results
        # we do this by filtering out the parameters of the clusters that did not
        # change
        # NOTE(review): the lookup below raises a KeyError if the cached
        # results exist but have no entry for a cluster
        if not force and self.__last_results is not None:
            oldlen = len(params)
            params = {
                cluster: params[cluster]
                for cluster in xrange(1,
                                      self.num_clusters() + 1) if
                params[cluster].feature_ids != self.__last_results[cluster][0]
            }
            newlen = len(params)
            if oldlen - newlen > 0:
                logging.debug("%d clusters did not change !!!",
                              oldlen - newlen)

        # compute and store motif results
        # the motif infos are rebuilt from scratch, the cached results are
        # kept and overwritten per recomputed cluster
        self.__last_motif_infos = {}
        if self.__last_results is None:
            self.__last_results = {}

        if use_multiprocessing:
            with util.get_mp_pool(self.config_params) as pool:
                # only the clusters that are left in params are computed
                results = pool.map(compute_cluster_score, params.values())
                results = {r[0]: r[1:] for r in results}  # indexed by cluster

                for cluster in xrange(1, self.num_clusters() + 1):
                    if cluster in results:
                        # fresh result, remember it for the next call
                        pvalues, run_result = results[cluster]
                        self.__last_results[cluster] = (
                            params[cluster].feature_ids, pvalues, run_result)
                    else:
                        # unchanged cluster, take the cached result
                        feature_ids, pvalues, run_result = self.__last_results[
                            cluster]

                    cluster_pvalues[cluster] = pvalues
                    if run_result:
                        self.__last_motif_infos[
                            cluster] = run_result.motif_infos
                    iteration_result[cluster]['motif-info'] = meme_json(
                        run_result)
                    iteration_result[cluster]['pvalues'] = pvalues
        else:
            # serial variant: compute and store cluster by cluster
            for cluster in xrange(1, self.num_clusters() + 1):
                if cluster in params:
                    _, pvalues, run_result = compute_cluster_score(
                        params[cluster])
                    self.__last_results[cluster] = (
                        params[cluster].feature_ids, pvalues, run_result)
                else:
                    # unchanged cluster, take the cached result
                    _, pvalues, run_result = self.__last_results[cluster]

                cluster_pvalues[cluster] = pvalues
                if run_result:
                    self.__last_motif_infos[cluster] = run_result.motif_infos
                iteration_result[cluster]['motif-info'] = meme_json(run_result)
                iteration_result[cluster]['pvalues'] = pvalues

        return cluster_pvalues
Exemplo n.º 8
0
    def do_compute(self, iteration_result, ref_matrix):
        """Run the set enrichment scoring.

        For each set type the per-cluster scores are computed, weighted with
        the set type's weight and summed up in a gene x cluster DataMatrix,
        which is returned. The best set and p-value of every cluster are
        appended to 'setEnrichment_set.csv' and 'setEnrichment_pvalue.csv'
        in the output directory, one row per set type, labelled with
        iteration_result['iteration'].
        """
        global SET_MATRIX, SET_MEMBERSHIP, SET_SET_TYPE, SET_SYNONYMS, CANONICAL_ROWNAMES, CANONICAL_ROW_INDEXES
        logging.info("Compute scores for set enrichment...")
        start_time = util.current_millis()
        matrix = dm.DataMatrix(len(self.gene_names()), self.num_clusters(),
                               self.gene_names())
        use_multiprocessing = self.config_params[scoring.KEY_MULTIPROCESSING]
        SET_MATRIX = self.ratios
        SET_MEMBERSHIP = self.membership
        SET_SYNONYMS = self.organism.thesaurus()

        # both canonical lookups are built on first use only
        if CANONICAL_ROWNAMES is None:
            canonical_names = []
            for name in self.ratios.row_names:
                if name in SET_SYNONYMS:
                    canonical_names.append(SET_SYNONYMS[name])
                else:
                    canonical_names.append(name)
            CANONICAL_ROWNAMES = set(canonical_names)

        if CANONICAL_ROW_INDEXES is None:
            CANONICAL_ROW_INDEXES = {}
            for position, name in enumerate(self.ratios.row_names):
                key = SET_SYNONYMS[name] if name in SET_SYNONYMS else name
                CANONICAL_ROW_INDEXES[key] = position

        ref_min_score = ref_matrix.min()
        logging.info('REF_MIN_SCORE: %f', ref_min_score)

        set_filepath = os.path.join(self.config_params['output_dir'],
                                    'setEnrichment_set.csv')
        pval_filepath = os.path.join(self.config_params['output_dir'],
                                     'setEnrichment_pvalue.csv')

        for set_type in self.__set_types:
            SET_SET_TYPE = set_type
            logging.info("PROCESSING SET TYPE '%s'", set_type.name)
            start1 = util.current_millis()
            if not use_multiprocessing:
                results = [
                    compute_cluster_score(
                        (cluster, self.bonferroni_cutoff(), ref_min_score))
                    for cluster in xrange(1, self.num_clusters() + 1)
                ]
            else:
                with util.get_mp_pool(self.config_params) as pool:
                    jobs = [(cluster, self.bonferroni_cutoff(), ref_min_score)
                            for cluster in xrange(1, self.num_clusters() + 1)]
                    results = pool.map(compute_cluster_score, jobs)

            elapsed1 = util.current_millis() - start1
            logging.info("ENRICHMENT SCORES COMPUTED in %f s, STORING...",
                         elapsed1 / 1000.0)

            # new files start with a header line of cluster numbers
            if os.path.exists(set_filepath):
                setFile = open(set_filepath, 'a')
                pvFile = open(pval_filepath, 'a')
            else:
                setFile = open(set_filepath, 'w')
                setFile.write(',' + ','.join(
                    map(str, xrange(1, self.num_clusters() + 1))))
                pvFile = open(pval_filepath, 'w')
                pvFile.write(',' + ','.join(
                    map(str, xrange(1, self.num_clusters() + 1))))

            best_sets = []
            best_pvalues = []
            for cluster in xrange(1, self.num_clusters() + 1):
                # store the best enriched set determined
                column = cluster - 1
                scores, min_set, min_pvalue = results[column]
                best_sets.append(min_set)
                best_pvalues.append(min_pvalue)

                for row in xrange(len(self.gene_names())):
                    matrix.values[row][column] += scores[row] * set_type.weight

            set_row = '\n' + str(iteration_result['iteration']) + ','
            setFile.write(set_row + ','.join(map(str, best_sets)))
            pval_row = '\n' + str(iteration_result['iteration']) + ','
            pvFile.write(pval_row + ','.join(map(str, best_pvalues)))
            setFile.close()
            pvFile.close()

        logging.info("SET ENRICHMENT FINISHED IN %f s.\n",
                     (util.current_millis() - start_time) / 1000.0)
        # cleanup
        SET_SET_TYPE = None
        SET_MATRIX = None
        SET_MEMBERSHIP = None
        SET_SYNONYMS = None

        return matrix
Exemplo n.º 9
0
def compute_column_scores(membership, matrix, num_clusters, config_params, BSCM_obj=None):
    """Computes the column scores for the specified number of clusters.

    membership -- provides rows_for_cluster() and columns_for_cluster()
    matrix -- data matrix the cluster submatrices are taken from
    num_clusters -- number of clusters, cluster numbers run from 1
    config_params -- 'multiprocessing' is read if BSCM_obj is None,
        'num_cores' otherwise
    BSCM_obj -- optional object whose getPvals() supplies the scores
        instead of compute_column_scores_submatrix

    Returns a DataMatrix with one row per column of matrix and one column
    per cluster. Clusters with fewer than two rows and NaN scores receive a
    substitution value, the 0.95 quantile of the scores of the columns that
    are members of their cluster.
    """

    def compute_substitution(cluster_column_scores):
        """calculate substitution value for missing column scores"""
        membership_values = []
        for cluster in xrange(1, num_clusters + 1):
            column_scores = cluster_column_scores[cluster - 1]
            if column_scores is None:
                continue
            # a set makes the membership test below O(1)
            columns = set(membership.columns_for_cluster(cluster))
            colnames, scores = column_scores
            for col, colname in enumerate(colnames):
                if colname in columns:
                    membership_values.append(scores[col])
        return util.quantile(membership_values, 0.95)

    def make_submatrix(cluster):
        """submatrix of the cluster's rows, None for less than 2 rows"""
        row_names = membership.rows_for_cluster(cluster)
        if len(row_names) > 1:
            return matrix.submatrix_by_name(row_names=row_names)
        return None

    clusters = xrange(1, num_clusters + 1)
    if BSCM_obj is None:
        if config_params["multiprocessing"]:
            submatrices = [make_submatrix(cluster) for cluster in clusters]
            with util.get_mp_pool(config_params) as pool:
                cluster_column_scores = pool.map(
                    compute_column_scores_submatrix, submatrices)
        else:
            cluster_column_scores = [
                compute_column_scores_submatrix(make_submatrix(cluster))
                for cluster in clusters]
    else:  # if BSCM_obj exists
        num_cores = config_params["num_cores"]
        if num_cores is None:
            num_cores = 1

        cluster_column_scores = []
        for cluster in clusters:
            # build the submatrix only once per cluster
            submatrix = make_submatrix(cluster)
            if submatrix is None:
                cluster_column_scores.append(None)
                continue
            cur_column_scores = BSCM_obj.getPvals(submatrix.row_names,
                                                  num_cores=num_cores)
            # materialize keys/values so np.array() sees a plain sequence
            exp_names = list(cur_column_scores.keys())
            exp_scores = np.array(list(cur_column_scores.values()))
            cluster_column_scores.append((exp_names, exp_scores))

    substitution = compute_substitution(cluster_column_scores)

    # Convert scores into a matrix that have the clusters as columns
    # and conditions in the rows
    num_rows = matrix.num_columns
    result = dm.DataMatrix(num_rows, num_clusters, row_names=matrix.column_names)
    rvalues = result.values
    for cluster_index in xrange(num_clusters):
        column_scores = cluster_column_scores[cluster_index]
        if column_scores is None:
            # no scores for this cluster, fill the whole column
            rvalues[:num_rows, cluster_index] = substitution
        else:
            _, scores = column_scores
            # note: the NaN replacement modifies the scores array in place
            scores[np.isnan(scores)] = substitution
            rvalues[:num_rows, cluster_index] = scores[:num_rows]
    result.fix_extreme_values()
    return result
Exemplo n.º 10
0
def compute_column_scores(membership,
                          matrix,
                          num_clusters,
                          config_params,
                          BSCM_obj=None):
    """Compute the column scores of all clusters and return them as a
    DataMatrix with the conditions in the rows and the clusters in the
    columns. Missing and NaN scores are replaced by a substitution value
    derived from the scores of the member columns."""
    def compute_substitution(cluster_column_scores):
        """0.95 quantile over the scores of all member columns"""
        member_scores = []
        for cluster in xrange(1, num_clusters + 1):
            member_columns = membership.columns_for_cluster(cluster)
            entry = cluster_column_scores[cluster - 1]
            if entry is None:
                continue
            names, values = entry
            for position in xrange(len(names)):
                if names[position] in member_columns:
                    member_scores.append(values[position])
        return util.quantile(member_scores, 0.95)

    def make_submatrix(cluster):
        """rows of the cluster as submatrix, None unless more than 1 row"""
        names = membership.rows_for_cluster(cluster)
        if len(names) <= 1:
            return None
        return matrix.submatrix_by_name(row_names=names)

    if BSCM_obj is not None:
        # scores are taken from the BSCM object
        cluster_column_scores = []
        num_cores = 1
        if config_params['num_cores'] is not None:
            num_cores = config_params['num_cores']

        for cluster in xrange(1, num_clusters + 1):
            if make_submatrix(cluster) is not None:
                pvals = BSCM_obj.getPvals(make_submatrix(cluster).row_names,
                                          num_cores=num_cores)
                cluster_column_scores.append(
                    (pvals.keys(), np.array(pvals.values())))
            else:
                cluster_column_scores.append(None)
    elif config_params['multiprocessing']:
        cluster_column_scores = []  # overwritten by the pool result
        with util.get_mp_pool(config_params) as pool:
            submatrices = map(make_submatrix, xrange(1, num_clusters + 1))
            cluster_column_scores = pool.map(compute_column_scores_submatrix,
                                             submatrices)
    else:
        cluster_column_scores = []
        for cluster in xrange(1, num_clusters + 1):
            submatrix = make_submatrix(cluster)
            cluster_column_scores.append(
                compute_column_scores_submatrix(submatrix))

    substitution = compute_substitution(cluster_column_scores)

    # Lay the scores out with one column per cluster and one row
    # per condition
    result = dm.DataMatrix(matrix.num_columns,
                           num_clusters,
                           row_names=matrix.column_names)
    out_values = result.values
    for col_index in xrange(num_clusters):
        entry = cluster_column_scores[col_index]

        if entry is None:
            for row_index in xrange(matrix.num_columns):
                out_values[row_index, col_index] = substitution
            continue

        _, cluster_scores = entry
        cluster_scores[np.isnan(cluster_scores)] = substitution
        for row_index in xrange(matrix.num_columns):
            out_values[row_index, col_index] = cluster_scores[row_index]
    result.fix_extreme_values()
    return result