Пример #1
0
def get_best_clusters(scores, n, sort=False):
    """Map every row name of a score matrix to its n best scored clusters.

    For each row index in ``xrange(scores.num_rows)`` the row's values are
    passed to ``util.rorder(values, n)`` and the result is stored under the
    matching entry of ``scores.row_names``.

    scores -- row/column score matrix exposing ``num_rows``, ``row_names``
              and ``row_values(row)``
    n      -- number of entries requested from ``util.rorder`` per row
    sort   -- when truthy, each per-row result is additionally passed
              through ``sorted()`` (and therefore becomes a list)

    Returns a dict keyed by row name.

    NOTE(review): the ranking itself is entirely defined by util.rorder,
    which is not visible here -- confirm what it returns.
    """
    best_by_row = {}
    for row_index in xrange(scores.num_rows):
        best = util.rorder(scores.row_values(row_index), n)
        if sort:
            best = sorted(best)
        best_by_row[scores.row_names[row_index]] = best
    return best_by_row
Пример #2
0
def get_best_clusters(scores, n, sort=False):
    """Retrieve the n best scored clusters for each row of a score matrix.

    Builds a dict that maps ``scores.row_names[row]`` to the value of
    ``util.rorder(scores.row_values(row), n)`` for every row index below
    ``scores.num_rows``.  With a truthy ``sort`` the per-row result is
    wrapped in ``sorted()`` before it is stored.

    NOTE(review): what "best" means is decided by util.rorder, whose
    implementation is outside this block -- verify against that helper.
    """
    def best_for(row_index):
        # rank a single row, optionally normalising the order of the result
        ranked = util.rorder(scores.row_values(row_index), n)
        return sorted(ranked) if sort else ranked

    return {scores.row_names[row_index]: best_for(row_index)
            for row_index in xrange(scores.num_rows)}
Пример #3
0
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best')

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    For every cluster number 1..num_clusters the rows whose first
    membership entry (``row_membership[row][0]``) equals that cluster are
    collected into a submatrix, column scores are computed for it with
    ``scoring.compute_column_scores_submatrix`` and their negation is
    stored as that cluster's column in a (num_columns x num_clusters)
    score array.  Each column of data_matrix is then assigned
    ``util.rorder(<its score row>, num_clusters_per_column)``.

    data_matrix             -- matrix exposing ``num_rows``, ``num_columns``,
                               ``row_names`` and ``submatrix_by_name``
    row_membership          -- indexable by row index; only element [0] of
                               each entry is consulted here
    num_clusters            -- number of clusters; cluster numbers are 1-based
    num_clusters_per_column -- second argument handed to ``util.rorder``

    Returns a list with one ``util.rorder`` result per column.
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns
    # one score row per data column, one score column per cluster
    cscores = np.zeros([num_cols, num_clusters])
    for cluster_num in xrange(1, num_clusters + 1):
        # names of the rows seeded into this cluster
        current_cluster_rows = [
            data_matrix.row_names[row_index]
            for row_index in xrange(num_rows)
            if row_membership[row_index][0] == cluster_num
        ]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        _, scores = scoring.compute_column_scores_submatrix(submatrix)
        # cluster numbers are 1-based, score columns 0-based.
        # NOTE(review): scores are negated before ranking, presumably so
        # that low column scores rank best in util.rorder -- confirm.
        cscores.T[cluster_num - 1] = -scores

    # only the ranking step below is timed, not the scoring loop above
    start_time = util.current_millis()
    column_members = [
        util.rorder(cscores[i], num_clusters_per_column)
        for i in xrange(num_cols)
    ]
    elapsed = util.current_millis() - start_time
    # convert milliseconds to seconds; the previous '%' reported the
    # millisecond remainder instead of the elapsed seconds
    logging.debug("seed column members in %f s.", elapsed / 1000.0)
    return column_members
Пример #4
0
def seed_column_members(data_matrix, row_membership, num_clusters,
                        num_clusters_per_column):
    """Default column membership seeder ('best')

    In case of multiple input ratio matrices, we assume that these
    matrices have been combined into data_matrix.

    Steps:
      1. For each cluster number in 1..num_clusters, gather the names of
         the rows whose ``row_membership[row][0]`` equals that number and
         build the corresponding submatrix of data_matrix.
      2. Compute the submatrix' column scores via
         ``scoring.compute_column_scores_submatrix`` and store their
         negation as the cluster's column of a
         (num_columns x num_clusters) array.
      3. For each data column, call
         ``util.rorder(<score row>, num_clusters_per_column)``.

    Returns the list of per-column ``util.rorder`` results, in column order.
    Emits a debug log line with the time spent in step 3.
    """
    num_rows = data_matrix.num_rows
    num_cols = data_matrix.num_columns
    # score table: rows are data columns, columns are clusters
    cscores = np.zeros([num_cols, num_clusters])
    for cluster_num in xrange(1, num_clusters + 1):
        # only the first membership entry of a row decides its seed cluster
        current_cluster_rows = [data_matrix.row_names[row_index]
                                for row_index in xrange(num_rows)
                                if row_membership[row_index][0] == cluster_num]
        submatrix = data_matrix.submatrix_by_name(
            row_names=current_cluster_rows)
        _, scores = scoring.compute_column_scores_submatrix(submatrix)
        # shift the 1-based cluster number to a 0-based column index.
        # NOTE(review): negation presumably makes the lowest scores rank
        # first in util.rorder -- verify against that helper.
        cscores.T[cluster_num - 1] = -scores

    start_time = util.current_millis()
    column_members = [util.rorder(cscores[i], num_clusters_per_column)
                      for i in xrange(num_cols)]
    elapsed = util.current_millis() - start_time
    # elapsed is in milliseconds; divide (not modulo) to report seconds
    logging.debug("seed column members in %f s.", elapsed / 1000.0)
    return column_members