示例#1
0
def randIndexes(clustering_results):
    '''
    Calculate Rand index and adjusted Rand index over pairwise
    clustering comparisons.

    Cluster labels are first remapped to integer ids, then the
    cythonised ``Timeseries.consensus_metrics`` function calculates
    the indices on the remapped matrix.

    Arguments
    ---------
    clustering_results:
        Table of cluster labels.  Only its ``.values`` attribute is read
        directly here; the object itself is handed to
        ``make_mapped_matrix``.  Presumably a pandas DataFrame with genes
        as rows and one column per clustering - confirm against the
        caller.

    Returns
    -------
    The value returned by ``Timeseries.consensus_metrics``, unchanged
    (presumably a matrix of Rand indices between clusterings - confirm
    against that function).
    '''

    # reassign cluster labels with integer ids, integer comparison is
    # much faster than string comparison
    map_dict = get_label_map(clustering_results.values)
    E.info("mapping gene ids")

    integer_matrix = make_mapped_matrix(map_dict, clustering_results)

    E.info("counting clustering consensus")
    # use cythonized function to return rand index matrix
    cy_rand = Timeseries.consensus_metrics(integer_matrix)
    E.info("Rand Index calculated for all clusterings")

    return cy_rand