Exemplo n.º 1
0
def generate_random_partition_num(n_elements, n_clusters):
    """
        Wrap the cluster list produced by ``_random_partition_num_iterator``
        in a new :class:`Clustering`.

        :param n_elements:
            Forwarded unchanged to ``_random_partition_num_iterator``
            (presumably the number of elements to partition -- confirm
            against that helper).

        :param n_clusters:
            Forwarded unchanged to ``_random_partition_num_iterator``
            (presumably the number of clusters to create -- confirm against
            that helper).

        :returns:
            A new Clustering populated through ``from_cluster_list``.
    """
    # The helper supplies the cluster list; this function only packages it.
    partition = _random_partition_num_iterator(n_elements, n_clusters)

    random_clustering = Clustering()
    random_clustering.from_cluster_list(partition)

    return random_clustering
Exemplo n.º 2
0
def shuffle_memberships_pa(clustering, n_steps=1, constant_num_clusters=True):
    """
        This function creates a new clustering by shuffling the element
        memberships from the original clustering according to the preferential
        attachment model.

        At every step a source cluster and a destination cluster are drawn
        independently, each with probability proportional to its current
        size.  If the two differ, one uniformly chosen element of the source
        cluster is moved to the destination cluster.

        See :cite:`Gates2017impact` for a detailed explanation of the
        preferential attachment model.

        :param Clustering clustering: The original clustering.

        :param int n_steps: optional (default 1)
            The number of times to run the preferential attachment algorithm.
            Rejected moves (see below) and draws where source and destination
            coincide still count as a step.

        :param Boolean constant_num_clusters: optional (default True)
            Reject a shuffling move if it leaves a cluster with no elements.
            Set to True to keep the number of clusters constant.

        :returns:
            The new clustering with shuffled memberships.

        :raises ZeroDivisionError:
            If ``clustering.n_elements`` is 0.

        >>> import clusim.clugen as clugen
        >>> from clusim.clustering import print_clustering
        >>> orig_clu = clugen.make_random_clustering(n_elements=9, n_clusters=3,
        ...                                          random_model='num')
        >>> print_clustering(orig_clu)
        >>> shuffle_clu = clugen.shuffle_memberships_pa(orig_clu, n_steps=10,
        ...                                             constant_num_clusters=True)
        >>> print_clustering(shuffle_clu)
    """
    n_elements_norm = 1./float(clustering.n_elements)

    n_clusters = clustering.n_clusters
    cluster_list = clustering.to_cluster_list()

    # Sizes are tracked as exact integers and the probabilities are always
    # derived from them.  Incrementally adding/subtracting 1/n to a float
    # accumulates rounding error, so an emptied cluster could keep a tiny
    # non-zero (possibly negative) probability, which np.random.choice either
    # rejects or turns into a draw from an empty cluster.
    cluster_sizes = [len(members) for members in cluster_list]
    cluster_size_prob = np.array(cluster_sizes) * n_elements_norm
    cluster_names = range(n_clusters)

    for _ in range(n_steps):
        from_cluster = np.random.choice(cluster_names, p=cluster_size_prob)

        # Taking the only element of a singleton cluster would delete the
        # cluster; skip the move when the cluster count must stay constant.
        if constant_num_clusters and cluster_sizes[from_cluster] < 2:
            continue

        new_cluster = np.random.choice(cluster_names, p=cluster_size_prob)
        if new_cluster == from_cluster:
            continue

        # Pick the element by position rather than passing the member list
        # to np.random.choice: that call coerces the members to an ndarray,
        # which changes their type (and stringifies mixed-type members, so
        # they can no longer be found in the original list).
        members = cluster_list[from_cluster]
        exchanged_element = members.pop(np.random.randint(len(members)))
        cluster_list[new_cluster].append(exchanged_element)

        cluster_sizes[from_cluster] -= 1
        cluster_sizes[new_cluster] += 1
        for changed in (from_cluster, new_cluster):
            cluster_size_prob[changed] = cluster_sizes[changed] * n_elements_norm

    new_clustering = Clustering()
    new_clustering.from_cluster_list(cluster_list)

    return new_clustering
Exemplo n.º 3
0
def generate_random_partition_perm(clu_size_seq):
    """
        Build a random clustering whose cluster sizes follow ``clu_size_seq``.

        The elements ``0 .. sum(clu_size_seq) - 1`` are randomly permuted and
        the permutation is cut into consecutive chunks, one per entry of
        ``clu_size_seq``, each chunk having the requested size.

        :param clu_size_seq:
            Sequence of cluster sizes; its length is the number of clusters.

        :returns:
            A new Clustering populated through ``from_cluster_list`` with the
            chunks (NumPy array slices of the permutation).
    """
    total_elements = sum(clu_size_seq)
    cluster_count = len(clu_size_seq)

    shuffled_elements = np.random.permutation(np.arange(total_elements))

    # boundaries[i] and boundaries[i + 1] delimit the chunk of cluster i.
    boundaries = np.hstack([[0], np.cumsum(clu_size_seq)])
    chunk_starts = boundaries[:cluster_count]
    chunk_stops = boundaries[1:cluster_count + 1]

    chunks = []
    for start, stop in zip(chunk_starts, chunk_stops):
        chunks.append(shuffled_elements[start:stop])

    random_clustering = Clustering()
    random_clustering.from_cluster_list(chunks)

    return random_clustering
def compare_scores(nexperiment, true_clusters, true_labels, predicted_clusters,
                   predicted_labels):
    """
        Print a report comparing a predicted clustering with the true one.

        Label-based scores are computed from ``true_labels`` and
        ``predicted_labels``; the remaining scores are computed from
        ``true_clusters`` and ``predicted_clusters``, either directly
        (``score.calculate_mp_score``) or after loading them into
        ``Clustering`` objects (the ``sim.*`` measures).  Every score is
        rounded to 3 decimals when printed.

        :param nexperiment: Identifier printed in the report header.

        :param true_clusters: Ground-truth clustering as a cluster list.

        :param true_labels: Ground-truth labels for the label-based metrics.

        :param predicted_clusters: Predicted clustering as a cluster list.

        :param predicted_labels: Predicted labels for the label-based metrics.

        :returns: None; the report is written to standard output.
    """
    # Scores are computed up front, in a fixed order, before anything is
    # printed.
    weighted_similarity = score.calculate_mp_score(true_clusters,
                                                   predicted_clusters)
    nmi_arithmetic = normalized_mutual_info_score(true_labels,
                                                  predicted_labels,
                                                  average_method='arithmetic')
    ami = adjusted_mutual_info_score(true_labels, predicted_labels)
    completeness = completeness_score(true_labels, predicted_labels)
    v_measure = v_measure_score(true_labels, predicted_labels)
    adjusted_rand = adjusted_rand_score(true_labels, predicted_labels)
    fowlkes_mallows = fowlkes_mallows_score(true_labels, predicted_labels)

    # The sim.* measures operate on Clustering objects rather than raw lists.
    true_clustering = Clustering()
    predicted_clustering = Clustering()
    true_clustering.from_cluster_list(true_clusters)
    predicted_clustering.from_cluster_list(predicted_clusters)

    jaccard = sim.jaccard_index(true_clustering, predicted_clustering)
    nmi_clusim = sim.nmi(true_clustering, predicted_clustering)
    f_measure = sim.fmeasure(true_clustering, predicted_clustering)
    element_centric = sim.element_sim(true_clustering, predicted_clustering)
    rand_index = sim.rand_index(true_clustering, predicted_clustering)

    # Rows appear in the order they are printed, which differs from the
    # order in which the scores were computed.
    report_rows = (
        ("Weigthed Similarity: ", weighted_similarity),
        ("NMI: ", nmi_arithmetic),
        ("AMI: ", ami),
        ("NMI2: ", nmi_clusim),
        ("RI: ", rand_index),
        ("Completeness: ", completeness),
        ("V-Measure: ", v_measure),
        ("Adjusted Rand: ", adjusted_rand),
        ("Fowlkes Mallows: ", fowlkes_mallows),
        ("Jaccard Index: ", jaccard),
        ("F-Measure: ", f_measure),
        ("Element-centric: ", element_centric),
    )

    print("------------------")
    print("Example ", nexperiment)
    for label, value in report_rows:
        print(label, round(value, 3))
    print()