# Example #1
def q2(y_true, y_pred, contingency=None, cond_entrop=None, a_i=None, b_j=None):
    """Compute the normalized Q2 clustering-comparison score.

    Any of the precomputed quantities (contingency table, conditional
    entropy, row sums ``a_i``, column sums ``b_j``) may be supplied to
    avoid recomputation; missing ones are derived from y_true / y_pred.
    The raw q0 value is rescaled between its minimum and maximum so the
    returned score is normalized.
    """
    contingency = (utility.compute_contingency(y_true, y_pred)
                   if contingency is None else contingency)
    cond_entrop = (cond_entropy(y_true, y_pred, contingency)
                   if cond_entrop is None else cond_entrop)
    a_i = utility.compute_a_i(contingency) if a_i is None else a_i
    b_j = utility.compute_b_j(contingency) if b_j is None else b_j

    num_pred = np.ma.size(b_j)
    total = np.sum(a_i)

    # log of binomial coefficients C(a_i + J - 1, J - 1); the masked log
    # turns invalid entries (log of 0) into 0 instead of -inf/NaN.
    log_comb_a = np.ma.log(comb(a_i + num_pred - 1, num_pred - 1)).filled(0)
    q0 = cond_entrop + np.sum(log_comb_a) / total

    log_comb_b = np.ma.log(comb(b_j + num_pred - 1, num_pred - 1)).filled(0)

    q0_max = sklearn.metrics.cluster.entropy(y_true) + np.log(num_pred)
    q0_min = np.sum(log_comb_b) / total

    # Normalize q0 between its extremes.
    return (q0_max - q0) / (q0_max - q0_min)
# Example #2
def adjusted_mutual_information(y_true,
                                y_pred,
                                contingency=None,
                                a_i=None,
                                b_j=None,
                                mi=None,
                                real_clustering_entropy=None,
                                predicted_clustering_entropy=None):
    """Adjusted mutual information between a true and a predicted clustering.

    AMI = (MI - E[MI]) / (max(MI) - E[MI]), with sqrt(H(true) * H(pred))
    used as the upper bound on MI.  Precomputed intermediates may be
    passed in to avoid recomputation; missing ones are derived from
    y_true / y_pred.
    """
    if contingency is None:
        contingency = utility.compute_contingency(y_true, y_pred)
    if a_i is None:
        a_i = utility.compute_a_i(contingency)
    if b_j is None:
        # BUG FIX: the function was assigned (`utility.compute_b_j`)
        # instead of called, leaving b_j bound to a function object.
        b_j = utility.compute_b_j(contingency)
    if mi is None:
        mi = mutual_information(y_true, y_pred, contingency, a_i, b_j)
    if real_clustering_entropy is None:
        real_clustering_entropy = sklearn.metrics.cluster.entropy(y_true)
    if predicted_clustering_entropy is None:
        predicted_clustering_entropy = sklearn.metrics.cluster.entropy(y_pred)
    # (Removed dead locals J_a_i / I_b_j: they were computed but never used.)

    N = np.sum(contingency)

    emi = expected_mutual_information(contingency, N)
    # Upper bound on MI: geometric mean of the two clustering entropies.
    maxmi = np.sqrt(real_clustering_entropy * predicted_clustering_entropy)

    return (mi - emi) / (maxmi - emi)
# Example #3
def cond_entropy(y_true, y_pred, contingency=None, a_i=None):
    """Average entropy of the predicted clusters.

    Measures how diverse each predicted cluster is with respect to
    membership in the real clusters; this conditional entropy must be
    distinguished from the plain clustering entropy.
    """
    if contingency is None:
        contingency = utility.compute_contingency(y_true, y_pred)
    if a_i is None:
        a_i = utility.compute_a_i(contingency)

    # Broadcast the per-row totals a_i across the columns of the table.
    row_totals = np.repeat(a_i, np.size(contingency, axis=1), axis=1)
    # Masked log maps the log(0) cells to 0 instead of -inf.
    log_ratio = np.ma.log(np.true_divide(contingency, row_totals)).filled(0)
    total = np.ma.sum(a_i)
    return -np.sum(contingency * log_ratio) / total
# Example #4
def mutual_information(y_true, y_pred, contingency=None, a_i=None, b_j=None):
    """Mutual information between a true and a predicted clustering.

    MI = sum_ij n_ij/N * log(N * n_ij / (a_i * b_j)), computed from the
    contingency table and its marginals.  Precomputed intermediates may
    be passed in to avoid recomputation.
    """
    if contingency is None:
        contingency = utility.compute_contingency(y_true, y_pred)
    if a_i is None:
        a_i = utility.compute_a_i(contingency)
    if b_j is None:
        # BUG FIX: was `utility.compute_b_j` (assigned, never called).
        b_j = utility.compute_b_j(contingency)

    # Broadcast row totals across columns and column totals across rows.
    J_a_i = np.repeat(a_i, np.ma.size(b_j), axis=1)
    I_b_j = np.repeat(b_j, np.ma.size(a_i), axis=0)
    N = np.sum(a_i)

    # Masked log sends empty cells (n_ij == 0) to 0 instead of -inf.
    tmp = np.true_divide(N * contingency, J_a_i * I_b_j)
    log_value = np.ma.log(tmp).filled(0)

    return np.sum(contingency * log_value) / N
# Example #5
def normalized_mutual_information(y_true,
                                  y_pred,
                                  contingency=None,
                                  a_i=None,
                                  b_j=None,
                                  mi=None,
                                  real_clustering_entropy=None,
                                  predicted_clustering_entropy=None):
    """Normalized mutual information: MI / sqrt(H(true) * H(pred)).

    Precomputed intermediates may be passed in to avoid recomputation;
    missing ones are derived from y_true / y_pred.
    """
    if contingency is None:
        contingency = utility.compute_contingency(y_true, y_pred)
    if a_i is None:
        a_i = utility.compute_a_i(contingency)
    if b_j is None:
        # BUG FIX: was `utility.compute_b_j` (assigned, never called).
        b_j = utility.compute_b_j(contingency)
    if mi is None:
        mi = mutual_information(y_true, y_pred, contingency, a_i, b_j)
    if real_clustering_entropy is None:
        real_clustering_entropy = sklearn.metrics.cluster.entropy(y_true)
    if predicted_clustering_entropy is None:
        predicted_clustering_entropy = sklearn.metrics.cluster.entropy(y_pred)
    # Geometric-mean normalization keeps the score in [0, 1].
    return mi / np.sqrt(real_clustering_entropy * predicted_clustering_entropy)
# Example #6
def adjusted_rand_index(y_true,
                        y_pred,
                        contingency=None,
                        PBV=None,
                        a_i=None,
                        b_j=None):
    """Adjusted Rand index of a predicted clustering.

    ARI = (RI - E[RI]) / (max(RI) - E[RI]), computed from pair counts in
    the contingency table and its marginals.  ``PBV`` is kept in the
    signature for caller compatibility but is not used by the formula.
    """
    if contingency is None:
        # BUG FIX: was calling bare `compute_contingency`, which is not
        # defined in this module — every sibling uses the utility module.
        contingency = utility.compute_contingency(y_true, y_pred)
    if a_i is None:
        a_i = utility.compute_a_i(contingency)
    if b_j is None:
        b_j = utility.compute_b_j(contingency)
    # (Removed dead code: PBV was unpacked into tp/fp/fn/tn but never used.)

    N = np.sum(a_i)
    # The three quantities below all omit a common comb(N, 2) divisor,
    # which cancels in the final ratio.
    ri = np.sum(comb(contingency, 2))
    eri = (np.sum(comb(a_i, 2)) * np.sum(comb(b_j, 2))) / comb(N, 2)
    maxri = 0.5 * (np.sum(comb(a_i, 2)) + np.sum(comb(b_j, 2)))
    return (ri - eri) / (maxri - eri)
# Example #7
def variation_of_information(y_true,
                             y_pred,
                             contingency=None,
                             a_i=None,
                             b_j=None):
    """Variation of information between a true and a predicted clustering.

    VI = -sum_ij n_ij * (log(n_ij/a_i) + log(n_ij/b_j)) / N, i.e. the sum
    of the two conditional entropies.  Precomputed intermediates may be
    supplied to avoid recomputation.
    """
    if contingency is None:
        contingency = utility.compute_contingency(y_true, y_pred)
    if a_i is None:
        a_i = utility.compute_a_i(contingency)
    if b_j is None:
        b_j = utility.compute_b_j(contingency)

    # Broadcast row totals across columns and column totals across rows.
    row_totals = np.repeat(a_i, np.ma.size(b_j), axis=1)
    col_totals = np.repeat(b_j, np.ma.size(a_i), axis=0)
    total = np.sum(a_i)

    # Masked logs turn empty-cell log(0) into 0 so they do not contribute.
    log_row = np.ma.log(np.true_divide(contingency, row_totals)).filled(0)
    log_col = np.ma.log(np.true_divide(contingency, col_totals)).filled(0)

    return -np.sum(contingency * (log_row + log_col)) / total
# Example #8
def compute_prediction_metrics(y_true, y_pred):
    """Compute every criterion in CRITERION_LIST for a predicted clustering.

    Shared intermediates (contingency table, marginals a_i/b_j, the two
    clustering entropies, and pair-based values) are computed once up
    front and passed to each metric to avoid recomputation.  Returns a
    dict mapping criterion short names to their scores; prints an error
    if the dict's keys do not match CRITERION_LIST.
    """
    # Calculate criteria score for each one present in CRITERION_LIST

    # Calculating values that are presents in several formula of criterion
    contingency = utility.compute_contingency(y_true, y_pred)
    a_i = utility.compute_a_i(contingency)
    b_j = utility.compute_b_j(contingency)
    true_clustering_entropy = sklearn.metrics.cluster.entropy(y_true)
    predicted_clustering_entropy = sklearn.metrics.cluster.entropy(y_pred)
    PBV = utility.pair_based_values(contingency)

    # Getting all criterion values
    mi = converted_metrics.mutual_information(y_true, y_pred, contingency, a_i,
                                              b_j)
    ari = converted_metrics.adjusted_rand_index(y_true, y_pred, contingency,
                                                PBV, a_i, b_j)
    ami = converted_metrics.adjusted_mutual_information(
        y_true, y_pred, contingency, a_i, b_j, mi, true_clustering_entropy,
        predicted_clustering_entropy)
    # NOTE(review): `compl` stores homogeneity() and `homog` stores
    # completness(), which looks like a name swap — but the entropy
    # arguments are also crossed relative to h = MI/H(true) and
    # c = MI/H(pred), so the two swaps may cancel inside
    # converted_metrics.  Verify against the homogeneity()/completness()
    # definitions before changing anything.  v_measure is unaffected
    # either way: it is symmetric in (homog, compl) for beta == 1.
    compl = converted_metrics.homogeneity(y_true, y_pred, contingency, mi,
                                          predicted_clustering_entropy)
    homog = converted_metrics.completness(y_true, y_pred, contingency, mi,
                                          true_clustering_entropy)
    vmeasure = converted_metrics.v_measure(y_true, y_pred, 1, mi, contingency,
                                           predicted_clustering_entropy,
                                           true_clustering_entropy, homog,
                                           compl)
    entropy = new_metrics.cond_entropy(y_true, y_pred, contingency, a_i)
    #accuracy = converted_metrics.accuracy(y_true, y_pred, contingency, PBV)  Is equal to ARI in pair-based value context
    precision = converted_metrics.precision(y_true, y_pred, contingency, PBV)
    recall = converted_metrics.recall(y_true, y_pred, contingency, PBV)
    falsealarm = converted_metrics.false_alarm_rate(y_true, y_pred,
                                                    contingency, PBV)
    fm = converted_metrics.fowlkes_mallows(y_true, y_pred, contingency, PBV)
    f1 = converted_metrics.f_beta_score(y_true, y_pred, 1, contingency, PBV,
                                        precision, recall)
    purity = new_metrics.purity(y_true, y_pred, contingency)
    inversed_purity = new_metrics.inversed_purity(y_true, y_pred, contingency)
    epratio = new_metrics.ep_ratio(y_true, y_pred, contingency, a_i, entropy,
                                   purity)
    jaccard = converted_metrics.jaccard_index(y_true, y_pred, contingency, PBV)
    nmi = converted_metrics.normalized_mutual_information(
        y_true, y_pred, contingency, a_i, b_j, mi, true_clustering_entropy,
        predicted_clustering_entropy)
    ri = new_metrics.rand_index(y_true, y_pred, contingency, PBV)
    vi = new_metrics.variation_of_information(y_true, y_pred, contingency, a_i,
                                              b_j)
    # clustering error not calculated : always equal to 1 - accuracy
    goodness = converted_metrics.goodness(y_true, y_pred, contingency, PBV)
    bal_accuracy = converted_metrics.balanced_accuracy(y_true, y_pred,
                                                       contingency, PBV)
    q2 = new_metrics.q2(y_true, y_pred, contingency, entropy, a_i, b_j)

    # Map criterion short names to their computed scores.
    metrics_dictionnary = {
        "mi": mi,
        "ari": ari,
        "ami": ami,
        "compl": compl,
        "homog": homog,
        "vmeasure": vmeasure,
        "entropy": entropy,
        "precision": precision,
        "recall": recall,
        "falsealarm": falsealarm,
        "fm": fm,
        "f1": f1,
        "purity": purity,
        "inv_purity": inversed_purity,
        "epratio": epratio,
        "jaccard": jaccard,
        "nmi": nmi,
        "ri": ri,
        "vi": vi,
        "goodness": goodness,
        "balacc": bal_accuracy,
        "q2": q2
    }

    # Sanity check: warn (without raising) if any criterion is missing.
    if set(metrics_dictionnary.keys()) != CRITERION_LIST:
        print(
            "ERROR : One or several criterion are not computed in main.compute_prediction_metrics"
        )

    return metrics_dictionnary