Exemplos de fowlkes_mallows_score em Python, exemplos de sklearn.metrics.cluster.fowlkes_mallows_score em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: porownanie.py Projeto: aleksandramiesiac/pd3

def Porownaj_algorytmy(data, klasy, labels, method, baza):
    """
    Compute the FM, AM and AR indices for every reference algorithm, i.e.
    all algorithms except the one written by the author.

    Each result row is stored together with its own label, so the row
    labels always match the algorithm that produced the scores (previously
    the MeanShift, AgglomerativeClustering and KMeans rows were labelled in
    a different order than they were computed).

    :param data: observations passed unchanged to every clustering algorithm.
    :param klasy: requested number of clusters; also passed as the MeanShift
        ``bandwidth`` (see the review note below).
    :param labels: reference labels the clusterings are compared against.
    :param method: iterable of linkage method names understood by
        ``linkage``; one row is produced per name, labelled with that name.
    :param baza: dataset identifier copied into the "Dane" column of each row.
    :return: ``pd.DataFrame`` with columns "FM", "AM", "AR", "Dane" and one
        row per algorithm.
    """

    def _wiersz(przewidziane):
        # One result row: FM, AMI, ARI and the dataset tag.
        return [fowlkes_mallows_score(labels, przewidziane),
                adjusted_mutual_info_score(labels, przewidziane),
                adjusted_rand_score(labels, przewidziane),
                baza]

    wektor = []
    index = []

    # linkage algorithms
    for name in method:
        Z = linkage(data, name)
        # The result of cut_tree is iterated row by row and flattened into a
        # plain list of cluster ids.
        podzial = cluster.hierarchy.cut_tree(Z, klasy)
        wektor.append(_wiersz([y for x in podzial for y in x]))
        index.append(name)

    # genieclust
    wynikMG = genieclust.genie.Genie(n_clusters=klasy).fit_predict(data)
    wektor.append(_wiersz(wynikMG))
    index.append("genieclust")

    # MeanShift
    # NOTE(review): the cluster count is used as the kernel bandwidth here;
    # this looks unintended but is kept as in the original - confirm.
    wynikCL = MeanShift(bandwidth=klasy).fit(data).labels_
    wektor.append(_wiersz(wynikCL))
    index.append("MeanShift")

    # AgglomerativeClustering
    wynikFA = AgglomerativeClustering(n_clusters=klasy).fit(data).labels_
    wektor.append(_wiersz(wynikFA))
    index.append("AgglomerativeClustering")

    # KMeans
    wynikKM = KMeans(n_clusters=klasy, random_state=123).fit(data).labels_
    wektor.append(_wiersz(wynikKM))
    index.append("KMeans")

    return pd.DataFrame(wektor, index=index,
                        columns=["FM", "AM", "AR", "Dane"])

Exemplo n.º 2

0

Exibir arquivo

Arquivo: test_supervised.py Projeto: antoinewdg/scikit-learn

def test_fowlkes_mallows_score():
    """Check fowlkes_mallows_score on a general, a perfect and a worst case."""
    cases = (
        # General case
        ([0, 0, 0, 1, 1, 1], [0, 0, 1, 1, 2, 2], 4.0 / np.sqrt(12.0 * 6.0)),
        # Perfect match but where the label names changed
        ([0, 0, 0, 1, 1, 1], [1, 1, 1, 0, 0, 0], 1.0),
        # Worst case
        ([0, 0, 0, 0, 0, 0], [0, 1, 2, 3, 4, 5], 0.0),
    )
    for first, second, expected in cases:
        assert_almost_equal(fowlkes_mallows_score(first, second), expected)

Exemplo n.º 3

0

Exibir arquivo

def test_fowlkes_mallows_score():
    """Exercise fowlkes_mallows_score on three hand-picked labelings."""
    two_groups = [0, 0, 0, 1, 1, 1]

    # General case
    expected_general = 4.0 / np.sqrt(12.0 * 6.0)
    assert_almost_equal(
        fowlkes_mallows_score(two_groups, [0, 0, 1, 1, 2, 2]),
        expected_general)

    # Perfect match: same partition, only the label names are swapped
    assert_almost_equal(
        fowlkes_mallows_score(two_groups, [1, 1, 1, 0, 0, 0]), 1.0)

    # Worst case: one single cluster versus all-singleton clusters
    assert_almost_equal(
        fowlkes_mallows_score([0] * 6, list(range(6))), 0.0)

Exemplo n.º 4

0

Exibir arquivo

def computeFowklesMallowsIndex(inputDirTDA, inputFileWavelet,
                               sheetNameWavelet):
    """Return the Fowlkes-Mallows score between the TDA and wavelet clusterings.

    Both helpers return a ``(cluster assignments, item labels)`` pair. The
    wavelet assignments are re-ordered to follow the TDA label order before
    the two clusterings are compared. The re-ordered assignments are printed.

    :param inputDirTDA: argument forwarded to ``tdaClustering``.
    :param inputFileWavelet: first argument forwarded to ``waveletClustering``.
    :param sheetNameWavelet: second argument forwarded to ``waveletClustering``.
    :return: value returned by ``fowlkes_mallows_score``.
    :raises KeyError: if a TDA label has no counterpart among the wavelet labels.
    """
    clustering_TDA, labelsTDA = tdaClustering(inputDirTDA)
    clustering_Wavelet, labelsWavelet = waveletClustering(
        inputFileWavelet, sheetNameWavelet)

    # Look up each wavelet cluster id by item label, then read the ids back
    # in TDA label order so both assignment lists are aligned item by item.
    clusterByLabel = dict(zip(labelsWavelet, clustering_Wavelet))
    reOrganizedWavelet = [clusterByLabel[label] for label in labelsTDA]

    print(reOrganizedWavelet)

    return fowlkes_mallows_score(clustering_TDA, reOrganizedWavelet)

Exemplo n.º 5

0

Exibir arquivo

Arquivo: cluster.py Projeto: AspirinCode/DeepChEmbed

    def true_label_metrics(true_label, assigned_label, print_metric):
        """Score a clustering against known labels.

        See https://scikit-learn.org/stable/modules/clustering.html#clustering-evaluation

        :param true_label: reference labels.
        :param assigned_label: labels produced by the clustering.
        :param print_metric: when truthy, the scores are also printed.
        :return: dict with the keys ``'adjusted_rand_score'`` and
            ``'fowlkes_mallows_score'``.
        """
        ari = cluster_metric.adjusted_rand_score(true_label, assigned_label)
        fmi = cluster_metric.fowlkes_mallows_score(true_label, assigned_label)
        scores = {
            'adjusted_rand_score': ari,
            'fowlkes_mallows_score': fmi,
        }

        if print_metric:
            print("Metric with True label")
            print("adjusted rand score: % s " % ari)
            print("fowlkes_mallows : % s" % fmi)

        return scores

Exemplo n.º 6

0

Exibir arquivo

def get_clustering_metrics(train_data,
                           cluster_labels,
                           ground_truth_labels=None):
    """Collect clustering quality metrics into a dict keyed by metric name.

    The three metrics that need only the data and the cluster labels are
    always computed. The six label-comparison metrics are added only when
    ``ground_truth_labels`` is given.

    :param train_data: data the clustering was computed on.
    :param cluster_labels: cluster label assigned to each sample.
    :param ground_truth_labels: optional reference labels.
    :return: dict mapping metric name to score.
    """
    scores = {
        'silhouette_score': silhouette_score(
            train_data, cluster_labels, random_state=42),
    }
    for key, metric in (
            ('calinski_harabasz_score', calinski_harabasz_score),
            ('davies_bouldin_score', davies_bouldin_score)):
        scores[key] = metric(train_data, cluster_labels)

    if ground_truth_labels is None:
        return scores

    supervised = (
        ('v_measure_score', v_measure_score),
        ('fowlkes_mallows_score', fowlkes_mallows_score),
        ('homogeneity_score', homogeneity_score),
        ('normalized_mutual_info_score', normalized_mutual_info_score),
        ('adjusted_rand_score', adjusted_rand_score),
        ('completeness_score', completeness_score),
    )
    for key, metric in supervised:
        scores[key] = metric(ground_truth_labels, cluster_labels)

    return scores

Exemplo n.º 7

0

Exibir arquivo

	def _score_clustering(self, labels, metric='vm'):
		"""Score ``labels`` against ``self.true_labels_``.

		``metric='fm'`` selects the Fowlkes-Mallows score and ``metric='ami'``
		the adjusted mutual information. Any other value selects the
		V-measure, computed only on the entries where ``labels > 0``.
		"""
		if metric == 'fm':
			return fowlkes_mallows_score(self.true_labels_, labels)
		if metric == 'ami':
			return adjusted_mutual_info_score(self.true_labels_, labels)
		# Fallback: V-measure restricted to the entries with a positive label.
		keep = labels > 0
		return v_measure_score(self.true_labels_[keep], labels[keep])

Exemplo n.º 8

0

Exibir arquivo

def _clustering_evaluation(label, labels_true, digits):
    if labels_true is None:
        FM = None
        ARI = None
    else:
        ARI = round(adjusted_rand_score(labels_true, label), digits)
        FM = round(fowlkes_mallows_score(labels_true, label),digits)

    return ARI, FM

Exemplo n.º 9

0

Exibir arquivo

Arquivo: landmark_mf.py Projeto: ItayGabbay/ClusteringAlgorithmSelection

def get_landmarking(dataset_name, df):
    """Compute landmarking meta-features for one dataset.

    A sample of ``df`` is labelled by the helpers ``get_dbscan`` and
    ``get_Kmeans`` (presumably DBSCAN and k-means clusterings - confirm).
    The ``get_dbscan`` labels are then used as classification targets for
    several simple learners; their hold-out accuracies are stored as
    ``LM*`` entries, together with agreement scores between the two
    labelings.

    :param dataset_name: dataset file name; the part before the first ``.``
        is stored under ``record['dataset']``.
    :param df: data to describe; accessed through ``len``, ``.shape``,
        ``.sample`` and ``.iloc`` (presumably a pandas DataFrame - confirm).
    :return: ``(record, (n_rows, n_cols, elapsed_seconds))`` where ``record``
        is a dict holding ``'dataset'`` and the ``LM*`` values.
    """
    start = time.time()
    record = {'dataset': dataset_name.split('.')[0]}
    # NOTE(review): `results` is never used in this function.
    results = []
    # Sample 10% of the rows for data sets with more than 400 rows, otherwise
    # at most 40 rows.
    n_samples = int(len(df)*0.1) if len(df) > 400 else min(df.shape[0], 40)
    # NOTE(review): no random_state is passed here or to train_test_split
    # below, so results are presumably not reproducible between runs.
    data = df.sample(n=n_samples, replace=False)
    labels = get_dbscan(data)
    # Number of distinct labels returned by get_dbscan; reused as the second
    # argument of get_Kmeans.
    k = len(np.unique(labels))
    labels2 = get_Kmeans(data, k, 40)
    # An unrestricted tree is fitted only to find the column with the lowest
    # feature importance.
    full_tree = DecisionTreeClassifier()
    full_tree.fit(data, labels)
    worst_attr = np.argmin(full_tree.feature_importances_)

    X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3)
    best_stump = DecisionTreeClassifier(max_depth=1)
    random_stump = DecisionTreeClassifier(splitter="random", max_depth=1)
    worst_stump = DecisionTreeClassifier(max_depth=1)
    elite_knn = KNeighborsClassifier(n_neighbors=1)
    # Same n_neighbors as elite_knn, with the remaining arguments spelled out.
    one_knn = KNeighborsClassifier(n_neighbors=1,
            algorithm="auto",
            weights="uniform",
            p=2,
            metric="minkowski")
    nb = GaussianNB()
    # NOTE(review): `lda` is constructed but never fitted (its fit call is
    # commented out below).
    lda = LinearDiscriminantAnalysis()
    best_stump.fit(X_train, y_train)
    random_stump.fit(X_train, y_train)
    # The "worst" stump only sees the single least important column.
    worst_stump.fit(X_train.iloc[:, worst_attr].values.reshape(-1, 1), y_train)
    elite_knn.fit(X_train, y_train)
    one_knn.fit(X_train, y_train)
    # lda.fit(X_train, y_train)
    nb.fit(X_train, y_train)

    # LM1/LM2: log2 of the row and column counts of the full data set.
    record['LM1'] = np.log2(df.shape[0])
    record['LM2'] = np.log2(df.shape[1])
    # Hold-out accuracies of the simple learners (the numbering has gaps
    # where entries are commented out).
    record['LM3'] = accuracy_score(best_stump.predict(X_test), y_test)
    # record['LM4'] = f1_score(best_stump.predict(X_test), y_test, average='weighted')
    record['LM5'] = accuracy_score(random_stump.predict(X_test), y_test)
    # record['LM6'] = f1_score(random_stump.predict(X_test), y_test, average='weighted')
    # record['LM7'] = model.inertia_
    record['LM8'] = accuracy_score(elite_knn.predict(X_test), y_test)
    # record['LM9'] = f1_score(elite_knn.predict(X_test), y_test, average='weighted')
    # record['LM10'] = accuracy_score(lda.predict(X_test), y_test)
    # record['LM11'] = f1_score(lda.predict(X_test), y_test, average='weighted')
    record['LM12'] = accuracy_score(nb.predict(X_test), y_test)
    # record['LM13'] = f1_score(nb.predict(X_test), y_test, average='weighted')
    record['LM14'] = accuracy_score(one_knn.predict(X_test), y_test)
    # record['LM15'] = f1_score(one_knn.predict(X_test), y_test, average='weighted')
    record['LM16'] = accuracy_score(worst_stump.predict(X_test.iloc[:, worst_attr].values.reshape(-1, 1)), y_test)
    # record['LM17'] = f1_score(worst_stump.predict(X_test.iloc[:, worst_attr].values.reshape(-1, 1)), y_test, average='weighted')
    # LM18-LM21: agreement between the two labelings of the sampled rows.
    record['LM18'] = adjusted_rand_score(labels, labels2)
    record['LM19'] = adjusted_mutual_info_score(labels, labels2)
    record['LM20'] = completeness_score(labels, labels2)
    record['LM21'] = fowlkes_mallows_score(labels, labels2)

    end = time.time()
    return record, (df.shape[0], df.shape[1], end-start)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: test_supervised.py Projeto: MartinThoma/scikit-learn

def test_int_overflow_mutual_info_fowlkes_mallows_score():
    """Check that large label arrays do not overflow in either score."""
    # Test overflow in mutual_info_classif and fowlkes_mallows_score
    # (count of y == 0, count of y == 1) for each x class 1..5, in order.
    group_sizes = [(52632, 2529), (14660, 793), (3271, 204), (814, 39),
                   (316, 20)]
    x_values = []
    y_values = []
    for x_class, (n_zero, n_one) in enumerate(group_sizes, start=1):
        x_values += [x_class] * (n_zero + n_one)
        y_values += [0] * n_zero + [1] * n_one
    x = np.array(x_values)
    y = np.array(y_values)

    assert_all_finite(mutual_info_score(x, y))
    assert_all_finite(fowlkes_mallows_score(x, y))

Exemplo n.º 11

0

Exibir arquivo

def test_int_overflow_mutual_info_fowlkes_mallows_score():
    """Both scores must stay finite on label arrays of about 75k entries."""
    # Test overflow in mutual_info_classif and fowlkes_mallows_score
    # Run lengths of the y labels: alternating runs of 0 and 1, two runs per
    # x class.
    y_runs = [52632, 2529, 14660, 793, 3271, 204, 814, 39, 316, 20]
    # Each x class spans one (0-run, 1-run) pair of y.
    x_runs = [y_runs[i] + y_runs[i + 1] for i in range(0, len(y_runs), 2)]

    x = np.array([x_class
                  for x_class, size in enumerate(x_runs, start=1)
                  for _ in range(size)])
    y = np.array([position % 2
                  for position, size in enumerate(y_runs)
                  for _ in range(size)])

    assert_all_finite(mutual_info_score(x, y))
    assert_all_finite(fowlkes_mallows_score(x, y))

Exemplo n.º 12

0

Exibir arquivo

Arquivo: sklearn_cluster_performance.py Projeto: ly-joy/NJU-Machine-Learning

def cluster_performance(y_true, y_pred):
    '''
    Return the FM index and the Rand index of a clustering.
    :param y_true: cluster partition of the reference model, an ndarray
    :param y_pred: cluster partition produced by the clustering model, an ndarray
    :return: FM index, Rand index (the latter computed with adjusted_rand_score)
    '''
    return (fowlkes_mallows_score(y_true, y_pred),
            adjusted_rand_score(y_true, y_pred))

Exemplo n.º 13

0

Exibir arquivo

def cluster_performance(y_true, y_pred):
    '''
    Compute the FM index and the Rand index for a clustering result.
    :param y_true: cluster partition of the reference model, an ndarray
    :param y_pred: cluster partition produced by the clustering model, an ndarray
    :return: FM index, Rand index (the latter computed with adjusted_rand_score)
    '''

    #********* Begin *********#
    fm_index = fowlkes_mallows_score(y_true, y_pred)
    rand_index = adjusted_rand_score(y_true, y_pred)
    return fm_index, rand_index

Exemplo n.º 14

0

Exibir arquivo

def print_stats(x, y, quiet=True):
    """Return ``(ARI, AMI, FMS)`` for two labelings.

    Unless ``quiet`` is true, each score is also written to standard error.
    """
    scores = (
        adjusted_rand_score(x, y),
        adjusted_mutual_info_score(x, y),
        fowlkes_mallows_score(x, y),
    )

    if quiet:
        return scores

    for template, value in zip(("ARI: {}", "AMI: {}", "FMS: {}"), scores):
        print(template.format(value), file=sys.stderr)

    return scores

Exemplo n.º 15

0

Exibir arquivo

def cluster_performance(y_true, y_pred):
    """
    Compute the FM index and the Rand index of a clustering.
    :param y_true: cluster partition of the reference model, an ndarray
    :param y_pred: cluster partition produced by the clustering model, an ndarray
    :return: ``(fm, rand)`` - the FM index first, then the Rand index
        (computed with adjusted_rand_score)
    """
    # ********* Begin *********#
    return (fowlkes_mallows_score(y_true, y_pred),
            adjusted_rand_score(y_true, y_pred))

Exemplo n.º 16

0

Exibir arquivo

def test_fowlkes_mallows_score_properties():
    """The score is symmetric and invariant to renaming the labels."""
    # handcrafted example
    labels_a = np.array([0, 0, 0, 1, 1, 2])
    labels_b = np.array([1, 1, 2, 2, 0, 0])
    # FMI = TP / sqrt((TP + FP) * (TP + FN))
    expected = 1. / np.sqrt((1. + 3.) * (1. + 2.))

    argument_pairs = (
        (labels_a, labels_b),               # original
        (labels_b, labels_a),               # symmetric property
        ((labels_a + 1) % 3, labels_b),     # permutation property
        (labels_b, (labels_a + 2) % 3),     # symmetric and permutation
    )
    for first, second in argument_pairs:
        assert_almost_equal(fowlkes_mallows_score(first, second), expected)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: test_supervised.py Projeto: MartinThoma/scikit-learn

def test_fowlkes_mallows_score_properties():
    """Verify symmetry and label-permutation invariance of the FM score."""
    # handcrafted example
    labels_a = np.array([0, 0, 0, 1, 1, 2])
    labels_b = np.array([1, 1, 2, 2, 0, 0])

    # FMI = TP / sqrt((TP + FP) * (TP + FN))
    tp, fp, fn = 1., 3., 2.
    expected = tp / np.sqrt((tp + fp) * (tp + fn))

    def check(first, second):
        assert_almost_equal(fowlkes_mallows_score(first, second), expected)

    # original ordering
    check(labels_a, labels_b)

    # symmetric property
    check(labels_b, labels_a)

    # permutation property
    check((labels_a + 1) % 3, labels_b)

    # symmetric and permutation (both together)
    check(labels_b, (labels_a + 2) % 3)

Exemplo n.º 18

0

Exibir arquivo

Arquivo: porownanie.py Projeto: aleksandramiesiac/pd3

def Porownaj_algorytmy2(data, klasy, labels, baza):
    """
    Compute the AM, AR and FM indices for the algorithm written by the author.

    :param data: observations passed to ``spectral_clustering``.
    :param klasy: passed to ``spectral_clustering`` as ``k``.
    :param labels: reference labels the clustering is compared against.
    :param baza: dataset identifier stored in the "Dane" column.
    :return: one-row ``pd.DataFrame`` (row label "Moj") with the columns
        "FM", "AM", "AR", "Dane".
    """
    # the author's own algorithm
    wynikM = spectral_clustering(data, k=klasy, M=5)

    wiersz = [
        fowlkes_mallows_score(labels, wynikM),
        adjusted_mutual_info_score(labels, wynikM),
        adjusted_rand_score(labels, wynikM),
        baza,
    ]

    return pd.DataFrame([wiersz], index=["Moj"],
                        columns=["FM", "AM", "AR", "Dane"])

Exemplo n.º 19

0

Exibir arquivo

Arquivo: EvaluateClustering.py Projeto: zxchen110/SEG-BERT

 def evaluate(self):
     """Score ``self.data['pred_y']`` against ``self.data['true_y']``.

     :return: dict with the keys 'ami', 'rand', 'comp', 'fow', 'hom', 'nmi'
         and 'v_score', each holding the result of the matching metric.
     """
     y_true = self.data['true_y']
     y_pred = self.data['pred_y']
     scorers = (
         ('ami', adjusted_mutual_info_score),
         ('rand', adjusted_rand_score),
         ('comp', completeness_score),
         ('fow', fowlkes_mallows_score),
         ('hom', homogeneity_score),
         ('nmi', normalized_mutual_info_score),
         ('v_score', v_measure_score),
     )
     return {key: scorer(y_true, y_pred) for key, scorer in scorers}

Exemplo n.º 20

0

Exibir arquivo

def cluster_hac(num_k):
    """Cluster the first ``num_k`` label ids with average-linkage HAC.

    Samples whose label is at most ``num_k - 1`` are kept, standardised and
    reduced with PCA (``n_components=0.999``) before clustering. The
    Fowlkes-Mallows score of the result against the kept labels is printed.
    """
    feature_ds, label_ds = read_dataset()

    # Positions of the samples whose label is within the first num_k ids.
    kept = [i for i in range(len(label_ds)) if label_ds[i] <= num_k - 1]
    sub_feature_ds = [feature_ds[i] for i in kept]
    sub_label_ds = [label_ds[i] for i in kept]

    x = StandardScaler().fit_transform(np.array(sub_feature_ds))
    components = PCA(n_components=0.999).fit_transform(x)

    hac = AgglomerativeClustering(n_clusters=num_k, linkage='average')
    hac.fit_predict(components)
    print(fowlkes_mallows_score(hac.labels_, sub_label_ds))

Exemplo n.º 21

0

Exibir arquivo

Arquivo: kmeans_test.py Projeto: EnderCheng/YelpLink

def cluster_kmeans(num_k):
    """Cluster the first ``num_k`` label ids with k-means and print the FM score.

    Only samples whose label is at most ``num_k - 1`` are used. Features are
    standardised and projected with PCA (``n_components=0.999``) before
    k-means (``random_state=0``) is run with ``num_k`` clusters.
    """
    feature_ds, label_ds = read_dataset()

    highest_label = num_k - 1
    sub_feature_ds = []
    sub_label_ds = []
    for position in range(len(label_ds)):
        current_label = label_ds[position]
        if current_label > highest_label:
            continue
        sub_feature_ds.append(feature_ds[position])
        sub_label_ds.append(current_label)

    standardised = StandardScaler().fit_transform(np.array(sub_feature_ds))
    components = PCA(n_components=0.999).fit_transform(standardised)

    kmeans = KMeans(n_clusters=num_k, random_state=0)
    kmeans.fit_predict(components)
    print(fowlkes_mallows_score(kmeans.labels_, sub_label_ds))

Exemplo n.º 22

0

Exibir arquivo

def compute_external_metrics(labels_true: List[str],
                             labels_pred: List[int]) -> ExternalEvaluation:
    """Compare predicted cluster labels with reference labels.

    :param labels_true: reference label of each item.
    :param labels_pred: predicted cluster id of each item.
    :return: an ``ExternalEvaluation`` holding homogeneity, completeness,
        V-measure, adjusted mutual information, adjusted Rand index,
        Fowlkes-Mallows score, purity and inverse purity; ``None`` when both
        inputs are empty.
    """
    # Nothing to evaluate when both label lists are empty.
    if not (len(labels_true) or len(labels_pred)):
        return None

    homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
        labels_true, labels_pred)
    adjusted_mutual_info = adjusted_mutual_info_score(labels_true, labels_pred)
    adjusted_rand_index = adjusted_rand_score(labels_true, labels_pred)
    fowlkes_mallows = fowlkes_mallows_score(labels_true, labels_pred)

    # Both purity variants are derived from the same contingency matrix.
    contingency = contingency_matrix(labels_true, labels_pred)

    return ExternalEvaluation(
        homogeneity=homogeneity,
        completeness=completeness,
        v_measure=v_measure,
        adjusted_mutual_information=adjusted_mutual_info,
        adjusted_rand_index=adjusted_rand_index,
        fowlkes_mallows=fowlkes_mallows,
        purity=purity_score(contingency),
        inverse_purity=purity_score(contingency, inverse=True))

Exemplo n.º 23

0

Exibir arquivo

Arquivo: func.py Projeto: swqs1989/ds598

 def externalEval(self, y_pred, true_label):
     """Return ``(nmi, purity, fowlkes_mallows)`` for a predicted clustering.

     Each predicted cluster is mapped to the most common true label among
     its members. Purity is the share of samples that carry their cluster's
     most common label. The Fowlkes-Mallows score is computed on the
     remapped predictions, the NMI on the raw predictions.

     Predicted ids are used as list positions and majority labels as keys
     ``0 .. n_cluster - 1``, so both are presumably expected to be
     consecutive integers starting at 0 - confirm against callers.
     """
     true_label = np.array(true_label)
     n_cluster = len(set(true_label))
     y_pred_modi = y_pred.copy()

     # Sample positions grouped by predicted cluster id.
     members_by_cluster = [[] for _ in range(len(set(y_pred)))]
     for position in range(len(y_pred)):
         members_by_cluster[y_pred[position]].append(position)

     # Sample positions grouped by the majority true label of their cluster.
     positions_by_label = {label: [] for label in range(n_cluster)}
     majority_total = 0
     for members in members_by_cluster:
         member_labels = true_label[members]
         if len(member_labels) > 0:
             top_label, top_count = Counter(member_labels).most_common(1)[0]
             majority_total += top_count
             positions_by_label[top_label] += members

     for label in list(positions_by_label.keys()):
         for position in positions_by_label[label]:
             y_pred_modi[position] = label

     nmi = normalized_mutual_info_score(true_label, y_pred)
     purity = majority_total / len(y_pred_modi)
     fowlkes_mallows = fowlkes_mallows_score(true_label, y_pred_modi)
     return nmi, purity, fowlkes_mallows

Exemplo n.º 24

0

Exibir arquivo

Arquivo: k_means.py Projeto: shubhampachori12110095/Machine_Learning

def kmeans(data, k):
    """Run k-means for a fixed number of iterations and print its quality.

    Centroids come from ``initialize_centroids`` and are refined for 99
    rounds of "assign to the closest centroid, then recompute the means".
    The final assignment is compared with the module-level ``digits.target``:
    a rearranged confusion matrix and the Fowlkes-Mallows score are printed.

    The iteration counter is printed with call syntax so the function is
    valid on both Python 2 and Python 3.

    :param data: observations; indexed by row when the means are recomputed.
    :param k: number of clusters.
    :return: None; results are printed.
    """
    centroid = initialize_centroids(data, k)
    c1 = np.zeros((k, k))
    d = np.zeros(k)
    clusnew = np.zeros(len(data))

    # The counter runs 2..100, matching the values the original loop printed.
    for i in range(2, 101):
        clusters = closest_centroid(data, centroid, k)
        for l in range(0, k):
            centroid[l, :] = np.mean(data[(np.where(clusters == l))], axis=0)
        print(i)

    c = confusion_matrix(clusters, digits.target)
    for j in range(0, k):
        # Column of `c` picked by the largest entry of row j.
        best = np.argmax(c[j, :])
        c1[j, :] = c[:, best]
        clusnew[clusters == best] = j
        # NOTE: column j of c1 is summed while c1 is only filled up to row j.
        d[j] = sum(c1[:, j])
    # Mark the column with the smallest partial sum.
    c1[:, (np.argmin(d))] = -1
    print('Confusion Matrix: ', c1)
    print('Fowlkes Mallows Score: ',
          fowlkes_mallows_score(digits.target, clusnew))

Exemplo n.º 25

0

Exibir arquivo

Arquivo: eval.py Projeto: ChahatBansal8060/2018ANZ8060_HW2

# Usage: <script> OUTPUT_FILE CORRECT_FILE
# OUTPUT_FILE: clustering output; a line starting with '#' opens a new
#   cluster and every other line holds the integer index of a point that
#   belongs to the current cluster.
# CORRECT_FILE: reference labels, loaded with np.loadtxt as integers.
# The Fowlkes-Mallows score of the two labelings is appended to
# DBSCANLogs.txt.
output_file = sys.argv[1]
correct_file = sys.argv[2]

values = np.loadtxt(correct_file, dtype=int)
# Count the lines inside a context manager so the handle is closed.
with open(correct_file) as f:
    num_lines = sum(1 for line in f)
result = np.zeros(num_lines)

cur_clus = -1

with open(output_file) as f:
    content = f.readlines()

for line in content:
    if line[0] == '#':
        # Cluster separator: following point indices belong to the next id.
        cur_clus += 1
        continue

    result[int(line)] = cur_clus

net_score = fowlkes_mallows_score(values, result)
'''
unique, counts = np.unique(result, return_counts=True)
print np.asarray((unique, counts)).T

unique, counts = np.unique(values, return_counts=True)
print np.asarray((unique, counts)).T
'''
with open('DBSCANLogs.txt', 'a') as logs:
    logs.write(str(net_score) + '\n')

Exemplo n.º 26

0

Exibir arquivo

Arquivo: clustering.py Projeto: rtrad89/authorship_clustering_code_repo

    def _eval_clustering(self, labels_true, labels_predicted):
        # To address when COP-KMeans fails to satisfy all constraints at a k:
        if labels_predicted is None:
            # return an empty dictionary to expose in the final output
            return {"nmi": None,
                    "ami": None,
                    "ari": None,
                    "fms": None,
                    "v_measure": None,
                    "bcubed_precision": None,
                    "bcubed_recall": None,
                    "bcubed_fscore": None,
                    "Silhouette": None,
                    "Calinski_harabasz": None,
                    "Davies_Bouldin": None
                    }

        nmi = normalized_mutual_info_score(labels_true,
                                           labels_predicted,
                                           average_method="max")

        ami = adjusted_mutual_info_score(labels_true,
                                         labels_predicted,
                                         average_method="arithmetic")

        ari = adjusted_rand_score(labels_true,
                                  labels_predicted)

        v_measure = v_measure_score(labels_true,
                                    labels_predicted,
                                    beta=1.0)

        fms = fowlkes_mallows_score(labels_true,
                                    labels_predicted)

        # Reshape labels for BCubed measures
        true_dict = self._reshape_labels_as_dicts(labels_true)
        pred_dict = self._reshape_labels_as_dicts(labels_predicted)

        bcubed_precision = bcubed.precision(cdict=pred_dict, ldict=true_dict)
        bcubed_recall = bcubed.recall(cdict=pred_dict, ldict=true_dict)
        bcubed_f1 = bcubed.fscore(bcubed_precision, bcubed_recall)

        # =====================================================================
        # Unsupervised Metrics
        # =====================================================================
        if not labels_predicted.nunique() in (1, len(self.data)):
            sil = silhouette_score(X=self.data,
                                   labels=labels_predicted,
                                   metric=self.distance_metric,
                                   random_state=13712)

            ch = calinski_harabasz_score(X=self.data, labels=labels_predicted)

            dv = davies_bouldin_score(X=self.data, labels=labels_predicted)
        else:
            sil = None
            ch = None
            dv = None

        ret = {}
        ret.update({"nmi": round(nmi, 4),
                    "ami": round(ami, 4),
                    "ari": round(ari, 4),
                    "fms": round(fms, 4),
                    "v_measure": round(v_measure, 4),
                    "bcubed_precision": round(bcubed_precision, 4),
                    "bcubed_recall": round(bcubed_recall, 4),
                    "bcubed_fscore": round(bcubed_f1, 4),
                    "Silhouette": round(sil, 4
                                        ) if sil is not None else None,
                    "Calinski_harabasz": round(ch, 4
                                               ) if ch is not None else None,
                    "Davies_Bouldin": round(dv, 4
                                            ) if dv is not None else None
                    # Here goes the unsupervised indices
                    })

        return ret

Exemplo n.º 27

0

Exibir arquivo

def _fm(labels, labels_true, digits):
    """Return the Fowlkes-Mallows score of ``labels`` against ``labels_true``,
    rounded to ``digits`` decimal places."""
    score = fowlkes_mallows_score(labels_true, labels)
    return round(score, digits)

Exemplo n.º 28

0

Exibir arquivo

# Script excerpt: evaluates the project's `Agglomerative` clusterer for
# n_clusters = 2..6 with the Fowlkes-Mallows score (against `target`) and the
# silhouette score. `data`, `target` and `Agglomerative` are defined outside
# this excerpt.
# NOTE(review): `data_copy` is not used anywhere in the visible excerpt.
data_copy = copy.copy(data)

# Drop the class column so that only the feature columns remain
inputs = data.drop('species', axis=1)

# Test from n_clusters = 2 until n_clusters = 6 (inclusive)
for n_clusters in range(2, 6 + 1):
    # Fowlkes-Mallows and Silhouette evaluation:
    agglo = Agglomerative(n_clusters=n_clusters)
    agglo.fit(inputs)
    labels = np.array(agglo.predict(inputs))

    print("n_clusters =", n_clusters)

    # The printed messages are in Indonesian ("Menggunakan metode ..." =
    # "Using the ... method").
    print("Menggunakan metode Fowlkes-Mallows: ")
    fowlkes_mallows = fowlkes_mallows_score(labels, target)
    print("Fowlkes Mallows Score:", fowlkes_mallows)

    print("Menggunakan metode Silhouette:")
    silhouette_avg = silhouette_score(inputs, labels)
    print("Hasil rata-rata skor silhouette:", silhouette_avg)
    print()
    print()

    # Per-sample silhouette values; not used further in the visible excerpt.
    silhouette_values_per_point = silhouette_samples(inputs, labels)

    # Visualize Silhouette subplot
    # NOTE(review): the original comment announced "1 row and 2 columns"
    # (silhouette plot and cluster visualization), but plt.subplots(1) is
    # called with a single argument - confirm the intended layout.
    fig, silhouette_viz = plt.subplots(1)
    fig.set_size_inches(18, 7)

Exemplo n.º 29

0

Exibir arquivo

def report_clustering(distance_file,
                      biom_file,
                      metadata_file,
                      num_clusters,
                      verbose,
                      L=2,
                      output_file=None):
    """Cluster samples from a distance matrix and score them by body site.

    The distance matrix is clustered with complete-linkage agglomerative
    clustering and with KMedoids. Both results are compared with the
    ``body_site`` of each sample using the Rand, adjusted Rand, normalized
    mutual information, adjusted mutual information and Fowlkes-Mallows
    scores.

    The report file is written in one ``with`` block after all scores are
    known, so the handle is always closed (it was previously left open).

    :param distance_file: path read with ``CSV.read``, or an already loaded
        distance matrix given as a list.
    :param biom_file: passed to ``BW.extract_samples`` to get the sample ids.
    :param metadata_file: passed to ``meta.extract_metadata``; the result is
        indexed by sample id and then by ``'body_site'``.
    :param num_clusters: number of clusters for both algorithms.
    :param verbose: when truthy, the report is printed.
    :param L: 1 or 2; selects the "L1-UniFrac" / "L2-UniFrac" title line.
        Any other value omits the title.
    :param output_file: optional path; when given, the report is written there.
    :return: list of rows: optional title, header, then one
        ``[metric name, agglomerative score, KMedoids score]`` row per metric.
    """
    if not isinstance(distance_file, list):
        distance_matrix = CSV.read(distance_file)
    else:
        distance_matrix = distance_file

    output_matrix = []

    AgglomerativeCluster = AgglomerativeClustering(
        n_clusters=num_clusters, affinity='precomputed',
        linkage='complete').fit_predict(distance_matrix)
    KMedoidsCluster = KMedoids(n_clusters=num_clusters,
                               metric='precomputed',
                               method='pam',
                               init='heuristic').fit_predict(distance_matrix)

    # Replace every sample id by an integer code for its body site, numbering
    # the sites in order of first appearance.
    PCoA_Samples = BW.extract_samples(biom_file)
    metadata = meta.extract_metadata(metadata_file)
    region_names = []
    for i in range(len(PCoA_Samples)):
        if metadata[PCoA_Samples[i]]['body_site'] not in region_names:
            region_names.append(metadata[PCoA_Samples[i]]['body_site'])
        PCoA_Samples[i] = region_names.index(
            metadata[PCoA_Samples[i]]['body_site'])

    if verbose and L == 1:
        print('Printing results for L1-UniFrac:')
    elif verbose and L == 2:
        print('Printing results for L2-UniFrac:')
    if verbose:
        print('Metric\t\t\t\t\t\t\tAgglomerativeClustering\t\tKMedoids')

    if L == 1:
        output_matrix.append(['Printing results for L1-UniFrac:'])
    if L == 2:
        output_matrix.append(['Printing results for L2-UniFrac:'])
    output_matrix.append(['Metric', 'AgglomerativeClustering', 'KMedoids'])

    RI1 = rand_score(PCoA_Samples, AgglomerativeCluster)
    RI2 = rand_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Rand Index Score:               {RI1}\t\t\t{RI2}')
    ARI1 = adjusted_rand_score(PCoA_Samples, AgglomerativeCluster)
    ARI2 = adjusted_rand_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Adjusted Rand Index Score:      {ARI1}\t\t\t{ARI2}')
    NMI1 = normalized_mutual_info_score(PCoA_Samples, AgglomerativeCluster)
    NMI2 = normalized_mutual_info_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Normalized Mutual Index Score:  {NMI1}\t\t\t{NMI2}')
    AMI1 = adjusted_mutual_info_score(PCoA_Samples, AgglomerativeCluster)
    AMI2 = adjusted_mutual_info_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Adjusted Mutual Info Score:     {AMI1}\t\t\t{AMI2}')
    FM1 = fowlkes_mallows_score(PCoA_Samples, AgglomerativeCluster)
    FM2 = fowlkes_mallows_score(PCoA_Samples, KMedoidsCluster)
    if verbose:
        print(f'Fowlkes Mallows Score:          {FM1}\t\t\t{FM2}')

    if output_file is not None:
        # Write the whole report at once; the context manager closes the
        # file even if a write fails.
        with open(output_file, 'w') as f:
            if L == 1:
                f.write('Printing results for L1-UniFrac:\n')
            elif L == 2:
                f.write('Printing results for L2-UniFrac:\n')
            f.write('Metric\t\t\t\tAgglomerativeClustering\t\t\tKMedoids\n')
            f.write(f'Rand Index Score:               {RI1}\t\t\t{RI2}\n')
            f.write(f'Adjusted Rand Index Score:      {ARI1}\t\t\t{ARI2}\n')
            f.write(f'Normalized Mutual Index Score:  {NMI1}\t\t\t{NMI2}\n')
            f.write(f'Adjusted Mutual Info Score:     {AMI1}\t\t\t{AMI2}\n')
            f.write(f'Fowlkes Mallows Score:          {FM1}\t\t\t{FM2}\n')

    output_matrix.append(['Rand Index Score:', RI1, RI2])
    output_matrix.append(['Adjusted Rand Index Score:', ARI1, ARI2])
    output_matrix.append(['Normalized Mutual Index Score:', NMI1, NMI2])
    output_matrix.append(['Adjusted Mutual Info Score:', AMI1, AMI2])
    output_matrix.append(['Fowlkes Mallows Score:', FM1, FM2])

    return output_matrix

Exemplo n.º 30

0

Exibir arquivo

from time import time
import numpy as np
from scipy import ndimage
from matplotlib import pyplot as plt
from sklearn import manifold, datasets
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import confusion_matrix
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.preprocessing import scale
# Load the 10-class digits data set and standardise its feature matrix.
digits = datasets.load_digits(n_class=10)
X = scale(digits.data)
y = digits.target
n_samples, n_features = X.shape

# Embed the scaled features into two dimensions; clustering runs on the
# embedding, not on the original features.
embedder = manifold.SpectralEmbedding(n_components=2)
X_red = embedder.fit_transform(X)

# One relabelled cluster id per sample, filled in by the loop below.
clusnew = np.zeros(n_samples)

# Fit Ward-linkage agglomerative clustering and report how long fit() took.
clustering = AgglomerativeClustering(linkage='ward', n_clusters=10)
t0 = time()
clustering.fit(X_red)
elapsed = time() - t0
print("{} : {:.2f}s".format('ward', elapsed))

# Cluster labels are passed as the first argument and the digit targets as
# the second, so rows of `c` follow cluster labels and columns follow `y`.
c = confusion_matrix(clustering.labels_, y)
c1 = np.zeros((10, 10))
d = np.zeros(10)
for j in range(10):
    # Column index holding the largest count in row j of `c`.
    top = np.argmax(c[j, :])
    # Row j of the rearranged matrix is that whole column of `c`.
    c1[j, :] = c[:, top]
    # NOTE(review): this compares cluster labels against `top`, which is a
    # column (target) index, and later iterations can overwrite earlier
    # assignments -- confirm this is the intended relabelling.
    clusnew[clustering.labels_ == top] = j
    # Column sum is taken while c1 is only filled up to row j, so the value
    # depends on the iteration order.
    d[j] = c1[:, j].sum()

# Overwrite the column with the smallest recorded sum with -1.
c1[:, np.argmin(d)] = -1
print('Confusion Matrix: ', c1)
print('Fowlkes Mallows Score: ', fowlkes_mallows_score(y, clusnew))

Exemplo n.º 31

0

Exibir arquivo

with open(fi, 'w') as outfile:
    json.dump(label_test.tolist(), outfile)
fi = os.getcwd() + "/svm/json/label_train.json"
with open(fi, 'w') as outfile:
    json.dump(label_train.tolist(), outfile)

print "\nLinear SVC: "
Classifier = svm.SVC(kernel='linear', probability=True)
Classifier.fit(feature_train, label_train)
joblib.dump(Classifier, 'linear_2.pkl')
print "predicting.."
predict = Classifier.predict(feature_test)
print "Expected output:", label_test
print "Predicted output:", predict
print "Confusion Matrix:\n", metrics.confusion_matrix(label_test, predict)
print "Fowlkes Mallows Score", fowlkes_mallows_score(label_test, predict)

try:
    print "Precision Score", precision_score(label_test, predict)
    print "Recall Score", recall_score(label_test, predict)
    print "F-measure", f1_score(label_test, predict)
    # exit()
except:
    pass

print "\nRBF SVC: "
Classifier = svm.SVC(kernel='rbf')
Classifier.fit(feature_train, label_train)
joblib.dump(Classifier, 'rbf_2.pkl')
print "predicting.."
predict = Classifier.predict(feature_test)

Exemplo n.º 32

0

Exibir arquivo

    def compute_scores(self, x):
        """Predict cluster labels for ``x`` in batches and save a score table.

        Labels from ``self.kmeans.predict`` are stored in
        ``self.cluster_labels`` and used for the internal metrics
        (Davies-Bouldin, silhouette). The external metrics compare
        ``self.labels_true`` with ``self.labels_pred``. The resulting
        name/type/score table is written as CSV to
        ``files.small_images_classes_kmeans_scores``.

        NOTE(review): the external metrics read ``self.labels_pred``, not the
        ``self.cluster_labels`` computed here -- confirm both are kept in
        sync by the caller.

        Args:
            x: Sample matrix exposing ``shape`` and row slicing; slices of it
                are passed to ``self.kmeans.predict``.

        Raises:
            ValueError: If ``x`` has no rows.
        """
        n_samples = x.shape[0]
        if n_samples == 0:
            # Previously an empty input surfaced as a NameError on the
            # never-bound loop variable; fail with a clear message instead.
            raise ValueError('compute_scores() needs at least one sample in x')

        self.cluster_labels = np.empty((n_samples, ))

        # Slices clamp at the end of the array, so the final (possibly
        # shorter) batch is handled by the loop itself; no separate tail
        # pass is needed.
        for start in range(0, n_samples, self.batch_size):
            stop = start + self.batch_size
            self.cluster_labels[start:stop] = self.kmeans.predict(x[start:stop])

        confusion_matrix = cscores.contingency_matrix(self.labels_true,
                                                      self.labels_pred)
        # Purity: sum of each column's largest count over the total count.
        purity_score = (np.sum(np.amax(confusion_matrix, axis=0)) /
                        np.sum(confusion_matrix))
        (homogeneity_score, completeness_score,
         v_measure_score) = cscores.homogeneity_completeness_v_measure(
             self.labels_true, self.labels_pred)

        # calinski_harabasz_score and silhouette_samples are currently not
        # part of the report. Row order below is the order written to the CSV.
        scores = [
            [
                'davies_bouldin_score', 'internal',
                metrics.davies_bouldin_score(x, self.cluster_labels)
            ],
            [
                'silhouette_score', 'internal',
                metrics.silhouette_score(x, self.cluster_labels)
            ],
            ['purity_score', 'external', purity_score],
            [
                'adjusted_rand_score', 'external',
                cscores.adjusted_rand_score(self.labels_true, self.labels_pred)
            ],
            ['completeness_score', 'external', completeness_score],
            [
                'fowlkes_mallows_score', 'external',
                cscores.fowlkes_mallows_score(self.labels_true,
                                              self.labels_pred)
            ],
            ['homogeneity_score', 'external', homogeneity_score],
            [
                'adjusted_mutual_info_score', 'external',
                cscores.adjusted_mutual_info_score(self.labels_true,
                                                   self.labels_pred)
            ],
            [
                'mutual_info_score', 'external',
                cscores.mutual_info_score(self.labels_true, self.labels_pred)
            ],
            [
                'normalized_mutual_info_score', 'external',
                cscores.normalized_mutual_info_score(self.labels_true,
                                                     self.labels_pred)
            ],
            ['v_measure_score', 'external', v_measure_score]
        ]

        scores = pd.DataFrame(scores, columns=['name', 'type', 'score'])
        scores.to_csv(files.small_images_classes_kmeans_scores, index=False)

Exemplo n.º 33

0

Exibir arquivo

	n_samples, n_features = X.shape
	np.random.seed(0)
	k=10
	labels_y = list(set(y))

	print 50*"_"
	print "KMeans clustering (implementation of algo from question 1a)"
	no_of_iterations = 10
	dat = {i:0 for i in range(n_samples)} 
	t0 = time()
	k_centers, dat = k_means(X, dat, k,no_of_iterations) 
	y_pred1 = [value for key,value in dat.iteritems()]
	c_m1 = confusion_matrix(y,y_pred1, labels_y)	
	print "PROTOCOL1: The cluster predictions for 10 clusters, i.e k = 10 are:\n",getClusterRepresentatives(c_m1, k)
	print "PROTOCOL2: Confusion Matrix: \n",c_m1
	print "PROTOCOL3: Fowlkes-Mallows score:", fowlkes_mallows_score(y, y_pred1)
	print "Time taken: %.2fs" % (time() - t0)

	print 50*"_"
	print "KMeans clustering (using sklearn)"
	clustering1 = KMeans(n_clusters=k, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances='auto', verbose=0, random_state=None, copy_x=True, n_jobs=1, algorithm='auto')
	t01 = time()
	y_pred11 = clustering1.fit_predict(X)
	c_m11 = confusion_matrix(y,y_pred11)	
	print "PROTOCOL1: The cluster predictions for 10 clusters, i.e k = 10 are:\n",getClusterRepresentatives(c_m11, k)
	print "PROTOCOL2: Confusion Matrix: \n",c_m11
	print "PROTOCOL3: Fowlkes-Mallows score:", fowlkes_mallows_score(y, y_pred11)
	print "Time taken: %.2fs" % (time() - t01)

	print 50*"_"
	print "Agglomerative Clustering with Ward linkage"