def run_k_means_on_loan_data(path):
    data_set = 'loan'
    x_train, y_train = load_data(path + 'data/' + data_set + '/train/')

    estimator = GaussianMixture(n_components=3, random_state=0)
    estimator.fit(x_train)
    predictions = estimator.predict(x_train)
    ss = metrics.silhouette_score(x_train, predictions, metric='euclidean', sample_size=300)
    print("%.6f" % homogeneity_score(y_train, predictions))
    print("%.6f" % ss)

    estimator = KMeans(n_clusters=3, random_state=0)
    estimator.fit(x_train)
    predictions = estimator.predict(x_train)
    ss = metrics.silhouette_score(x_train, estimator.labels_, metric='euclidean', sample_size=300)
    print("%.6f" % homogeneity_score(y_train, predictions))
    print("%.6f" % ss)
    ss = metrics.silhouette_score(x_train, predictions, metric='euclidean', sample_size=300)
    print("%.6f" % ss)
def test_full_vs_elkan():
    km1 = KMeans(algorithm='full', random_state=13)
    km2 = KMeans(algorithm='elkan', random_state=13)
    km1.fit(X)
    km2.fit(X)
    assert homogeneity_score(km1.predict(X), km2.predict(X)) == 1.0
def reduce_EM_and_score(k, x_train, y_train, x_test, y_test, f):
    print('scoring...')
    kmeans = GaussianMixture(n_components=k, random_state=0).fit(x_train)
    base_predictions = kmeans.predict(x_test)
    pca = TruncatedSVD(n_components=k)
    pca_x_train = pca.fit_transform(x_train)
    pca_x_test = pca.transform(x_test)
    kmeans = GaussianMixture(n_components=k, random_state=0).fit(pca_x_train)
    predictions = kmeans.predict(pca_x_test)
    base_score = homogeneity_score(base_predictions, predictions)
    true_score = homogeneity_score(y_test, predictions)
    f.write('%.3f\t%.3f\t%.3f\n' % (base_score, true_score, 0.0))
def reduce_kmeans_and_score(k, x_train, y_train, x_test, y_test, f):
    print('scoring...')
    kmeans = KMeans(n_clusters=k, random_state=0).fit(x_train)
    base_predictions = kmeans.predict(x_test)
    pca = FastICA(n_components=k)
    pca_x_train = pca.fit_transform(x_train)
    pca_x_test = pca.transform(x_test)
    kmeans = KMeans(n_clusters=k, random_state=0).fit(pca_x_train)
    predictions = kmeans.predict(pca_x_test)
    base_score = homogeneity_score(base_predictions, predictions)
    true_score = homogeneity_score(y_test, predictions)
    f.write('%.3f\t%.3f\t%.3f\n' % (base_score, true_score, 0.0))
def cluster(train, val, type, number_of_clusters, plot_folder, classes):
    # todo this should be a class
    if type == "spectral_clustering":
        clustering_model = SpectralClustering(n_clusters=number_of_clusters,
                                              assign_labels="discretize",
                                              random_state=0).fit(train["data"])
    elif type == "kmeans":
        clustering_model = KMeans(n_clusters=number_of_clusters, random_state=0).fit(train["data"])
    else:
        raise NotImplementedError

    # compute metrics
    accuracies = {}
    random_array = np.random.randint(9, size=train["labels"].shape)
    centroids = find_centroids(number_of_clusters, train, clustering_model.labels_)
    test_classifications = cluster_test(val, centroids)
    visualize_clustering(train, clustering_model.labels_, type + "_training",
                         plot_folder, number_of_clusters, centroids)
    visualize_clustering(val, np.asarray(test_classifications), type + "_validation",
                         plot_folder, number_of_clusters, centroids)
    accuracies["random_score"] = homogeneity_score(train["labels"], random_array)
    accuracies["v_measure_score"] = v_measure_score(train["labels"], clustering_model.labels_)
    accuracies["homogeneity_score"] = homogeneity_score(train["labels"], clustering_model.labels_)
    accuracies["completeness_score"] = completeness_score(train["labels"], clustering_model.labels_)
    accuracies["silhouette_score"] = silhouette_score(train["data"], clustering_model.labels_)
    accuracies["purity_score"], accuracies["contingency_matrix"] = purity_score(
        train["labels"], clustering_model.labels_)
    accuracies["v_measure_score_test"] = v_measure_score(val["labels"], test_classifications)
    accuracies["homogeneity_score_test"] = homogeneity_score(val["labels"], test_classifications)
    accuracies["completeness_score_test"] = completeness_score(val["labels"], test_classifications)
    accuracies["silhouette_score_test"] = silhouette_score(val["data"], test_classifications)
    accuracies["purity_score_test"], accuracies["contingency_matrix_test"] = purity_score(
        val["labels"], test_classifications)
    return accuracies
def fit_and_score_model(X, y_train, y_test, id_train, id_test):
    X_train = X[id_train, :]
    X_test = X[id_test, :]

    # k means
    kmeans = KMeans(n_clusters=2, random_state=0)
    kmeans.fit(X_train)

    # score
    print("Homogeneity score (training):", homogeneity_score(y_train, kmeans.labels_))
    print(
        "Homogeneity score (test):",
        homogeneity_score(y_test, kmeans.predict(X_test)),
    )
def basic_stats(filename, mode='normal', num_phrases=5):
    d = AutoDupCoarse(filename, mode=mode, num_phrases=num_phrases)
    d.clustering()
    #d.print_clusters()
    # graph and non_singleton are needed for the component statistics returned below
    label_list, graph = post_process(d)
    non_singleton = [len(comp) for comp in nx.connected_components(graph) if len(comp) > 1]
    true_labels = d.data['cluster_label'].values
    labels = d.labels

    print('\n{} stats: with noise in one cluster'.format(mode))
    print('ari', adjusted_rand_score(true_labels, labels))
    print('hom', homogeneity_score(true_labels, labels))
    print('nmi', normalized_mutual_info_score(true_labels, labels))

    indices = [i for i, label in enumerate(true_labels) if label != -1]
    true_labels_noise = [
        label if i in indices else i for i, label in enumerate(true_labels)
    ]
    labels_noise = [
        label if i in indices else i for i, label in enumerate(labels)
    ]
    print('\n{} stats: with noise in all cluster'.format(mode))
    print('ari', adjusted_rand_score(true_labels_noise, labels_noise))
    print('hom', homogeneity_score(true_labels_noise, labels_noise))
    print('nmi', normalized_mutual_info_score(true_labels_noise, labels_noise))

    true_labels_no_noise = [
        label for i, label in enumerate(true_labels) if i in indices
    ]
    labels_no_noise = [label for i, label in enumerate(labels) if i in indices]
    print('\n{} stats: without noise'.format(mode))
    print('ari', adjusted_rand_score(true_labels_no_noise, labels_no_noise))
    print('hom', homogeneity_score(true_labels_no_noise, labels_no_noise))
    print('nmi', normalized_mutual_info_score(true_labels_no_noise, labels_no_noise))
    '''
    print('num components:', nx.number_connected_components(graph))
    print('num non-singleton:', len(non_singleton))
    print('max component:', max(non_singleton))
    print('min component:', min(non_singleton))
    print('avg component:', sum(non_singleton) / len(non_singleton))
    print('nodes:', graph.number_of_nodes())
    '''
    num_components = nx.number_connected_components(graph)
    print('num_components', num_components)
    return num_components, len(non_singleton), max(non_singleton), \
        sum(non_singleton) / len(non_singleton), d
def main():
    m = 2000  # number of points
    n = 50  # Number of dimensions
    k = 30  # Number of latent clusters
    np.random.seed(3)
    X, z_true = draw_points(m, n, k=k)
    show_points(X, z_true, title="True")

    S = fully_connected_similarity(X)

    # Unnormalized spectral clustering
    # A = spectral_clustering(S, k)
    # Normalized spectral clustering according to Shi and Malik (2000)
    # A = spectral_clustering(S, k, normalization=LaplacianNorm.symmetric, generalized_eigenproblem=True)
    # Normalized spectral clustering according to Ng, Jordan, and Weiss (2002)
    A = spectral_clustering(S, k, normalization=LaplacianNorm.symmetric, norm_rows=True)
    show_points(X, A, title="Spectral Clustering")

    complete_score = completeness_score(z_true, A)
    homog_score = homogeneity_score(z_true, A)
    print("Completeness score: %s" % complete_score)
    print("Homogeneity score: %s" % homog_score)
def calc_homogenity_comp_vmeas_training(df, y_train):
    # user_input_df['predict'] = y1
    # confidence_counter = -1
    # for c in candidates:
    #     confidence_counter += 1
    #     adj = c.getSource()
    #     noun = c.getTarget()
    #     candidate_df = user_input_df.loc[(user_input_df['adj'] == adj) & (user_input_df['noun'] == noun)]
    #     print(candidate_df["adj"][confidence_counter])
    #     print(candidate_df["noun"][confidence_counter])
    #     if candidate_df["class"][confidence_counter] != 2:
    #         truelabels.append(candidate_df["class"][confidence_counter])
    #         predictlabels.append(candidate_df["predict"][confidence_counter])
    # print("truelabels:", truelabels)
    # print("predictlabels:", predictlabels)
    # homogenity_scr = homogeneity_score(truelabels, predictlabels)
    # vmeasure_scr = v_measure_score(truelabels, predictlabels)
    # completness_scr = completeness_score(truelabels, predictlabels)
    # print("homogenity_scr={},vmeasure_scr={},completness_scr={}".format(homogenity_scr, vmeasure_scr, completness_scr))
    truelabels = df['class']
    predictlabels = y_train
    homogenity_scr = homogeneity_score(truelabels, predictlabels)
    vmeasure_scr = v_measure_score(truelabels, predictlabels)
    completness_scr = completeness_score(truelabels, predictlabels)
    print("truelabels:", truelabels)
    print("predictlabels:", predictlabels)
    print("homogenity_scr={},vmeasure_scr={},completness_scr={}".format(
        homogenity_scr, vmeasure_scr, completness_scr))
def get_purity(c, data, label_data, linkage_proc='ward'):
    # hierarchical
    cluster = AgglomerativeClustering(n_clusters=c, affinity='euclidean', linkage=linkage_proc)
    data_p = cluster.fit_predict(data)
    return homogeneity_score(label_data, data_p)
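A small, hypothetical call to get_purity above, assuming AgglomerativeClustering and homogeneity_score are imported as the snippet expects; make_blobs and the variable names here are purely illustrative.

from sklearn.datasets import make_blobs

X_demo, y_demo = make_blobs(n_samples=200, centers=4, random_state=0)
# despite the name, get_purity returns the homogeneity of the hierarchical clustering
score = get_purity(4, X_demo, y_demo, linkage_proc='ward')
print('homogeneity of 4-cluster ward solution: %.3f' % score)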
def evaluate_bins(self):
    self.labels = []
    newcolors = []
    for bin in self.bins:
        for b in bin:
            self.labels.append(self.bins.index(bin))
            if self.colors is not None:
                newcolors.append(self.colors[b])
    self.colors = newcolors
    labels = numpy.array(self.labels)
    colors = numpy.array(self.colors)
    points = []
    for bin in self.bins:
        for b in bin:
            start_lat = self.data[b]['trip_start_location'][1]
            start_lon = self.data[b]['trip_start_location'][0]
            end_lat = self.data[b]['trip_end_location'][1]
            end_lon = self.data[b]['trip_end_location'][0]
            path = [start_lat, start_lon, end_lat, end_lon]
            points.append(path)
    if self.colors is not None:
        a = metrics.silhouette_score(numpy.array(points), labels)
        b = homogeneity_score(colors, labels)
        c = completeness_score(colors, labels)
        print('number of bins is ' + str(len(self.bins)))
        print('silhouette score is ' + str(a))
        print('homogeneity is ' + str(b))
        print('completeness is ' + str(c))
        print('accuracy is ' + str(((a + 1) / 2.0 + b + c) / 3.0))
def show_result(self, prediction, msg):
    new_line(50)
    print(msg)
    new_line(50)
    real = self.train_labels
    print("Confusion Matrix: ")
    print(str(confusion_matrix(real, prediction)))
    homo_score = homogeneity_score(real, prediction)
    complete_score = completeness_score(real, prediction)
    v_score = v_measure_score(real, prediction)
    rand_score = adjusted_rand_score(real, prediction)
    mutual_info = adjusted_mutual_info_score(real, prediction)
    print("Homogeneity Score: %0.3f" % homo_score)
    print("Completeness Score: %0.3f" % complete_score)
    print("V-measure: %0.3f" % v_score)
    print("Adjusted Rand Score: %0.3f" % rand_score)
    print("Adjusted Mutual Info Score: %0.3f\n" % mutual_info)
    return {
        'Homogeneity': homo_score,
        'Completeness': complete_score,
        'V-measure': v_score,
        'RAND': rand_score,
        'Mutual': mutual_info
    }
def get_clustering_metrics(train_data, cluster_labels, ground_truth_labels=None):
    clustering_metric_dict = dict({})
    clustering_metric_dict['silhouette_score'] = silhouette_score(
        train_data, cluster_labels, random_state=42)
    clustering_metric_dict['calinski_harabasz_score'] = calinski_harabasz_score(
        train_data, cluster_labels)
    clustering_metric_dict['davies_bouldin_score'] = davies_bouldin_score(
        train_data, cluster_labels)

    if ground_truth_labels is not None:
        clustering_metric_dict['v_measure_score'] = v_measure_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['fowlkes_mallows_score'] = fowlkes_mallows_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['homogeneity_score'] = homogeneity_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['normalized_mutual_info_score'] = normalized_mutual_info_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['adjusted_rand_score'] = adjusted_rand_score(
            ground_truth_labels, cluster_labels)
        clustering_metric_dict['completeness_score'] = completeness_score(
            ground_truth_labels, cluster_labels)

    return clustering_metric_dict
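A minimal usage sketch for get_clustering_metrics above, assuming the metric functions are already imported as in that snippet; the dataset and model here (make_blobs, KMeans) are illustrative stand-ins, not taken from the original code.

from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

X_demo, y_demo = make_blobs(n_samples=300, centers=3, random_state=42)
demo_labels = KMeans(n_clusters=3, random_state=42, n_init=10).fit_predict(X_demo)

# with ground truth provided, both internal and external metrics are returned
metrics_dict = get_clustering_metrics(X_demo, demo_labels, ground_truth_labels=y_demo)
for name, value in metrics_dict.items():
    print('%s: %.3f' % (name, value))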
def compute_V_measure(clusters, classes):
    class_list, cluster_list = [], []
    # not_found_id = 1000000
    clustered_but_unaligned = 0
    for read in clusters:
        if read in classes:
            class_list.append(classes[read])
            cluster_list.append(clusters[read])
        else:
            # print("Read was clustered but unaligned:", read)
            clustered_but_unaligned += 1

    # added the unprocessed reads to the measure
    not_clustered = set(classes.keys()) - set(clusters.keys())
    highest_cluster_id = max(clusters.values())
    highest_cluster_id += 1
    for read in not_clustered:
        class_list.append(classes[read])
        cluster_list.append(highest_cluster_id)
        highest_cluster_id += 1

    v_score = v_measure_score(class_list, cluster_list)
    compl_score = completeness_score(class_list, cluster_list)
    homog_score = homogeneity_score(class_list, cluster_list)
    ari = adjusted_rand_score(class_list, cluster_list)
    print("Not included in clustering but aligned:", len(not_clustered))
    print("V:", v_score, "Completeness:", compl_score, "Homogeneity:", homog_score)
    print("Nr reads clustered but unaligned (i.e., no class and excluded from V-measure): ",
          clustered_but_unaligned)
    return v_score, compl_score, homog_score, clustered_but_unaligned, ari
def compareAB(A, B, X):
    # measures the similarity of the two assignments, ignoring permutations and with chance normalization
    ars = metrics.adjusted_rand_score(A, B)
    ars_str = '%17.3f' % ars
    # each cluster contains only members of a single class
    hs = homogeneity_score(A, B)
    hs_str = '%17.3f' % hs
    # all members of a given class are assigned to the same cluster
    cs = completeness_score(A, B)
    cs_str = '%17.3f' % cs
    vms = metrics.v_measure_score(A, B)
    vms_str = '%17.3f' % vms
    # geometric mean of the pairwise precision and recall
    fowlkes_mallows_score = metrics.fowlkes_mallows_score(A, B)
    fms_str = '%17.3f' % fowlkes_mallows_score
    sc = metrics.silhouette_score(X, B, metric='euclidean')
    sc_str = '%17.3f' % sc
    my_str = ars_str + "&" + hs_str + "&" + cs_str + "&" + vms_str + "&" + fms_str + "&" + sc_str
    return my_str
def correlation(self, X, Y, heatmap=False):
    nb_classes = len(set(Y))
    print(nb_classes)
    km = KMeans(n_clusters=nb_classes, random_state=0).fit(X)
    label_kmeans = km.labels_
    purity = metric.compute_purity(label_kmeans, Y)
    nmi = normalized_mutual_info_score(Y, label_kmeans)
    ari = adjusted_rand_score(Y, label_kmeans)
    homogeneity = homogeneity_score(Y, label_kmeans)
    ami = adjusted_mutual_info_score(Y, label_kmeans)
    print('NMI = {}, ARI = {}, Purity = {}, AMI = {}, Homogeneity = {}'.format(
        nmi, ari, purity, ami, homogeneity))

    if heatmap:
        x_ticks = [''] * len(Y)
        y_ticks = [''] * len(Y)
        idx = []
        for i in range(nb_classes):
            sub_idx = [j for j, item in enumerate(Y) if item == i]
            idx += [j for j, item in enumerate(Y) if item == i]
            x_ticks[len(idx) - 1] = str(i)
        assert len(idx) == len(Y)
        X = X[idx, :]
        Y = Y[idx]
        #similarity_mat = pairwise_distances(X, metric='cosine')
        similarity_mat = cosine_similarity(X)
        #sns.heatmap(similarity_mat, cmap='Blues')
        fig, ax = plt.subplots()
        #ax.set_yticks(range(len(y_ticks)))
        ax.set_yticklabels(y_ticks)
        ax.set_xticks(range(len(x_ticks)))
        ax.set_xticklabels(x_ticks)
        im = ax.imshow(similarity_mat, cmap='Blues')
        plt.colorbar(im)
        plt.savefig('heatmap_%s_dim%d.png' % (self.name, X.shape[1]), dpi=600)
def evaluate(self, timestamp, batch_idx):
    if has_label:
        data_y, label_y = self.y_sampler.load_all()
    else:
        data_y = self.y_sampler.load_all()
    data_x_, data_x_onehot_ = self.predict_x(data_y)
    label_infer = np.argmax(data_x_onehot_, axis=1)
    print([list(label_infer).count(item) for item in np.unique(label_infer)])
    print([list(label_y).count(item) for item in np.unique(label_y)])
    # label_kmeans = KMeans(n_clusters=4, n_init=200).fit_predict(data_y)
    # ari = adjusted_rand_score(label_y, label_kmeans)
    # nmi = normalized_mutual_info_score(label_y, label_kmeans)
    # print('kmeans', ari, nmi, self.nb_classes)
    # sys.exit()
    if has_label:
        purity = metric.compute_purity(label_infer, label_y)
        nmi = normalized_mutual_info_score(label_y, label_infer)
        ari = adjusted_rand_score(label_y, label_infer)
        homo = homogeneity_score(label_y, label_infer)
        print('scDEC: NMI = {}, ARI = {}, Homogeneity = {}'.format(nmi, ari, homo))
        if is_train:
            f = open('%s/log.txt' % self.save_dir, 'a+')
            f.write('NMI = {}\tARI = {}\tHomogeneity = {}\t batch_idx = {}\n'.format(nmi, ari, homo, batch_idx))
            f.close()
            np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1), data_x_, data_x_onehot_, label_y)
        else:
            np.savez('results/{}/data_pre.npz'.format(self.data), data_x_, data_x_onehot_, label_y)
    else:
        if is_train:
            np.savez('{}/data_at_{}.npz'.format(self.save_dir, batch_idx + 1), data_x_, data_x_onehot_)
        else:
            np.savez('results/{}/data_pre.npz'.format(self.data), data_x_, data_x_onehot_)
def test_full_vs_elkan():
    km1 = KMeans(algorithm='full', random_state=13).fit(X)
    km2 = KMeans(algorithm='elkan', random_state=13).fit(X)
    assert homogeneity_score(km1.predict(X), km2.predict(X)) == pytest.approx(1.0)
def reduce_kmeans_and_score(k, x_train, y_train, x_test, y_test, f):
    print('scoring...')
    kmeans = KMeans(n_clusters=k, random_state=0).fit(x_train)
    base_predictions = kmeans.predict(x_test)
    true_score = homogeneity_score(base_predictions, y_test)
    f.write('%.3f\t%.3f\n' % (true_score, 0.0))
def get_homogeneity(self, cat_y, label):
    # get matched
    (values, indices) = cat_y.max(dim=1)
    pred_label = indices.view(-1).cpu().data.numpy()
    true_label = label.view(-1).cpu().data.numpy()
    return homogeneity_score(true_label, pred_label)
def reduce_EM_and_score(k, x_train, y_train, x_test, y_test, f):
    print('scoring...')
    kmeans = GaussianMixture(n_components=k, random_state=0).fit(x_train)
    base_predictions = kmeans.predict(x_test)
    true_score = homogeneity_score(base_predictions, y_test)
    f.write('%.3f\t%.3f\n' % (true_score, 0.0))
def cluster_eval(labels_true, labels_infer):
    purity = metric.compute_purity(labels_infer, labels_true)
    nmi = normalized_mutual_info_score(labels_true, labels_infer)
    ari = adjusted_rand_score(labels_true, labels_infer)
    homogeneity = homogeneity_score(labels_true, labels_infer)
    ami = adjusted_mutual_info_score(labels_true, labels_infer)
    #print('NMI = {}, ARI = {}, Purity = {}, AMI = {}, Homogeneity = {}'.format(nmi, ari, purity, ami, homogeneity))
    return nmi, ari, homogeneity
def print_scores(labels, predicted, svd):
    print("Homogeneity: " + str(homogeneity_score(labels, predicted)))
    print("completeness: " + str(completeness_score(labels, predicted)))
    print("V-measure: " + str(v_measure_score(labels, predicted)))
    print("RAND score: " + str(adjusted_rand_score(labels, predicted)))
    print("Mutual Info: " + str(adjusted_mutual_info_score(labels, predicted)))
    ret = []
    ret.append(homogeneity_score(labels, predicted))
    ret.append(completeness_score(labels, predicted))
    ret.append(v_measure_score(labels, predicted))
    ret.append(adjusted_rand_score(labels, predicted))
    ret.append(adjusted_mutual_info_score(labels, predicted))
    if svd:
        svd_all.append(ret)
    else:
        nmf_all.append(ret)
    return homogeneity_score(labels, predicted)
def five_measure_scores(label_true, label_pred):
    print("Homogeneity_score = %f" % homogeneity_score(label_true, label_pred))
    print("Completeness_score = %f" % completeness_score(label_true, label_pred))
    print("Adjusted_rand_score = %f" % adjusted_rand_score(label_true, label_pred))
    print("V_measure_score = %f" % v_measure_score(label_true, label_pred))
    print("Adjusted_mutual_info_score = %f" % adjusted_mutual_info_score(label_true, label_pred))
def v_measure(cluster_labels, true_labels):
    h_score = homogeneity_score(true_labels, cluster_labels)
    c_score = completeness_score(true_labels, cluster_labels)
    v_score = v_measure_score(true_labels, cluster_labels)
    print("Homogeneity Score: %.6f" % h_score)
    print("Completeness Score: %.6f" % c_score)
    print("V Measure Score: %.6f" % v_score)
    return h_score, c_score, v_score
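A quick sanity check on the relationship the function above reports: with scikit-learn's default beta, v_measure_score is the harmonic mean of homogeneity and completeness. The label lists below are made up purely for illustration.

import numpy as np
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score

true = [0, 0, 0, 1, 1, 1]
pred = [0, 0, 1, 1, 2, 2]
h = homogeneity_score(true, pred)
c = completeness_score(true, pred)
v = v_measure_score(true, pred)
# V-measure is the harmonic mean of homogeneity and completeness (beta = 1)
assert np.isclose(v, 2 * h * c / (h + c))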
def calc_performance_score(self, algo_type: str, predicted, y_train):
    homo_score = homogeneity_score(y_train, predicted)
    complete_score = completeness_score(y_train, predicted)
    adjusted_mutual_info = adjusted_mutual_info_score(y_train, predicted)
    print(algo_type + ' homo_score ' + "{:.2f}".format(homo_score))
    print(algo_type + ' complete_score ' + "{:.2f}".format(complete_score))
    print(algo_type + ' adjusted_mutual_info_score ' + "{:.2f}".format(adjusted_mutual_info))
def process(data):
    emotions = []
    acts = []
    for item in data['metas']:
        if item is None:
            emotions.append(7)
            acts.append(4)
        else:
            emotions.append(int(item['emotion']))
            acts.append(int(item['act']) - 1)
    las = []
    for _, _, la in data['z']:
        las.extend(la)
    las = [transfer(elem) for elem in las]
    act_h = homogeneity_score(las, acts)
    emotion_h = homogeneity_score(las, emotions)
    print('The act homogeneity is:' + str(act_h))
    print('The emotion homogeneity is:' + str(emotion_h))
def print_scores(labels, predicted):
    print("Contingency: ")
    print(str(confusion_matrix(labels, predicted)))
    ret = []
    ret.append(homogeneity_score(labels, predicted))
    ret.append(completeness_score(labels, predicted))
    ret.append(v_measure_score(labels, predicted))
    ret.append(adjusted_rand_score(labels, predicted))
    ret.append(adjusted_mutual_info_score(labels, predicted))
    print("Homogeneity: " + str(homogeneity_score(labels, predicted)))
    print("completeness: " + str(completeness_score(labels, predicted)))
    print("V-measure: " + str(v_measure_score(labels, predicted)))
    print("RAND score: " + str(adjusted_rand_score(labels, predicted)))
    print("Mutual Info: " + str(adjusted_mutual_info_score(labels, predicted)))
    return ret
def evaluate(X, Y, predicted_labels):
    df = pd.DataFrame(predicted_labels, columns=['label'])
    if len(df.groupby('label').groups) < 2:
        return [0, 0, 0, 0, 0, 0]
    try:
        sil = silhouette_score(X, predicted_labels, metric='euclidean')
    except:
        sil = 0
    return [
        sil,
        homogeneity_score(Y, predicted_labels),
        homogeneity_score(predicted_labels, Y),
        normalized_mutual_info_score(Y, predicted_labels),
        adjusted_mutual_info_score(Y, predicted_labels),
        adjusted_rand_score(Y, predicted_labels)
    ]
def clusteringAverageLink(d, testInputs, testOutputs, graphname, clusteres):
    print("\n\t[" + str(graphname) + "]")
    averagelink = AgglomerativeClustering(n_clusters=clusteres, linkage='average')
    resultados = averagelink.fit(d)
    print("\n\t\tCluster: " + str(resultados.labels_))
    homogeneidad = homogeneity_score(
        testOutputs, averagelink.fit_predict(testInputs, testOutputs))
    print("\n\t\tHomogeneity: " + str(homogeneidad))
    print("\n\n")
    return homogeneidad
def get_homogeneity_and_completeness(self, clusters, category):
    labels = getattr(self.scb.nodes, 'to_' + category)(range(len(self.scb.nodes)))
    keys = dict()
    for i, label in enumerate(labels):
        if label not in keys:
            keys[label] = len(keys)
        labels[i] = keys[label]
    hs = homogeneity_score(labels, clusters)
    cs = completeness_score(labels, clusters)
    return {'homogeneity': hs, 'completeness': cs}
def evaluate(colors, labels):
    b = homogeneity_score(colors, labels)
    c = completeness_score(colors, labels)
    print('homogeneity is ' + str(b))
    print('completeness is ' + str(c))
trt_4 = run_icc_meanDist(samples[sample], dataDir, fsDir, thr, hemi, ['1a', '1b', '2a', '2b'])
trt_2 = run_icc_meanDist(samples[sample], dataDir, fsDir, thr, hemi, ['1ab', '2ab'])
nan_mask = run_nan_grmask(samples[sample], dataDir, hemi)

data_gmm = {}
for n_comp in num_gmm_comps:
    data = meanDist * 1000
    gmm = mixture.GMM(n_components=n_comp, n_iter=1000)
    gmm.fit(data[cort])
    bic = gmm.bic(data[cort])
    aic = gmm.aic(data[cort])
    res = np.zeros(10242)
    res[cort] = gmm.predict(data[cort])
    res[cort] = res[cort] + 1
    data_gmm[n_comp] = res
    homogeneity = homogeneity_score(res[cort], yeo7[0][cort])
    df_gmm_eval.loc[len(df_gmm_eval)] = [str(thr), hemi, n_comp, bic, aic, homogeneity, gmm.converged_]

for node in range(10242):
    df.loc[len(df)] = [sample, str(thr), hemi, node,
                       aparc[2][aparc[0][node]],
                       aparca2009s[2][aparca2009s[0][node]],
                       yeo7_names[yeo7[0][node]], clus7[node],
                       yeo17[2][yeo17[0][node]],
                       meanDist[node], meanDist_norm[node], stdev[node],
                       trt_4[0, node], trt_4[1, node], trt_4[2, node],
                       trt_4[3, node], trt_4[4, node], trt_4[5, node],
                       trt_2[0, node], trt_2[1, node], trt_2[2, node],
                       trt_2[3, node], trt_2[4, node], trt_2[5, node],
                       data_gmm[num_comps[0]][node], data_gmm[num_comps[1]][node],
                       data_gmm[num_comps[2]][node], data_gmm[num_comps[3]][node],
                       data_gmm[num_comps[4]][node], data_gmm[num_comps[5]][node],
def evaluate(colors, labels):
    b = homogeneity_score(colors, labels)
    c = completeness_score(colors, labels)
    logging.debug('homogeneity is %f' % b)
    logging.debug('completeness is %f' % c)
def check_clusters(self):
    print(self.colors)
    print('number of clusters is ' + str(self.clusters))
    print('silhouette score is ' + str(self.sil))
    print('homogeneity is ' + str(homogeneity_score(self.colors, self.labels)))
    print('completeness is ' + str(completeness_score(self.colors, self.labels)))
from sklearn.metrics.cluster import homogeneity_score

print(homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0]))
print(homogeneity_score([0, 0, 0, 1, 1, 1], [3, 2, 2, 2, 3, 3]))

from sklearn.metrics.cluster import completeness_score

print(completeness_score([0, 0, 1, 1], [1, 1, 0, 0]))
print(completeness_score([0, 0, 0, 1, 1, 1], [3, 2, 2, 2, 3, 3]))
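One property worth noting alongside the example above: homogeneity and completeness are the same computation with the argument order swapped, so swapping true and predicted labels turns one score into the other. A small check, reusing the label lists from the snippet above:

from sklearn.metrics.cluster import homogeneity_score, completeness_score

true_labels = [0, 0, 0, 1, 1, 1]
pred_labels = [3, 2, 2, 2, 3, 3]
# homogeneity(true, pred) equals completeness(pred, true) by definition
h = homogeneity_score(true_labels, pred_labels)
c = completeness_score(pred_labels, true_labels)
assert abs(h - c) < 1e-12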