def test_real_example_on_overlapping_community():
    """Check that the prpack and power-iteration PPR backends agree (to 3
    significant digits) on a real overlapping-community example.

    Loads a ground-truth and a detected community assignment from JSON and
    compares ``sim.element_sim`` under both ``ppr_implementation`` settings,
    printing the elapsed wall-clock time for each backend.
    """
    # BUG FIX: the original passed bare open() handles to json.load and never
    # closed them; use context managers so the files are always released.
    with open("ground_truth_community_Philosophy.json", "r") as fh:
        ground_truth_community = json.load(fh)
    with open("detected_community_Philosophy.json", "r") as fh:
        detected_community = json.load(fh)

    c1 = Clustering(elm2clu_dict=ground_truth_community)
    c2 = Clustering(elm2clu_dict=detected_community)

    start = time.time()
    sim_ppr_pack = sim.element_sim(
        c1,
        c2,
        alpha=0.9,
        r=1.0,
        r2=None,
        rescale_path_type="max",
        ppr_implementation="prpack",
    )
    end = time.time()
    print("prpack elapsed time: {}s".format(end - start))

    start = time.time()
    sim_ppr_power_iteration = sim.element_sim(
        c1,
        c2,
        alpha=0.9,
        r=1.0,
        r2=None,
        rescale_path_type="max",
        ppr_implementation="power_iteration",
    )
    end = time.time()
    print("power iteration elapsed time: {}s".format(end - start))

    assert_approx_equal(sim_ppr_pack, sim_ppr_power_iteration, significant=3)
def test_simple_example():
    """Verify that both PPR implementations of ``sim.element_sim`` agree (to
    3 significant digits) on a small hand-built pair of overlapping
    clusterings."""
    membership_a = {0: [0, 1], 1: [1, 2], 2: [1, 3], 3: [0], 4: [2], 5: [1]}
    membership_b = {0: [0], 1: [1], 2: [1], 3: [0, 3], 4: [2, 4], 5: [2]}
    clu_a = Clustering(elm2clu_dict=membership_a)
    clu_b = Clustering(elm2clu_dict=membership_b)

    # Identical similarity settings for both backends; only the PPR
    # implementation differs.
    common_kwargs = dict(alpha=0.9, r=1.0, r2=None, rescale_path_type="max")

    score_prpack = sim.element_sim(
        clu_a, clu_b, ppr_implementation="prpack", **common_kwargs
    )
    score_power_iter = sim.element_sim(
        clu_a, clu_b, ppr_implementation="power_iteration", **common_kwargs
    )

    assert_approx_equal(score_prpack, score_power_iter, significant=3)
def paint_similarity_trace(b, oks, output=None, figsize=(3, 3), dpi=200, **kwargs):
    """Plot element-centric similarity between a baseline partition and each
    step of an optimization trace.

    Parameters
    ----------
    b : membership list for the baseline clustering.
    oks : object exposing ``trace_mb`` — a mapping whose values hold a
        membership list at index 1 (assumed; confirm against the producer).
    output : optional path; when given the figure is saved there.
    figsize : figure size passed to ``plt.subplots``.
    dpi : resolution used for the figure and for ``savefig``.
    **kwargs : accepted for interface compatibility; currently unused.
    """
    clu_base = Clustering()
    # BUG FIX: the dpi parameter was ignored at figure creation (hardcoded
    # dpi=300); honor the caller's value consistently with savefig below.
    fig, ax = plt.subplots(figsize=figsize, dpi=dpi)
    e_sim_list = []
    clu_base.from_membership_list(b)
    for g in oks.trace_mb.values():
        clu = Clustering()
        clu.from_membership_list(g[1])
        e_sim_list.append(sim.element_sim(clu_base, clu))
    ax.autoscale()
    ax.margins(0.1)
    # ax.set_aspect(1)
    plt.xlabel("steps")
    plt.ylabel("Element-centric similarity")
    plt.yticks(np.linspace(0, 1, 5))
    ax.tick_params(direction="in")
    plt.plot(e_sim_list)
    if output is not None:
        plt.savefig(output, dpi=dpi, transparent=True)
def mantel_elsim_r_average_and_errors(some_participants):
    """Compare a participant subsample against the full participant pool,
    averaging both the Mantel correlation and the element-centric similarity
    over ``perms_of_each_n`` resamples.

    Returns a 6-tuple:
    (mantel_average, mantel_lower_error, mantel_upper_error,
     elsim_average, elsim_lower_error, elsim_upper_error)
    where the lower/upper errors are the distances from the average to the
    observed minimum/maximum.

    NOTE(review): a later definition in this file redefines this function
    with a different return signature; only one survives at import time.
    """
    global column_category_label, all_data, cards, total_participants, perms_of_each_n, perms_of_mantel_test
    dis2 = dissimilarity_matrix(total_participants)
    c2 = clustering_with_clusim(dis2)
    mantel_sum = 0
    elsim_sum = 0
    # r is between -1 and 1, so these sentinels are always replaced.
    mantel_minimum = 10
    mantel_maximum = -10
    elsim_minimum = 10
    elsim_maximum = -10
    for _ in range(perms_of_each_n):
        dis1 = dissimilarity_matrix(some_participants)

        # Mantel Method
        mantel = Mantel.test(dis1, dis2, perms_of_mantel_test, method='pearson', tail='two-tail')
        mantel_r = mantel[0]
        # BUG FIX: use two independent comparisons. The original `elif`
        # skipped the maximum update whenever the minimum was updated, so
        # (e.g. with a single sample, or a first sample that is also the
        # running maximum) mantel_maximum stayed at its -10 sentinel and
        # the upper error was meaningless.
        if mantel_r < mantel_minimum:
            mantel_minimum = mantel_r
        if mantel_r > mantel_maximum:
            mantel_maximum = mantel_r
        mantel_sum += mantel_r

        c1 = clustering_with_clusim(dis1)

        # Element-centric Similarity
        elsim_r = sim.element_sim(c1, c2, r=1.0, alpha=0.9)
        # Same independent min/max tracking fix as above.
        if elsim_r < elsim_minimum:
            elsim_minimum = elsim_r
        if elsim_r > elsim_maximum:
            elsim_maximum = elsim_r
        elsim_sum += elsim_r

    mantel_average = mantel_sum / perms_of_each_n   # average of mantel_r
    mantel_l_error = mantel_average - mantel_minimum  # mantel_lower_error
    mantel_u_error = mantel_maximum - mantel_average  # mantel_upper_error

    elsim_average = elsim_sum / perms_of_each_n     # average of elsim_r
    elsim_l_error = elsim_average - elsim_minimum   # elsim_lower_error
    elsim_u_error = elsim_maximum - elsim_average   # elsim_upper_error

    return mantel_average, mantel_l_error, mantel_u_error, elsim_average, elsim_l_error, elsim_u_error
def mantel_elsim_r_average_and_errors(some_participants):
    """Resample a participant subset ``count_of_samples_for_each_n`` times and
    summarize how it compares to the full participant pool.

    For each resample, computes the Mantel correlation and the element-centric
    similarity between the subsample's dissimilarity structure and the full
    pool's. Returns an 8-tuple:
    (mantel_average, mantel_lower_error, mantel_upper_error, mantel_sd,
     elsim_average, elsim_lower_error, elsim_upper_error, elsim_sd)
    where lower/upper errors are distances from the mean to the observed
    minimum/maximum.
    """
    global column_category_label, all_data, cards, total_participants, count_of_samples_for_each_n, perms_of_mantel_test

    reference_dis = dissimilarity_matrix(total_participants)
    reference_clu = clustering_with_clusim(reference_dis)

    mantel_scores = []
    elsim_scores = []
    for _ in range(count_of_samples_for_each_n):
        sample_dis = dissimilarity_matrix(some_participants)

        # Mantel Method
        mantel_result = Mantel.test(sample_dis, reference_dis, perms_of_mantel_test, method='pearson', tail='two-tail')
        mantel_scores.append(mantel_result[0])

        # Element-centric Similarity
        sample_clu = clustering_with_clusim(sample_dis)
        elsim_scores.append(sim.element_sim(sample_clu, reference_clu, r=1.0, alpha=0.9))

    def _summarize(values):
        # (mean, mean - min, max - mean, sample standard deviation)
        avg = statistics.mean(values)
        return avg, avg - min(values), max(values) - avg, statistics.stdev(values)

    mantel_average, mantel_l_error, mantel_u_error, mantel_sd = _summarize(mantel_scores)
    elsim_average, elsim_l_error, elsim_u_error, elsim_sd = _summarize(elsim_scores)

    return mantel_average, mantel_l_error, mantel_u_error, mantel_sd, elsim_average, elsim_l_error, elsim_u_error, elsim_sd
def paint_mds(oks, figsize=(20, 20)):
    """Embed the clusterings recorded in ``oks.trace_mb`` into 2-D with MDS,
    using element-centric dissimilarity (1 - element_sim) as the precomputed
    distance, and draw each point labeled by its trace key.

    NOTE(review): the keys of ``oks.trace_mb`` appear to be 2-combinations of
    ``range(1, l + 1)`` with ``l = int(sqrt(len(trace_mb)))`` — confirm
    against the producer of ``oks``.
    """
    n_points = len(oks.trace_mb.keys())
    side = int(n_points**0.5)
    dist = np.zeros([n_points, n_points])

    for row, key_a in enumerate(combinations(range(1, side + 1), 2)):
        clu_a = Clustering()
        clu_a.from_membership_list(oks.trace_mb[key_a])
        for col, key_b in enumerate(combinations(range(1, side + 1), 2)):
            clu_b = Clustering()
            clu_b.from_membership_list(oks.trace_mb[key_b])
            # Dissimilarity = 1 - similarity; mirror to keep the matrix symmetric.
            dist[row][col] = 1 - sim.element_sim(clu_a, clu_b)
            dist[col][row] = dist[row][col]

    def _plot_embedding(coords, title=None):
        # Rescale each coordinate axis into [0, 1] before drawing labels.
        lo, hi = np.min(coords, 0), np.max(coords, 0)
        coords = (coords - lo) / (hi - lo)
        plt.figure(figsize=figsize)
        labels = list(oks.trace_mb.keys())
        for point in range(coords.shape[0]):
            plt.text(coords[point, 0],
                     coords[point, 1],
                     str(labels[point]),
                     color=plt.cm.Set1(1 / 10.),
                     fontdict={
                         'weight': 'bold',
                         'size': 12
                     })
        plt.xticks([])
        plt.yticks([])
        if title is not None:
            plt.title(title)

    mds = manifold.MDS(n_components=2,
                       n_init=10,
                       max_iter=10000,
                       dissimilarity="precomputed")
    embedded = mds.fit_transform(dist)
    _plot_embedding(embedded)
def compare_scores(nexperiment, true_clusters, true_labels, predicted_clusters, predicted_labels):
    """Compute and print a battery of clustering-comparison scores between a
    ground-truth partition and a predicted one.

    Parameters
    ----------
    nexperiment : experiment identifier, echoed in the report header.
    true_clusters / predicted_clusters : cluster-list representations, fed to
        ``score.calculate_mp_score`` and to clusim's ``Clustering``.
    true_labels / predicted_labels : flat label arrays, fed to the
        scikit-learn comparison metrics.
    """
    mp_score = score.calculate_mp_score(true_clusters, predicted_clusters)

    # scikit-learn label-based metrics.
    nmi = normalized_mutual_info_score(true_labels, predicted_labels, average_method='arithmetic')
    anmi = adjusted_mutual_info_score(true_labels, predicted_labels)
    completeness = completeness_score(true_labels, predicted_labels)
    v_measure = v_measure_score(true_labels, predicted_labels)
    rand = adjusted_rand_score(true_labels, predicted_labels)
    fms = fowlkes_mallows_score(true_labels, predicted_labels)

    # clusim cluster-list-based metrics.
    truth_clu = Clustering()
    pred_clu = Clustering()
    truth_clu.from_cluster_list(true_clusters)
    pred_clu.from_cluster_list(predicted_clusters)
    jaccard_index = sim.jaccard_index(truth_clu, pred_clu)
    nmi2 = sim.nmi(truth_clu, pred_clu)
    fmeasure = sim.fmeasure(truth_clu, pred_clu)
    element_sim = sim.element_sim(truth_clu, pred_clu)
    ri = sim.rand_index(truth_clu, pred_clu)

    print("------------------")
    print("Example ", nexperiment)
    # Labels reproduce the original output byte-for-byte (including the
    # "Weigthed" spelling); each value is rounded to 3 decimal places.
    report_rows = [
        ("Weigthed Similarity: ", mp_score),
        ("NMI: ", nmi),
        ("AMI: ", anmi),
        ("NMI2: ", nmi2),
        ("RI: ", ri),
        ("Completeness: ", completeness),
        ("V-Measure: ", v_measure),
        ("Adjusted Rand: ", rand),
        ("Fowlkes Mallows: ", fms),
        ("Jaccard Index: ", jaccard_index),
        ("F-Measure: ", fmeasure),
        ("Element-centric: ", element_sim),
    ]
    for label, value in report_rows:
        print(label, round(value, 3))
    print()