def Testcheck(embedding, embeddingLabels, name):
    Maxscore = -10000
    NumberOfCluster = 0
    emb_norm = normalize_vectors(embedding)
    c, num_clust, req_c = FINCH(emb_norm)
    print("num_clust: ", num_clust)
    for nc in num_clust:
        emb_norm = normalize_vectors(emb_norm)
        TempLabels = clustering(emb_norm, nc)
        score = silhouette_score(emb_norm, TempLabels)
        print('nc: ', nc, ', score: ', score)
        if score > Maxscore:
            Maxscore = score
            # tClusterLabels = TempLabels
            NumberOfCluster = nc
    clusters_pred = list(clustering(emb_norm, num_clusters=NumberOfCluster))
    print("clusters_pred: ", clusters_pred)
    print("embeddingLabels: ", embeddingLabels)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, embeddingLabels)
    print('pairwise precision', '{:.5f}'.format(prec),
          'recall', '{:.5f}'.format(rec),
          'f1', '{:.5f}'.format(f1))
    tSNEAnanlyse(embedding, labels=embeddingLabels, trueLabels=embeddingLabels,
                 savepath=join(settings.OUT_DIR, name))
def check(embedding, embeddingLabels, name):
    emb_norm = normalize_vectors(embedding)
    clusters_pred = list(
        clustering(emb_norm, num_clusters=len(list(set(embeddingLabels)))))
    print("clusters_pred: ", clusters_pred)
    print("embeddingLabels: ", embeddingLabels)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, embeddingLabels)
    print('pairwise precision', '{:.5f}'.format(prec),
          'recall', '{:.5f}'.format(rec),
          'f1', '{:.5f}'.format(f1))
    tSNEAnanlyse(embedding, labels=embeddingLabels, trueLabels=embeddingLabels,
                 savepath=join(settings.OUT_DIR, name))
    return f1
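# For reference: a minimal sketch of the pairwise precision/recall/F1 metric
# used throughout this file. It illustrates the standard pairwise counting
# scheme and is NOT the imported pairwise_precision_recall_f1 itself, whose
# exact return signature varies across these scripts (the inductive main()
# below also unpacks tp/fp/fn from it).
def _pairwise_prf_sketch(pred_labels, true_labels):
    from itertools import combinations
    tp = fp = fn = 0
    # Count agreement for every unordered pair of samples.
    for i, j in combinations(range(len(true_labels)), 2):
        same_pred = pred_labels[i] == pred_labels[j]
        same_true = true_labels[i] == true_labels[j]
        if same_pred and same_true:
            tp += 1
        elif same_pred:
            fp += 1
        elif same_true:
            fn += 1
    prec = tp / (tp + fp) if tp + fp else 0.0
    rec = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
    return prec, rec, f1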
def tarin(self, batchX, batchy, testX, testy, AllX, Ally, epochs=3000):
    model = self.buildModel()
    loss, opt = self.buildOptimizer(model)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        # Train model
        for epoch in range(epochs):
            # for batchid, batchX in enumerate(X):
            #     batchy = y[batchid]
            # Construct feed dictionary
            feed_dict = {self.placeholder['input']: batchX,
                         self.placeholder['labels']: batchy}
            # Run single weight update
            outs = sess.run([loss, opt], feed_dict=feed_dict)
            # Compute average loss
            lossScale = outs[0]
            # accScale = outs[2]
            print("Epoch:", '%04d' % (epoch + 1), "loss=", "{:.5f}".format(lossScale))
            # print("Epoch:", '%04d' % (epoch + 1), "loss=", "{:.5f}".format(lossScale), ', acc = {:.5f}'.format(accScale))
            # testacc = sess.run([acc], feed_dict={self.placeholder['input']: testX, self.placeholder['labels']: testy})
            # print("test acc: ", testacc)

        # Check the clustering quality of the learned embedding
        ClusterCheckX = AllX
        ClusterCheckY = Ally
        embedding = sess.run(model, feed_dict={self.placeholder['input']: ClusterCheckX,
                                               self.placeholder['labels']: ClusterCheckY})
        print(embedding.shape)
        emb_norm = normalize_vectors(embedding)
        clusters_pred = list(clustering(emb_norm, num_clusters=len(list(set(ClusterCheckY)))))
        embeddingLabels = self.codeLabel(ClusterCheckY)
        print("clusters_pred: ", clusters_pred)
        print("embeddingLabels: ", embeddingLabels)
        prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, embeddingLabels)
        print('pairwise precision', '{:.5f}'.format(prec),
              'recall', '{:.5f}'.format(rec),
              'f1', '{:.5f}'.format(f1))
def preprocess(name):
    adj, features, labels = load_local_data(exp_name, IDF_THRESHOLD, name=name)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()
    adj_train = gen_train_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Build the positive-pair label matrix: an edge (i, j) for every pair of
    # documents that share a ground-truth label.
    edge_labels = []
    n_samples = len(labels)
    for i in range(n_samples - 1):
        for j in range(i + 1, n_samples):
            if labels[i] == labels[j]:
                edge_labels.append([i, j])
    edge_labels = np.array(edge_labels)
    adj_label = sp.coo_matrix((np.ones(edge_labels.shape[0]),
                               (edge_labels[:, 0], edge_labels[:, 1])),
                              shape=(features.shape[0], features.shape[0]),
                              dtype=np.float32)
    pos_weight = float(adj_label.shape[0] * adj_label.shape[0] - adj_label.sum()) / adj_label.sum()
    norm = adj_label.shape[0] * adj_label.shape[0] / float(
        (adj_label.shape[0] * adj_label.shape[0] - adj_label.nnz) * 2)
    adj_label = adj_label + sp.eye(adj_label.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    if FLAGS.is_sparse:  # TODO to test
        # features = sparse_to_tuple(features.tocoo())
        # features_nonzero = features[1].shape[0]
        features = features.todense()  # TODO
    else:
        features = normalize_vectors(features)
    # print("positive_label", len(edge_labels))
    print('positive edge weight', pos_weight)  # negative edges/pos edges
    print('norm', norm)
    return adj_norm, adj_label, features, pos_weight, norm, labels
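# For reference: preprocess_graph is imported elsewhere in this repo. A
# minimal sketch of the standard GCN normalization it presumably performs,
# A_hat = D^{-1/2} (A + I) D^{-1/2}, assuming a scipy.sparse adjacency input
# (hypothetical helper, not the repo's implementation, which also converts
# the result to a tuple):
def _preprocess_graph_sketch(adj):
    import numpy as np
    import scipy.sparse as sp
    adj_ = adj + sp.eye(adj.shape[0])          # add self-loops
    rowsum = np.array(adj_.sum(1)).flatten()   # node degrees of A + I
    d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.      # guard isolated nodes
    d_mat = sp.diags(d_inv_sqrt)
    return (d_mat @ adj_ @ d_mat).tocoo()      # D^-1/2 (A + I) D^-1/2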
def main():
    names = load_train_names()
    Prec = 0
    Recall = 0
    F1 = 0
    for name in names:
        adj, features, labels, Ids = load_local_data(name=name)
        emb_norm = normalize_vectors(features)
        clusters_pred = clustering(emb_norm, len(list(set(labels))))
        prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
        print('pairwise precision', '{:.5f}'.format(prec),
              'recall', '{:.5f}'.format(rec),
              'f1', '{:.5f}'.format(f1))
        Prec += prec
        Recall += rec
        F1 += f1
    Prec = Prec / (1.0 * len(names))
    Recall = Recall / (1.0 * len(names))
    F1 = F1 / (1.0 * len(names))
    print('All pairwise precision', '{:.5f}'.format(Prec),
          'recall', '{:.5f}'.format(Recall),
          'f1', '{:.5f}'.format(F1))
def gae_for_na(name, rawfeature):
    """
    train and evaluate disambiguation results for a specific name
    :param name: author name
    :return: evaluation results
    """
    adj, features, labels = load_local_data(name=name, rawfeature=rawfeature)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]),
                                        shape=adj_orig.shape)
    adj_orig.eliminate_zeros()
    adj_train = gen_train_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    input_feature_dim = features.shape[1]
    if FLAGS.is_sparse:  # TODO to test
        # features = sparse_to_tuple(features.tocoo())
        # features_nonzero = features[1].shape[0]
        features = features.todense()  # TODO
    else:
        features = normalize_vectors(features)

    # Define placeholders
    placeholders = {
        # 'features': tf.sparse_placeholder(tf.float32),
        'features': tf.placeholder(tf.float32, shape=(None, input_feature_dim)),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    # Create model
    model = None
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, input_feature_dim)
    elif model_str == 'gcn_vae':
        model = GCNModelVAE(placeholders, input_feature_dim, num_nodes)

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()  # negative edges/pos edges
    print('positive edge weight', pos_weight)
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.nnz) * 2)

    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                                          validate_indices=False), [-1]),
                              pos_weight=pos_weight,
                              norm=norm)
        elif model_str == 'gcn_vae':
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                                           validate_indices=False), [-1]),
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm)

    # Initialize session
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    def get_embs():
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)  # z_mean is better
        return emb

    # Train model
    for epoch in range(FLAGS.epochs):
        t = time.time()
        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        # Run single weight update
        outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)
        # Compute average loss
        avg_cost = outs[1]
        avg_accuracy = outs[2]
        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(avg_cost),
              "train_acc=", "{:.5f}".format(avg_accuracy),
              "time=", "{:.5f}".format(time.time() - t))

    emb = get_embs()
    n_clusters = len(set(labels))
    emb_norm = normalize_vectors(emb)
    clusters_pred = clustering(emb_norm, num_clusters=n_clusters)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
    print('pairwise precision', '{:.5f}'.format(prec),
          'recall', '{:.5f}'.format(rec),
          'f1', '{:.5f}'.format(f1))

    # Baseline: cluster the raw input features directly for comparison
    clusters_pred2 = clustering(features, num_clusters=n_clusters)
    prec2, rec2, f12 = pairwise_precision_recall_f1(clusters_pred2, labels)
    print('pairwise precision', '{:.5f}'.format(prec2),
          'recall', '{:.5f}'.format(rec2),
          'f1', '{:.5f}'.format(f12))

    from sklearn.manifold import TSNE
    features_new = TSNE(learning_rate=100).fit_transform(features)
    emb_new = TSNE(learning_rate=100).fit_transform(emb_norm)
    labels = np.array(labels) + 2
    clusters_pred = np.array(clusters_pred) + 2
    clusters_pred2 = np.array(clusters_pred2) + 2
    if rawfeature == RAW_INTER_NAME:
        tSNEAnanlyse(emb_norm, labels, join(settings.PIC_DIR, "FINALResult", "rawReature_%s_gae_final_raw.png" % (name)))
        tSNEAnanlyse(features, labels, join(settings.PIC_DIR, "FINALResult", "rawReature_%s_gae_features_raw.png" % (name)))
    elif rawfeature == ATTENTIONFEATURE:
        tSNEAnanlyse(emb_new, labels, join(settings.PIC_DIR, "FINALResult", "rawReature_%s_gae_final.png" % (name)))
        tSNEAnanlyse(features_new, labels, join(settings.PIC_DIR, "FINALResult", "rawReature_%s_gae_features.png" % (name)))
        tSNEAnanlyse(emb_new, clusters_pred, join(settings.PIC_DIR, "FINALResult", "rawReature_%s_gae_final_clusterresult.png" % (name)))
        tSNEAnanlyse(features_new, clusters_pred2, join(settings.PIC_DIR, "FINALResult", "rawReature_%s_gae_features_clusterresult.png" % (name)))
    else:
        tSNEAnanlyse(emb_norm, labels, join(settings.PIC_DIR, "FINALResult", "rawReature_%s_gae_final_triplet.png" % (name)))
        tSNEAnanlyse(features, labels, join(settings.PIC_DIR, "FINALResult", "rawReature_%s_gae_features_triplet.png" % (name)))

    return [prec, rec, f1], num_nodes, n_clusters
def gae_for_na(name, n_clusters):
    # Predict disambiguation results for a specific name:
    # returns the predicted clusters (lists of paper ids).
    """
    train and evaluate disambiguation results for a specific name
    :param name: author name
    :return: evaluation results
    """
    adj, features, pids = load_local_data(name=name)  # adjacency matrix with (i, j) = 1, document features, paper ids; features and pids correspond one-to-one by index

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]),
                                        shape=adj_orig.shape)
    adj_orig.eliminate_zeros()  # remove diagonal entries from the original adjacency matrix and drop explicit zeros
    adj_train = gen_train_edges(adj)  # effectively removes the diagonal entries of adj (csr_matrix type)
    adj = adj_train  # the full adjacency matrix

    # Some preprocessing
    adj_norm = preprocess_graph(adj)  # normalized matrix A'; returned as a tuple of coordinates (x, y), values, shape
    num_nodes = adj.shape[0]  # number of nodes
    input_feature_dim = features.shape[1]  # input feature dimension (shape[0] is the sample count)
    if FLAGS.is_sparse:  # TODO to test
        # features = sparse_to_tuple(features.tocoo())
        # features_nonzero = features[1].shape[0]
        features = features.todense()  # TODO
    else:  # this is the branch actually taken
        features = normalize_vectors(features)  # normalize the feature vectors

    # Define placeholders
    # tf.placeholder defines a formal parameter whose concrete value is fed at run time
    placeholders = {
        # 'features': tf.sparse_placeholder(tf.float32),
        'features': tf.placeholder(tf.float32, shape=(None, input_feature_dim)),
        'adj': tf.sparse_placeholder(tf.float32),  # placeholder for a sparse tensor that will always be fed
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())  # placeholder tensor that falls back to the default value if not fed
    }

    # Create model
    model = None
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, input_feature_dim)
    elif model_str == 'gcn_vae':  # the model actually used is gcn_vae
        model = GCNModelVAE(placeholders, input_feature_dim, num_nodes)

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    print('positive edge weight', pos_weight)  # negative edges / positive edges
    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.nnz) * 2)  # nnz is the number of nonzero entries in the matrix

    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                                          validate_indices=False), [-1]),
                              pos_weight=pos_weight,
                              norm=norm)
        elif model_str == 'gcn_vae':  # the model actually used is gcn_vae
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                                           validate_indices=False), [-1]),
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm)

    # Initialize session
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    adj_label = adj_train + sp.eye(adj_train.shape[0])  # sp.eye is the identity matrix; the decoder should reconstruct the original matrix
    adj_label = sparse_to_tuple(adj_label)  # sparse matrix -> tuple (coordinates, values, shape)

    def get_embs():  # fetch the internal embedding z
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)  # z_mean is better
        return emb

    # Train model
    for epoch in range(FLAGS.epochs):  # training epochs
        t = time.time()
        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        # Run single weight update
        outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)
        # Compute average loss
        avg_cost = outs[1]
        avg_accuracy = outs[2]
        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(avg_cost),
              "train_acc=", "{:.5f}".format(avg_accuracy),
              "time=", "{:.5f}".format(time.time() - t))

    emb = get_embs()  # embedding produced by the encoder
    '''
    n_clusters = int(name_to_ncluster.get(name, 0))
    n_clusters = len(set(labels))  # use the true number of clusters directly
    if n_clusters == 1:
        return None, None, None, None
    '''
    # n_clusters = len(set(labels))  # use the true number of clusters directly
    emb_norm = normalize_vectors(emb)  # normalize the embeddings
    clusters_pred = clustering(emb_norm, num_clusters=max(n_clusters, 1))  # cluster the embeddings with the given cluster count
    print('clusters_pred: ', clusters_pred)

    # Group paper ids by predicted cluster label
    ret = {}
    for i, pred_label in enumerate(clusters_pred):
        pred_label = str(pred_label)
        if pred_label not in ret:
            ret[pred_label] = []
        ret[pred_label].append(pids[i])
    rett = []
    for pred_label in ret:
        tmp = []
        for pid in ret[pred_label]:
            tmp.append(pid)
        rett.append(tmp)
    return rett
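# A more idiomatic equivalent of the grouping loop above, for reference
# (hypothetical helper, not called by this script; the ordering of the
# returned groups may differ from the dict-based loop):
def _group_pids_by_cluster(pids, clusters_pred):
    from collections import defaultdict
    groups = defaultdict(list)
    for pid, pred_label in zip(pids, clusters_pred):
        groups[str(pred_label)].append(pid)
    return list(groups.values())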
def main(): """ train and evaluate YUTAO results for a specific name :param name: author name :return: evaluation results """ # Store original adjacency matrix (without diagonal entries) for later # Define placeholders placeholders = { # 'features': tf.sparse_placeholder(tf.float32), 'features': tf.placeholder(tf.float32, shape=(None, input_feature_dim)), 'adj': tf.sparse_placeholder(tf.float32), 'adj_orig': tf.sparse_placeholder(tf.float32), 'dropout': tf.placeholder_with_default(0., shape=()), 'pos_weight': tf.placeholder(tf.float32, shape=()), 'norm': tf.placeholder(tf.float32), } # Create model model = None if model_str == 'gcn_ae': model = GCNModelInductiveAE(placeholders, input_feature_dim) # Optimizer with tf.name_scope('optimizer'): if model_str == 'gcn_ae': opt = OptimizerInductiveAE(preds=model.reconstructions, labels=tf.reshape( tf.sparse_tensor_to_dense( placeholders['adj_orig'], validate_indices=False), [-1]), pos_weight=model.pos_weight, norm=model.norm) saver = tf.train.Saver() # Initialize session sess = tf.Session() sess.run(tf.global_variables_initializer()) def infer(): feed_dict.update({placeholders['dropout']: 0}) acc, emb = sess.run([opt.accuracy, model.z_mean], feed_dict=feed_dict) # z_mean is better return acc, emb train_name_list, _ = settings.get_split_name_list(train_dataset_name) _, test_name_list = settings.get_split_name_list(test_dataset_name) # Train model for epoch in range(FLAGS.epochs): epoch_avg_cost = 0 epoch_avg_accuracy = 0 for name in train_name_list: adj_norm, adj_label, features, pos_weight, norm, labels = load_local_preprocess_result( exp_name, IDF_THRESHOLD, name) # print('positive edge weight', pos_weight) # negative edges/pos edges t = time.time() # Construct feed dictionary feed_dict = construct_feed_dict_inductive(adj_norm, adj_label, features, pos_weight, norm, placeholders) feed_dict.update({placeholders['dropout']: FLAGS.dropout}) # Run single weight update outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict) # Compute average loss avg_cost = outs[1] avg_accuracy = outs[2] epoch_avg_cost += avg_cost epoch_avg_accuracy += avg_accuracy # print(avg_cost, avg_accuracy) print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(epoch_avg_cost / len(train_name_list)), "train_acc=", "{:.5f}".format(epoch_avg_accuracy / len(train_name_list)), "time=", "{:.5f}".format(time.time() - t)) metrics = np.zeros(3) tp_fp_fn_sum = np.zeros(3) avg_acc = 0 for name in test_name_list: adj_norm, adj_label, features, pos_weight, norm, labels = load_local_preprocess_result( exp_name, IDF_THRESHOLD, name) feed_dict = construct_feed_dict_inductive(adj_norm, adj_label, features, pos_weight, norm, placeholders) acc, emb = infer() n_clusters = len(set(labels)) emb_norm = normalize_vectors(emb) clusters_pred = clustering(emb_norm, num_clusters=n_clusters) tp, fp, fn, prec, rec, f1 = pairwise_precision_recall_f1( clusters_pred, labels) tp_fp_fn_sum += np.array([tp, fp, fn]) metrics += np.array([prec, rec, f1]) avg_acc += acc macro_prec = metrics[0] / len(test_name_list) macro_rec = metrics[1] / len(test_name_list) avg_acc /= len(test_name_list) macro_f1 = cal_f1(macro_prec, macro_rec) tp, fp, fn = tp_fp_fn_sum micro_precision = tp / (tp + fp) micro_recall = tp / (tp + fn) micro_f1 = 2 * micro_precision * micro_recall / (micro_precision + micro_recall) print( 'average,acc:{0:.5f},macro_prec:{1:.5f},macro_rec:{2:.5f},macro_f1:{3:.5f},micro_precision:{4:.5f},micro_recall:{5:5f},micro_f1:{6:5f}\n' .format(avg_acc, macro_prec, macro_rec, 
macro_f1, micro_precision, micro_recall, micro_f1)) path = join(settings.get_data_dir(exp_name), 'local', 'model-{}'.format(IDF_THRESHOLD), model_name) saver.save(sess, path)
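# cal_f1 is imported elsewhere; given how micro_f1 is computed inline above,
# it is presumably the harmonic mean of precision and recall. A one-line
# sketch under that assumption:
def _cal_f1_sketch(prec, rec):
    return 2 * prec * rec / (prec + rec) if (prec + rec) else 0.0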
def gae_for_na(name, mode=0):
    """
    train and evaluate disambiguation results for a specific name
    :param name: author name
    :param mode: 0-train 1-val
    :return: evaluation results
    """
    pids, adj, features, labels = load_local_data(name=name)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()
    adj_train = gen_train_edges(adj)
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    input_feature_dim = features.shape[1]
    if FLAGS.is_sparse:  # TODO to test
        # features = sparse_to_tuple(features.tocoo())
        # features_nonzero = features[1].shape[0]
        features = features.todense()  # TODO
    else:
        features = normalize_vectors(features)

    # Define placeholders
    placeholders = {
        # 'features': tf.sparse_placeholder(tf.float32),
        'features': tf.placeholder(tf.float32, shape=(None, input_feature_dim)),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    # Create model
    model = None
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, input_feature_dim)
    elif model_str == 'gcn_vae':
        model = GCNModelVAE(placeholders, input_feature_dim, num_nodes)

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()  # negative edges/pos edges
    print('positive edge weight', pos_weight)
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.nnz) * 2)

    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=tf.reshape(
                                  tf.sparse_tensor_to_dense(
                                      placeholders['adj_orig'],
                                      validate_indices=False), [-1]),
                              pos_weight=pos_weight,
                              norm=norm)
        elif model_str == 'gcn_vae':
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=tf.reshape(
                                   tf.sparse_tensor_to_dense(
                                       placeholders['adj_orig'],
                                       validate_indices=False), [-1]),
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm)

    # Initialize session
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    def get_embs():
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)  # z_mean is better
        return emb

    # Train model
    for epoch in range(FLAGS.epochs):
        t = time.time()
        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        # Run single weight update
        outs = sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=feed_dict)
        # Compute average loss
        avg_cost = outs[1]
        avg_accuracy = outs[2]
        print("Epoch:", '%04d' % (epoch + 1),
              "train_loss=", "{:.5f}".format(avg_cost),
              "train_acc=", "{:.5f}".format(avg_accuracy),
              "time=", "{:.5f}".format(time.time() - t))

    emb = get_embs()
    # local_output is assumed to be a module-level dict collecting the
    # learned embedding for each paper id.
    for idx, pid in enumerate(pids):
        local_output[pid] = emb[idx]

    # Train mode: compute F1
    if not (mode == 2):
        n_clusters = len(set(labels))
        emb_norm = normalize_vectors(emb)
        model = AgglomerativeClustering(n_clusters=n_clusters)
        model.fit(emb_norm)
        prec, rec, f1 = pairwise_precision_recall_f1(model.labels_, labels)
        print('pairwise precision', '{:.5f}'.format(prec),
              'recall', '{:.5f}'.format(rec),
              'f1', '{:.5f}'.format(f1))
        return [prec, rec, f1], num_nodes, n_clusters
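# Example driver: a minimal sketch that runs gae_for_na over the training
# names, mirroring the per-name averaging used in the first main() above.
# It assumes the load_train_names() helper seen earlier in this file; the
# repo's real entry point may differ.
def _run_all_names_sketch():
    metrics = np.zeros(3)
    names = load_train_names()
    for name in names:
        result, num_nodes, n_clusters = gae_for_na(name, mode=0)
        metrics += np.array(result)
    prec, rec, f1 = metrics / len(names)
    print('macro pairwise precision {:.5f} recall {:.5f} f1 {:.5f}'.format(prec, rec, f1))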