def build_tadw(g, path, configs):
    """Train a TADW (OpenNE) embedding on graph *g*, save it under *path*,
    and return the reloaded embedding.

    NOTE(review): reads module-level ``embedding_size`` and ``lamb``; the
    ``configs`` argument is currently unused — confirm intended.
    """
    print("TADW processing...")
    out_file = "{}/TADW.nv".format(path)
    model = TADW(g, dim=embedding_size, lamb=lamb)
    model.save_embeddings(out_file)
    print("TADW finished\n")
    return load_embedding(out_file)
def build_sdne(g, path, configs):
    """Train an SDNE (OpenNE) embedding on graph *g*, save it under *path*,
    and return the reloaded embedding.

    NOTE(review): reads module-level ``encoder_list`` and ``epochs``; the
    ``configs`` argument is currently unused — confirm intended.
    """
    print("SDNE processing...")
    model = SDNE(g, encoder_layer_list=encoder_list, epoch=epochs)
    target = "{}/SDNE.nv".format(path)
    model.save_embeddings(target)
    print("SDNE finished\n")
    return load_embedding(target)
def build_line(g, path, configs):
    """Train a LINE (OpenNE) embedding on graph *g*, save it under *path*,
    and return the reloaded embedding.

    NOTE(review): reads module-level ``epochs`` and ``embedding_size``; the
    ``configs`` argument is currently unused — confirm intended.
    """
    print("LINE processing...")
    nv_path = "{}/LINE.nv".format(path)
    line_model = LINE(g, epoch=epochs, rep_size=embedding_size)
    line_model.save_embeddings(nv_path)
    print("LINE finished\n")
    return load_embedding(nv_path)
def build_hope(g, path, configs):
    """Train a HOPE (OpenNE) embedding on graph *g*, save it under *path*,
    and return the reloaded embedding.

    NOTE(review): reads module-level ``embedding_size``; the ``configs``
    argument is currently unused — confirm intended.
    """
    print("HOPE processing...")
    nv_path = "{}/HOPE.nv".format(path)
    hope = HOPE(graph=g, d=embedding_size)
    hope.save_embeddings(nv_path)
    print("HOPE finished\n")
    return load_embedding(nv_path)
def build_grarep(g, path, configs):
    """Train a GraRep (OpenNE) embedding on graph *g*, save it under *path*,
    and return the reloaded embedding.

    NOTE(review): reads module-level ``kstep`` and ``embedding_size``; the
    ``configs`` argument is currently unused — confirm intended.
    """
    print("GraRep processing...")
    nv_path = "{}/GraRep.nv".format(path)
    grarep = GraRep(graph=g, Kstep=kstep, dim=embedding_size)
    grarep.save_embeddings(nv_path)
    print("GraRep finished\n")
    return load_embedding(nv_path)
def build_gf(g, path, configs):
    """Train a Graph Factorization (OpenNE) embedding on graph *g*, save it
    under *path*, and return the reloaded embedding.

    NOTE(review): reads module-level ``embedding_size``; the ``configs``
    argument is currently unused — confirm intended.
    """
    print("GF processing...")
    nv_path = "{}/GF.nv".format(path)
    gf = GraphFactorization(graph=g, rep_size=embedding_size)
    gf.save_embeddings(nv_path)
    print("GF finished\n")
    return load_embedding(nv_path)
def build_lle(g, path, configs):
    """Train an LLE (OpenNE) embedding on graph *g*, save it under *path*,
    and return the reloaded embedding.

    NOTE(review): reads module-level ``embedding_size``; the ``configs``
    argument is currently unused — confirm intended.
    """
    print("LLE processing...")
    nv_path = "{}/LLE.nv".format(path)
    lle = LLE(graph=g, d=embedding_size)
    lle.save_embeddings(nv_path)
    print("LLE finished\n")
    return load_embedding(nv_path)
def build_le(g, path, configs):
    """Train a Laplacian Eigenmaps (OpenNE) embedding on graph *g*, save it
    under *path*, and return the reloaded embedding.

    NOTE(review): reads module-level ``embedding_size``; the ``configs``
    argument is currently unused — confirm intended.
    """
    # BUGFIX: progress message previously misspelled "Lapacian".
    print("Laplacian Eigenmaps processing...")
    nv_path = "{}/Lap.nv".format(path)
    model = LaplacianEigenmaps(g, rep_size=embedding_size)
    model.save_embeddings(nv_path)
    print("Laplacian Eigenmaps finished\n")
    return load_embedding(nv_path)
def build_n2v(g, path, configs):
    """Train a node2vec (OpenNE) embedding on graph *g*, save it under
    *path*, and return the reloaded embedding.

    NOTE(review): reads several module-level hyperparameters
    (``walk_length``, ``number_walks``, ``embedding_size``, ``workers``,
    ``p``, ``q``, ``window_size``); the ``configs`` argument is currently
    unused — confirm intended.
    """
    print("Node2vec processing...")
    nv_path = "{}/Node2vec.nv".format(path)
    n2v = Node2vec(graph=g,
                   path_length=walk_length,
                   num_paths=number_walks,
                   dim=embedding_size,
                   workers=workers,
                   p=p,
                   q=q,
                   window=window_size)
    n2v.save_embeddings(nv_path)
    print("Node2vec finished\n")
    return load_embedding(nv_path)
def build_dw(g, path, configs):
    """Train a DeepWalk embedding on graph *g* (via OpenNE's Node2vec with
    ``dw=True``), save it under *path*, and return the reloaded embedding.

    NOTE(review): reads module-level ``walk_length``, ``number_walks``,
    ``embedding_size``, ``window_size`` and ``workers``; the ``configs``
    argument is currently unused — confirm intended.
    """
    print("DeepWalk processing...")
    nv_path = "{}/DeepWalk.nv".format(path)
    # dw=True switches Node2vec into plain DeepWalk sampling.
    dw = Node2vec(graph=g,
                  path_length=walk_length,
                  num_paths=number_walks,
                  dim=embedding_size,
                  window=window_size,
                  workers=workers,
                  dw=True)
    dw.save_embeddings(nv_path)
    print("DeepWalk finished\n")
    return load_embedding(nv_path)
def build_gcae(g, path, configs):
    """Train a GCAE model on graph *g* for ``epochs`` epochs and return the
    embedding saved under *path*.

    NOTE(review): ``epochs`` is a module-level global — confirm intended.
    """
    model = GCAE(g, path, configs)
    start = time.time()
    print("GCAE Start Training")
    for epoch in range(epochs):
        model.train(epoch)
    print("GCAE Optimization Finished!")
    print("Total time elapsed: {:.4f}s\n".format(time.time() - start))
    return load_embedding("{}/GCAE.nv".format(path))
def main(configs):
    """Measure per-node embedding drift across time spells for each model.

    For every model in ``configs.models``: loads the embedding of each time
    spell, rotates each newer embedding onto the previous one, computes
    per-node distances ("Shift") and cosine similarities ("Sim") between
    consecutive spells, and writes one CSV per model to
    ``configs.RESULT_PATH``.
    """
    # Normalise once; the original coerced with int() at each use site.
    timespells = int(configs.timespells)
    Path(configs.RESULT_PATH).mkdir(parents=True, exist_ok=True)

    for model in configs.models:
        # Load embeddings across time spells.
        embs = [
            load_embedding(f"{configs.EMBEDDING_PATH}/TS{ts}/{model}.nv")
            for ts in range(1, timespells + 1)
        ]

        dist_ts = []
        cos_ts = []
        for i in range(1, timespells):
            e_prev = embs[i - 1]
            e_new = embs[i]
            # Align the newer embedding to the older before comparing.
            e_new_rotated = get_rotated_embedding(
                e_prev, e_new, list(range(e_prev.shape[0])))
            dist_ts.append(get_embedding_distance(e_prev, e_new_rotated))
            cos_ts.append(get_embedding_cosine(e_prev, e_new_rotated))

        # Export to CSV; `with` guarantees the handle closes on error
        # (original used open()/close() and leaked on exceptions).
        with open(f"{configs.RESULT_PATH}/{model}.csv", "w") as f:
            header = "NodeID, " + ", ".join(["Shift" + str(i) for i in range(1, timespells)]) + \
                ", " + ", ".join(["Sim" + str(i) for i in range(1, timespells)]) + "\n"
            f.write(header)
            # BUGFIX: max([]) raised ValueError when timespells == 1;
            # default=0 simply produces an empty (header-only) CSV.
            n_nodes = max((len(d) for d in dist_ts), default=0)
            for i in range(n_nodes):
                # Spells may have differing node counts; pad with " ".
                dists = []
                coss = []
                for j in range(len(dist_ts)):
                    if i < len(dist_ts[j]):
                        dists.append(str(dist_ts[j][i]))
                        coss.append(str(cos_ts[j][i]))
                    else:
                        dists.append(" ")
                        coss.append(" ")
                row = "{}, ".format(
                    str(i)) + ", ".join(dists) + ", " + ", ".join(coss) + "\n"
                f.write(row)
def build_gate(g, embedding_path, configs):
    """Train a GATE model, write its embedding as a .nv file under
    *embedding_path*, and return the reloaded embedding.

    NOTE(review): ``gate_args`` is a module-level dict — confirm its
    expected contents against the GATE trainer.
    """
    print("GATE processing...")
    G, X = gate_utils.load_data(configs)
    feature_dim = X.shape[1]
    # BUGFIX: the original mutated the module-level gate_args in place,
    # so a second call would prepend feature_dim twice. Work on a copy.
    args = dict(gate_args)
    args['hidden_dims'] = [feature_dim] + gate_args['hidden_dims']
    G_tf, S, R = gate_utils.prepare_graph_data(G, configs)
    trainer = Trainer(args)
    trainer(G_tf, X, S, R)
    embeddings, attentions = trainer.infer(G_tf, X, S, R)

    # Write the .nv file: first line is the shape, then "<id> <vector>".
    # `with` guarantees the handle closes even if a write fails.
    with open("{}/GATE.nv".format(embedding_path), "w") as f:
        f.write(" ".join([str(x) for x in embeddings.shape]))
        f.write("\n")
        for i in range(embeddings.shape[0]):
            d = " ".join([str(x) for x in embeddings[i]])
            f.write("{} {}\n".format(str(i), d))
    print("GATE finished\n")
    return load_embedding("{}/GATE.nv".format(embedding_path))
def build_vgae(g, path, configs):
    """Train a VGAE model on graph *g* for ``epochs`` epochs and return the
    embedding it saved under *path*.

    NOTE(review): ``epochs`` is a module-level global — confirm intended.
    """
    model = VGAE(g, path, configs)
    model.train(epochs)
    return load_embedding("{}/VGAE.nv".format(path))
def main(configs, LOAD_TRAINED_EMBEDDING, n_cluster):
    """Build (or load) graph embeddings, evaluate reconstruction and
    clustering quality, and write a TSV report plus a pickle cache.

    Parameters
    ----------
    configs : experiment settings object (paths, model list, flags).
    LOAD_TRAINED_EMBEDDING : bool — load saved .nv files instead of training.
    n_cluster : int — number of KMeans clusters.

    Returns
    -------
    (MAP, prec_curv) — the reconstruction scores of the LAST model in
    ``configs.models``. NOTE(review): returning only the last model's
    scores looks accidental — confirm intended.
    """
    process_node_index(configs.edgelist_filename, configs.node_index_filename,
                       configs.embedding_mapping)
    # `with` guarantees the pickle handle closes (original used open/close).
    with open(configs.node_index_filename, 'rb') as fh:
        node_index = pickle.load(fh)

    # ---- load dataset -------------------------------------------------
    print("====================\nLoading edgelist")
    t1 = time.time()
    # Load the graph twice: once with int node ids, once with string ids.
    graph = Graph_Int()
    graph.read_edgelist(filename=configs.edgelist_filename,
                        node_index=node_index,
                        weighted=configs.weighted_graph,
                        directed=False)
    graph_str = Graph_Str()
    graph_str.read_edgelist(filename=configs.edgelist_filename,
                            node_index=node_index,
                            weighted=configs.weighted_graph,
                            directed=False)
    if configs.have_features:
        graph.read_node_features(node_index=node_index,
                                 filename=configs.current_feature_file)
    print("Data Loaded. Time elapsed: {:.3f}\n====================\n".format(
        time.time() - t1))

    # ---- build or load embeddings -------------------------------------
    graph_embeddings = {}
    if LOAD_TRAINED_EMBEDDING:
        print("====================\nLoading Graph Embeddings\n")
        for model in configs.models:
            embedding_file = (f"{configs.current_embedding_path}/{model}.nv")
            graph_embeddings[model] = load_embedding(embedding_file)
        print("Embeddings Loaded.\n====================")
    else:
        print("====================\nBuilding Graph Embeddings\n")
        t2 = time.time()
        for model in configs.models:
            graph_embeddings[model] = build_embedding(
                graph, graph_str, model, configs.current_embedding_path,
                configs)
        print(
            "Embeddings Constructed. Total time elapsed: {:.3f}\n===================="
            .format(time.time() - t2))

    # ---- GEM graph reconstruction evaluation --------------------------
    print("====================\nEvaluating Graph Embeddings")
    t3 = time.time()
    reconstruction_performance = {}
    for model in configs.models:
        reconstruction_performance[model] = evaluate_embedding(
            graph.G, graph_embeddings[model])
    # BUGFIX: this message's string literal was split across physical
    # lines in the source (syntax error); reassembled here.
    print(
        "Embeddings Evaluated. Total time elapsed: {:.3f}\n===================="
        .format(time.time() - t3))

    # ---- clustering evaluation ----------------------------------------
    print("====================\nEvaluating Node Clusters")
    t4 = time.time()
    kmeans_performance = {}
    dbscan_performance = {}

    # KMeans (KMEANS_EVAL is a module-level flag)
    if KMEANS_EVAL:
        kmeans_prediction = {}
        for model in configs.models:
            print("[KMeans] Clustering {} Embedding".format(model))
            temp_t = time.time()
            kmeans = KMeans(n_clusters=n_cluster).fit(graph_embeddings[model])
            kmeans_prediction[model] = kmeans.labels_
            kmeans_performance[model] = evaluate_clustering_performance(
                graph_embeddings[model], kmeans_prediction[model])
            print(
                "[KMeans] Clustering Finished for {} Embedding. Time elapsed: {:.3f}"
                .format(model, time.time() - temp_t))

    # DBSCAN (DBSCAN_EVAL and eps are module-level)
    if DBSCAN_EVAL:
        # Typo "dbscan_predcition" fixed (local name only).
        dbscan_prediction = {}
        for model in configs.models:
            print("[DBSCAN] Clustering {} Embedding".format(model))
            temp_t = time.time()
            dbscan = DBSCAN(eps=eps).fit(graph_embeddings[model])
            dbscan_prediction[model] = dbscan.labels_
            dbscan_performance[model] = evaluate_clustering_performance(
                graph_embeddings[model], dbscan_prediction[model])
            print(
                "[DBSCAN] Clustering Finished for {} Embedding. Time elapsed: {:.3f}"
                .format(model, time.time() - temp_t))

    # t-SNE 2-D projection of each embedding (for later visualisation).
    tsne_result = {}
    tsne_time = {}
    for model in configs.models:
        tsne = TSNE(n_components=2, init='pca', random_state=0)
        temp_t = time.time()
        tsne_result[model] = tsne.fit_transform(graph_embeddings[model])
        t_model = time.time() - temp_t
        print("t-SNE for {} embedding finished ({}s)".format(model, t_model))
        tsne_time[model] = t_model
    # BUGFIX: reassembled split string literal (see above).
    print(
        "Clustering Results Evaluated. Total time elapsed: {:.3f}\n===================="
        .format(time.time() - t4))

    # ---- generate report ----------------------------------------------
    with open(
            "{}results-{}.tsv".format(configs.current_report_path,
                                      str(n_cluster)), "w") as f:
        for model in configs.models:
            f.write("{}\t".format(model))
            MAP, prec_curv = reconstruction_performance[model]
            f.write("{:.3f}\t".format(MAP))
            if KMEANS_EVAL:
                k_s, k_c, k_d = kmeans_performance[model]
                f.write("{:.3f}\t{:.3f}\t{:.3f}\t".format(k_s, k_c, k_d))
            if DBSCAN_EVAL:
                d_s, d_c, d_d = dbscan_performance[model]
                f.write("{:.3f}\t{:.3f}\t{:.3f}\t".format(d_s, d_c, d_d))
            f.write("{}\n".format("\t".join(
                ["{:.3f}".format(x) for x in prec_curv[:10]])))

    # ---- dump data to cache -------------------------------------------
    with open(
            "{}experiment-{}.cache".format(configs.current_report_path,
                                           str(n_cluster)), "wb") as f:
        data_cache = [
            graph_embeddings, reconstruction_performance, tsne_result,
            tsne_time
        ]
        if KMEANS_EVAL:
            data_cache.append(kmeans_prediction)
            data_cache.append(kmeans_performance)
        if DBSCAN_EVAL:
            data_cache.append(dbscan_prediction)
            data_cache.append(dbscan_performance)
        pickle.dump(data_cache, f)

    return (MAP, prec_curv)


# ----------------------------------------------------------------------
# Script entry: evaluate saved embeddings across all time spells.
# Change the name of the dataset here.
dataset = "pastebin"
json_path = f"./data/{dataset}/config.json"
configs = dict2dotdict(load_json(json_path))
timespells = configs.timespells

graph_embeddings = {}
reconstruction_performance = {}
reconstruction_performance_curve = {}
for ts in range(1, int(timespells) + 1):
    # `with` guarantees the pickle handle closes (original used open/close).
    with open(f"{configs.DATA_PATH}/TS{str(ts)}/generated/graphs.pkl",
              'rb') as f:
        graph, graph_str = pickle.load(f)
    for model in configs.models:
        if model not in graph_embeddings:
            graph_embeddings[model] = {}
            reconstruction_performance[model] = {}
            reconstruction_performance_curve[model] = {}
        graph_embeddings[model][ts] = load_embedding(
            f"{configs.EMBEDDING_PATH}/TS{ts}/{model}.nv")
        (reconstruction_performance[model][ts],
         reconstruction_performance_curve[model][ts]) = evaluate_embedding(
             graph.G, graph_embeddings[model][ts])

with open(f"{configs.RESULT_PATH}/MAP.csv", "w") as f:
    header = "Model, " + ", ".join(
        ["TS" + str(i) for i in range(1, timespells + 1)]) + "\n"
    f.write(header)
    for model in configs.models:
        row = model + ", " + ", ".join([
            str(reconstruction_performance[model][i])
            for i in range(1, timespells + 1)
        ]) + "\n"
        f.write(row)