def execute_walks_graph_embedding(
    graph: nx.Graph,
    length: int,
    iterations: int,
    p: float = 1.0,
    q: float = 1.0,
    **kwargs: Dict
) -> np.ndarray:
    """Run first/second order walks through the GraphEmbedding walker.

    Parameters
    --------------------------
    graph: nx.Graph,
        The graph on which to run the walks.
    length: int,
        Length of the walks.
    iterations: int,
        Number of walks to start from each node.
    p: float = 1.0,
        Inverse weight for making the walk local.
        By default, the walk will be uniform.
    q: float = 1.0,
        Inverse weight for making the walk depth-first.
        By default, the walk will be uniform.
    kwargs: Dict,
        Additional parameters accepted for compatibility with other
        libraries; they are ignored here.

    Returns
    --------------------------
    The walks exposed by the walker's ``sentences`` attribute
    (annotated as a numpy array).
    """
    # One worker per available CPU, as reported by cpu_count().
    worker_count = cpu_count()
    walker = Node2Vec(
        graph,
        walk_length=length,
        num_walks=iterations,
        workers=worker_count,
        p=p,
        q=q,
    )
    return walker.sentences
def get_embeddings(self, inst, th=1):
    """Return one embedding row per node of ``inst``.

    A weighted directed graph is parsed from the edge list produced by
    ``self._compose_edge_list(inst.dist_mat, th)``; the model named by
    ``self._embedding`` is then built and trained on it.

    Parameters
    ----------
    inst:
        Instance object; this method reads ``inst.dist_mat`` and ``inst.n``.
    th:
        Forwarded unchanged to ``self._compose_edge_list``
        (presumably an edge threshold -- confirm against that helper).

    Returns
    -------
    ``np.ndarray`` whose i-th row is the embedding stored under key
    ``str(i)`` for ``i`` in ``range(inst.n)``. When ``self._embedding`` is
    not one of the recognised names, the result of
    ``self._normalise(inst)`` is returned instead.
    """
    # The graph is built up front, before the method name is inspected.
    graph = nx.parse_edgelist(
        self._compose_edge_list(inst.dist_mat, th),
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', float)],
    )

    method = self._embedding
    # Each branch only picks the model and its training arguments; the
    # actual train() call is shared below.
    if method == 'deepwalk':
        model = DeepWalk(graph, walk_length=10, num_walks=80, workers=1)
        train_kwargs = {'window_size': 5, 'iter': 3}
    elif method == 'node2vec':
        model = Node2Vec(graph, walk_length=10, num_walks=80, p=0.25, q=4, workers=1)
        train_kwargs = {'window_size': 5, 'iter': 3}
    elif method == 'line':
        # order can be one of 'first', 'second', 'all'
        model = LINE(graph, embedding_size=128, order='second')
        train_kwargs = {'batch_size': 1024, 'epochs': 50, 'verbose': 2}
    elif method == 'sdne':
        model = SDNE(graph, hidden_size=[256, 128])
        train_kwargs = {'batch_size': 3000, 'epochs': 40, 'verbose': 2}
    elif method == 'struc2vec':
        model = Struc2Vec(graph, 10, 80, workers=4, verbose=40)
        train_kwargs = {'window_size': 5, 'iter': 3}
    else:
        # Unrecognised method name: fall back to the normalised instance.
        return self._normalise(inst)

    model.train(**train_kwargs)
    vectors = model.get_embeddings()

    # Embeddings are keyed by the string form of the node index.
    return np.array([vectors[str(node)] for node in range(inst.n)])
# NOTE(review): this excerpt opens part-way through a plotting routine;
# `embeddings`, `X` and `Y` are bound above the first visible statement.

# Stack the embedding vectors in the order given by X.
vectors = np.array([embeddings[node] for node in X])

# Project the vectors to two dimensions with t-SNE.
projection = TSNE(n_components=2).fit_transform(vectors)

# Group row positions by the first label of each node.
rows_by_label = {}
for row, _node in enumerate(X):
    rows_by_label.setdefault(Y[row][0], []).append(row)

# Resolution parameter: dpi; canvas size parameter: figsize.
plt.figure(dpi=300, figsize=(24, 12))
for label, rows in rows_by_label.items():
    plt.scatter(projection[rows, 0], projection[rows, 1], label=label)
plt.legend()
plt.show()


if __name__ == "__main__":
    # Read the Wiki edge list as a directed graph with integer weights.
    G = nx.read_edgelist(
        '../data/wiki/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    model = Node2Vec(
        G,
        walk_length=10,
        num_walks=80,
        p=0.25,
        q=4,
        workers=1,
        use_rejection_sampling=0,
    )
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    # evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
# Command-line options: the dataset to load and the fraction later passed
# to evaluate_embeddings.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default="cora", help='dataset for training')
parser.add_argument('--tr_frac', type=float, default=0.2, help='tr_frac')
args = parser.parse_args()

# load_data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, labels = load_data(
    args.dataset)

# Row-wise argmax of `labels` gives one class index per row.
idx = np.argmax(labels, axis=1)

# Write one "<row> <class> " line per row. The context manager closes (and
# therefore flushes) the file before the rest of the script runs.
with open('labels.txt', 'w') as f:
    for i in range(labels.shape[0]):
        f.write(f'{i} {idx[i]} \n')

# Build a directed graph from the dense adjacency matrix, then round-trip
# it through an edge-list file so it is re-read with nodetype=None.
adj = adj.toarray()
G = nx.from_numpy_array(adj, create_using=nx.DiGraph())
# NOTE(review): write_edgelist documents `data` as a bool or a list of
# attribute keys; the (name, type) pairs here mirror read_edgelist's
# signature -- confirm the written file has the intended columns.
nx.write_edgelist(G, "test.edgelist", data=[('weight', int)])
G = nx.read_edgelist(
    'test.edgelist',
    create_using=nx.DiGraph(),
    nodetype=None,
    data=[('weight', int)],
)

# Train Node2Vec on the re-read graph, then evaluate and plot the result.
model = Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4, workers=1)
model.train(window_size=5, iter=3)
embeddings = model.get_embeddings()

evaluate_embeddings(embeddings, args.tr_frac)
plot_embeddings(embeddings)
# NOTE(review): this excerpt opens part-way through a plotting routine;
# `model`, `emb_list`, `X` and `Y` are bound above the first visible
# statement.
node_pos = model.fit_transform(emb_list)

# Group row positions by the first label of each node so that every label
# becomes one scatter series.
rows_by_label = {}
for row in range(len(X)):
    rows_by_label.setdefault(Y[row][0], []).append(row)

for label, rows in rows_by_label.items():
    plt.scatter(node_pos[rows, 0], node_pos[rows, 1], label=label)
plt.legend()
plt.show()


if __name__ == "__main__":
    # Read the Brazil airports edge list as a directed graph with integer
    # weights.
    G = nx.read_edgelist(
        '../data/flight/brazil-airports.edgelist',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    model = Node2Vec(G, 10, 80, workers=1, p=0.25, q=2)
    model.train()
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
# p=0.25, q=2, workers=1, use_rejection_sampling=0) # model.train(window_size=5, iter=3) # # embeddings = model.get_embeddings() # dic = evaluate_embeddings(embeddings) # sum_mic+=dic['micro'] # sum_mac+=dic['macro'] # sum_acc+=dic['acc'] # # print('ave_micro:') # print(sum_mic/iter) # print('ave_macro:') # print(sum_mac/iter) # print('ave_acc:') # print(sum_acc/iter) # model = DeepWalk(G, embed_size=128,walk_length=10, num_walks=80, workers=1) # model.train(window_size=5, iter=3) # model = LINE(G, embedding_size=128, order='second') # model = LINE(G, embedding_size=128, order='first') # model = LINE(G, embedding_size=128, order='all') # model.train(batch_size=1024, epochs=50, verbose=2) # model = Node2Vec(G, embed_size=128,walk_length=10, num_walks=80, p=0.25, q=2, workers=1, use_rejection_sampling=0) model.train(window_size=5, iter=3) embeddings = model.get_embeddings() plot_embeddings(embeddings) # plot_embeddings_3D(embeddings)
# NOTE(review): this excerpt opens part-way through a plotting routine;
# `embeddings`, `X` and `Y` are bound above the first visible statement.

# Stack the embedding vectors in the order given by X.
emb_list = np.array([embeddings[k] for k in X])

# Project the vectors to two dimensions with t-SNE.
model = TSNE(n_components=2)
node_pos = model.fit_transform(emb_list)

# Group row positions by the first label of each node.
color_idx = {}
for i, _node in enumerate(X):
    color_idx.setdefault(Y[i][0], []).append(i)

# One scatter series per label.
for c, idx in color_idx.items():
    plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
plt.legend()
plt.show()


if __name__ == "__main__":
    # Read the Wiki edge list as a directed graph with integer weights.
    G = nx.read_edgelist(
        '../data/wiki/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    model = Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4, workers=1)
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
# embeddings = model.get_embeddings()
# # evaluate_embeddings(embeddings)
# plot_embeddings(embeddings)
import pandas as pd

# Toy edge list: ten source/target pairs, with node ids stored as strings.
source_ids = [0, 1, 2, 3, 4, 4, 6, 7, 7, 9]
target_ids = [1, 4, 4, 4, 6, 7, 5, 8, 9, 8]
df = pd.DataFrame({
    'source': [str(node) for node in source_ids],
    'target': [str(node) for node in target_ids],
})
G = nx.from_pandas_edgelist(df, create_using=nx.Graph())

# Train with embed_size=2 so each embedding's first two components can be
# plotted directly as x/y.
model = Node2Vec(G, 10, 80, workers=1, p=0.25, q=2, use_rejection_sampling=0)
model.train(embed_size=2)
embeddings = model.get_embeddings()
# print(embeddings)

# Print the (node, vector) pairs ordered by node key.
print(sorted(embeddings.items(), key=lambda item: item[0]))

# Split the first two components of every vector into x and y coordinates.
x = [vector[0] for vector in embeddings.values()]
y = [vector[1] for vector in embeddings.values()]
plt.scatter(x, y)
plt.show()