def deep_walk_run():
    """Build DeepWalk embeddings for the Wiki graph and plot them.

    The graph is read from ``wiki/Wiki_edgelist.txt``, embeddings are obtained
    from ``DeepWalk.transform()``, and they are passed together with the label
    file ``wiki/Wiki_labels.txt`` to ``evaluate_tools`` for plotting.
    """
    wiki_graph = read_graph('wiki/Wiki_edgelist.txt')
    walker = DeepWalk(
        Graph=wiki_graph,
        per_vertex=80,
        walk_length=10,
        window_size=5,
        dimension_size=128,
        work=4,
    )
    node_vectors = walker.transform()
    # Named ``evaluator`` rather than ``eval`` so the builtin stays reachable.
    evaluator = evaluate_tools(
        embeddings=node_vectors,
        label_path='wiki/Wiki_labels.txt',
    )
    evaluator.plot_embeddings()
def get_embeds(g):
    """Return DeepWalk embeddings for ``g`` keyed by integer node id.

    Every node is relabelled to its ``str`` form before the model is built, and
    the keys of the resulting embeddings are converted back with ``int``.
    Walk and training settings are read from the module-level ``args``; the
    embedding size comes from ``action_dim``.
    """
    as_text = {node: str(node) for node in g.nodes}
    text_graph = nx.relabel_nodes(g, as_text)

    # A non-positive ``args.cpu`` falls back to ``cpu_count()``.
    if args.cpu > 0:
        n_workers = args.cpu
    else:
        n_workers = cpu_count()

    walker = DeepWalk(
        text_graph,
        num_walks=args.num_walks,
        walk_length=args.walk_len,
        workers=n_workers,
    )
    walker.train(window_size=args.win, iter=args.emb_iters, embed_size=action_dim)

    by_text_key = walker.get_embeddings()
    return {int(key): by_text_key[key] for key in by_text_key.keys()}
def main(args):
    """Learn label embeddings with the selected method and run ``Compare``.

    :param args: object providing ``filepath`` (passed to ``get_x_y_v_e``),
        ``embedding`` ('deepwalk' or 'node2vec') and ``labelfile`` (passed to
        ``Compare``).
    :raises NotImplementedError: if ``args.embedding`` names any other method.
    """
    # Only the last of the seven values returned by get_x_y_v_e is used here.
    _, _, _, _, _, _, E_tr = get_x_y_v_e(args.filepath)

    print('Learning embeddings with {}...'.format(args.embedding))
    if args.embedding == 'deepwalk':
        ## DeepWalk default values: number_walks=10,representation_size=64,seed=0,walk_length=40,window_size=5,workers=1
        label_emb = DeepWalk().transform(E_tr, 'edgedict')
    elif args.embedding == 'node2vec':
        ## Node2Vec default values: num_walks=10,dimensions=64,walk_length=40,window_size=5,workers=1,p=1,q=1,
        ## weighted=False,directed=False,iter=1
        label_emb = Node2Vec().transform(E_tr, 'edgedict')
    else:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it fails with an unrelated TypeError.
        raise NotImplementedError(
            'Unsupported embedding: {!r}'.format(args.embedding))
    label_emb_wv = label_emb.wv

    print('Calling compare...')
    compare = Compare(args.labelfile, label_emb_wv)
    compare.invoke()

    # NOTE(review): this breakpoint blocks non-interactive runs; it is kept
    # because it may be a deliberate inspection point — confirm and remove.
    import pdb
    pdb.set_trace()
    print('end')
def get_rule(args):
    """Return the embedding model selected by ``args.rule``.

    :param args: object exposing a ``rule`` attribute ('deepwalk' or 'node2vec').
    :return: the result of ``DeepWalk()`` or ``Node2Vec()`` called with no arguments.
    :raises NotImplementedError: if ``args.rule`` is any other value.
    """
    if args.rule == 'deepwalk':
        return DeepWalk()
    if args.rule == 'node2vec':
        return Node2Vec()
    # ``NotImplemented`` is a comparison sentinel, not an exception class;
    # raising it fails with an unrelated TypeError, so raise the real exception.
    raise NotImplementedError('Unsupported rule: {!r}'.format(args.rule))
def model_fn():
    """Build the ``DeepWalk`` model from ``graph`` and ``config``.

    Both names are resolved from the surrounding scope. Six settings are passed
    positionally after the graph and three as keyword arguments.
    """
    positional_keys = (
        'walk_len',
        'window_size',
        'node_count',
        'hidden_dim',
        'neg_num',
        'batch_size',
    )
    keyword_keys = ('s2h', 'ps_hosts', 'temperature')

    positional = [config[key] for key in positional_keys]
    keywords = {key: config[key] for key in keyword_keys}
    return DeepWalk(graph, *positional, **keywords)
total_embeddings.append(model.get_embeddings()) result = pd.concat(total_embeddings, axis=0) dim_log.write(name + 'NetRA number instances :' + str(result.shape[0]) + '\n') result.to_pickle('./Result/' + name + 'netra_embedding' + str(DIM) + '.pickle') print('NetRA algorithm successful conducted') except BaseException: print('Net RA error ') try: # 1. deep walk print('beging deep walk algorithm ') print('\n' * 3) model = DeepWalk(G, global_dimension, walk_length=10, num_walks=80, workers=4) model.train(window_size=5, workers=4, iter=3) model.save_embeddings('./Result/' + name + 'deepwalk_embeddings' + str(DIM) + '.pickle') print('deep walk algorithm successful conducted') except BaseException: error_file.write('deep walk algorithm error!\n') else: error_file.write('deep walk algorithm success \n') # 2. line try: print('beging LINE algorithm ') print('\n' * 3)
def gen_embs(E, emb_file_name='label_emb', emb_file_path='', nrl='deepwalk', format='edgedict', walks=20,
             vec_len=VECTOR_LENGTH, seed=0, walk_len=20, window=7, work=8, p=1, q=1, weight=False,
             directed=False, iter=1):
    """
    Generates node embeddings with DeepWalk or Node2Vec, caching them on disk.

    The output file name is derived from ``emb_file_name`` plus the values of
    ``nrl``, ``walks``, ``vec_len``, ``walk_len``, ``window``, ``work``,
    ``weight`` and ``directed``. If that file already exists it is loaded
    instead of retraining. NOTE: ``seed``, ``p``, ``q``, ``iter`` and
    ``format`` are not part of the file name, so changing only those reuses a
    previously cached file.

    :param E: Graph edges, in the layout named by ``format``.
    :param emb_file_name: Base name of the file storing vectors in w2v format.
    :param emb_file_path: Directory in which to save generated embeddings.
    :param nrl: Which NRL to use ['deepwalk', 'node2vec'].
    :param format: Input format of the graph; default: 'edgedict'.
    :param walks: Number of walks per node.
    :param vec_len: Length of generated vectors.
    :param seed: Seed passed to DeepWalk (not used for Node2Vec).
    :param walk_len: Length of each random walk.
    :param window: Window size for skip-gram model.
    :param work: Number of workers to use.
    :param p: Node2Vec ``p`` parameter (not used for DeepWalk).
    :param q: Node2Vec ``q`` parameter (not used for DeepWalk).
    :param weight: Flag to denote if graph is weighted (Node2Vec only).
    :param directed: Flag to denote if graph is directed (Node2Vec only).
    :param iter: Node2Vec ``iter`` parameter (not used for DeepWalk).
    :return: Generated Embeddings in Gensim KeyedVector format.
    :raises NotImplementedError: if ``nrl`` is not a supported method and no
        cached file exists.
    """
    ## renaming output file with DeepWalk param values
    emb_file_name = '_'.join([
        emb_file_name, str(nrl), str(walks), str(vec_len), str(walk_len),
        str(window), str(work), str(weight), str(directed),
    ])
    emb_file_name = os.path.join(emb_file_path, emb_file_name)

    if os.path.exists(emb_file_name):  ## checks if embeddings were generated previously
        print('Embeddings already exist at:', emb_file_name)
        from gensim.models import KeyedVectors
        return KeyedVectors.load_word2vec_format(emb_file_name)

    if nrl == 'deepwalk':
        ## DeepWalk default values: number_walks=10,representation_size=64,seed=0,walk_length=40,window_size=5,workers=1
        label_emb = DeepWalk(number_walks=walks, representation_size=vec_len, seed=seed, walk_length=walk_len,
                             window_size=window, workers=work).transform(E, format)
    elif nrl == 'node2vec':
        ## Node2Vec default values: num_walks=10,dimensions=64,walk_length=40,window_size=5,workers=1,p=1,q=1,
        ## weighted=False,directed=False,iter=1
        label_emb = Node2Vec(num_walks=walks, dimensions=vec_len, walk_length=walk_len, window_size=window,
                             workers=work, p=p, q=q, weighted=weight, directed=directed,
                             iter=iter).transform(E, format)
    else:
        # ``NotImplemented`` is a comparison sentinel, not an exception class;
        # raising it fails with an unrelated TypeError.
        raise NotImplementedError('Unsupported nrl: {!r}'.format(nrl))

    label_emb_wv = label_emb.wv

    # With the default ``emb_file_path=''`` dirname() returns '' and
    # os.makedirs('') raises, so only create a directory when one is named.
    directory = os.path.dirname(emb_file_name)
    if directory:
        os.makedirs(directory, exist_ok=True)

    label_emb_wv.save_word2vec_format(emb_file_name)
    # label_emb.save(emb_file_name)  # saves in non-human readable format
    print('Saved generated vectors at:', emb_file_name)

    # FYI
    # (pdb) label_emb_wv.__dict__.keys()
    # => dict_keys(['vectors', 'vocab', 'vector_size', 'index2word', 'vectors_norm'])
    return label_emb_wv
import numpy as np
import random
from sklearn.metrics import roc_auc_score
from deepwalk import DeepWalk

random.seed(616)

# Build the training graph: a fixed range of node ids plus the stored edges.
edges = np.load('../tencent/train_edges.npy')
G = nx.Graph()
G.add_nodes_from(range(169209))
G.add_edges_from(edges)

# Train DeepWalk on the graph; ``train`` returns the word2vec model.
deepwalk = DeepWalk(
    G,
    emb_size=128,
    length_walk=50,
    num_walks=10,
    window_size=10,
    num_iters=2,
)
w2v = deepwalk.train(workers=4, is_loadmodel=False, is_loaddata=False)

# Positive test edges are stacked first, followed by the negative ones,
# and the labels follow the same order.
pos_test = np.load('../tencent/test_edges.npy')
neg_test = np.load('../tencent/test_edges_false.npy')
y_true = [True] * pos_test.shape[0] + [False] * neg_test.shape[0]
X = np.vstack([pos_test, neg_test])

print('Testing...')
# Score each candidate edge by the similarity of its two endpoint vectors.
y_score = [w2v.wv.similarity(str(u), str(v)) for u, v in X]
# NOTE(review): the statements down to plt.show() look like the tail of a
# plotting helper whose ``def`` line is outside this view — confirm the
# enclosing scope (``embeddings`` is not defined before this point here).
X, Y = read_node_label('../data/wiki_labels.txt')
# Collect the embedding of every labelled node, in the order of X.
emb_list = []
for k in X:
    emb_list.append(embeddings[k])
emb_list = np.array(emb_list)
# Reduce the embeddings to two components for plotting.
model = TSNE(n_components=2)
node_pos = model.fit_transform(emb_list)
# Group row indices by the first label of each node.
color_idx = {}
for i in range(len(X)):
    color_idx.setdefault(Y[i][0], [])
    color_idx[Y[i][0]].append(i)
# One scatter series per label.
for c, idx in color_idx.items():
    plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
plt.legend()
plt.show()


if __name__ == '__main__':
    # Read the Wiki edge list as a directed graph with an integer "weight"
    # attribute on each edge.
    G = nx.read_edgelist('../data/Wiki_edgelist.txt', create_using=nx.DiGraph(), nodetype=None,
                         data=[("weight", int)])
    print(len(G.nodes()))
    # Build and train DeepWalk, then fetch the embeddings.
    model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    model.train(w_size=5, iter_num=3)
    embeddings = model.get_embedding_all()
    # Evaluate the embeddings and plot them.
    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Author : haxu
date: 2019/4/3
-------------------------------------------------
Change Activity:
2019/4/3:
-------------------------------------------------
"""
__author__ = 'haxu'

import networkx as nx
from deepwalk import DeepWalk
from classify import read_node_label, Classifier
from sklearn.linear_model import LogisticRegression

if __name__ == '__main__':
    # Read the Wiki edge list as a directed graph with an integer 'weight'
    # attribute on each edge.
    G = nx.read_edgelist('../data/Wiki_edgelist.txt', create_using=nx.DiGraph(), nodetype=None,
                         data=[('weight', int)])

    # Build and train DeepWalk, then fetch the embeddings.
    model = DeepWalk(G, walk_length=30, num_walks=80, workers=4)
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    # Load the node labels and evaluate the embeddings with logistic regression.
    X, Y = read_node_label('../data/wiki_labels.txt')
    tr_frac = 0.8  # presumably the training fraction of the split — confirm in Classifier
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
def train_embedding(subgraph):
    """Return the result of calling ``DeepWalk`` on ``subgraph``."""
    embedding_model = DeepWalk(subgraph)
    return embedding_model
# NOTE(review): the two calls below look like the tail of a definition whose
# header is outside this view — confirm the enclosing scope before moving them.
plot_MSE(MSE)
plot_SC(SC)


# Classify the customers and group them into clusters
def customer_vote(G_list, G_vector, n_clusters=50):
    """Cluster the node vectors with KMeans and return the cluster labels.

    :param G_list: nodes corresponding to ``G_vector``; not used by the
        clustering itself, kept for interface compatibility.
    :param G_vector: one embedding vector per node, passed to ``KMeans.fit``.
    :param n_clusters: number of clusters; defaults to 50, the value that was
        previously hard-coded.
    :return: the fitted model's ``labels_``. Earlier versions computed the
        labels and discarded them, returning ``None``.
    """
    kmean_model = KMeans(n_clusters=n_clusters)
    kmean_model.fit(G_vector)
    return kmean_model.labels_


if __name__ == '__main__':
    # Preprocess('../91ForNTUDataSet/OrderSlaveData.csv')
    # Translate('dataset/dataset.txt')
    # Trans_format('dataset/confidence.txt')
    G = DeepWalk('dataset/result.txt')
    model = Word2Vec.load('graph_embedding_model')
    G_list = G.nodes()
    # Look up the trained vector of every node, in node order.
    G_vector = [model.wv[item] for item in G_list]
    # find_best_k(G_vector)
    customer_vote(G_list, G_vector)