def node2vec(netData, **kwargs):
    d = kwargs.get('d', 2)
    max_iter = kwargs.get('max_iter', 1)
    walk_len = kwargs.get('walk_len', 80)
    num_walks = kwargs.get('num_walks', 10)
    con_size = kwargs.get('con_size', 10)
    ret_p = kwargs.get('ret_p', 1)
    inout_p = kwargs.get('inout_p', 1)
    # Local import: the GEM class deliberately shadows this wrapper's name.
    from gem.embedding.node2vec import node2vec
    emb = node2vec(d=d, max_iter=max_iter, walk_len=walk_len,
                   num_walks=num_walks, con_size=con_size,
                   ret_p=ret_p, inout_p=inout_p)  # was hardcoded inout_p=1, dropping the kwarg
    return attMethods.GEMexport(netData, emb)
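# A minimal usage sketch for the wrapper above; netData and the
# attMethods.GEMexport helper come from the surrounding codebase, so their
# exact types are assumptions here, and the hyper-parameter values are
# illustrative only.
result = node2vec(netData, d=128, num_walks=20, ret_p=0.5, inout_p=2)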
def __init__(self, d=2, max_iter=10, walk_len=80, num_walks=10,
             con_size=10, ret_p=1, inout_p=1):
    if not N2VC_available:
        raise RuntimeError(
            'node2vec binary not found on PATH. Please compile and install '
            'from https://github.com/snap-stanford/snap/tree/master/examples/node2vec'
        )
    self.model = node2vec(d=d, max_iter=max_iter, walk_len=walk_len,
                          num_walks=num_walks, con_size=con_size,
                          ret_p=ret_p, inout_p=inout_p)
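# The N2VC_available flag checked above is defined elsewhere; a plausible
# minimal sketch of how it could be set at import time (this detection logic
# is an assumption, not necessarily the project's actual check). The
# 'node2vec' looked up here is the compiled SNAP binary, not the Python class.
import shutil

N2VC_available = shutil.which('node2vec') is not None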
def _get_embeddings(self, embedding_space):
    # You can comment out the methods you don't want to run
    models = list()
    for embed_method in self.embeddings:
        ## if embed_method == EMEDDINGS.GRAPH_FACTORIZATIONE_MBEDDINGS:
        ##     models.append(GraphFactorization(embedding_space, 100000, 1 * 10 ** -4, 1.0))
        if embed_method == EMEDDINGS.LAPLACIAN_EIGENMAPS_EMBEDDINGS:
            models.append(LaplacianEigenmaps(embedding_space))
        if embed_method == EMEDDINGS.LOCALLY_LINEAR_EMBEDDING:
            models.append(LocallyLinearEmbedding(embedding_space))
        if embed_method == EMEDDINGS.HOPE_EMBEDDING:
            models.append(HOPE(2 + 1, 0.01))
        if embed_method == EMEDDINGS.NODE2VEC_EMBEDDING_EMBEDDINGS:
            models.append(node2vec(2, 1, 80, 10, 10, 1, 1))
        # Embeddings I was unable to get working yet - it seems that HOPE
        # converts k to k+1 for some reason....
        # if embed_method == EMEDDINGS.SDNE_EMBEDDING_EMBEDDINGS:
        #     models.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
        #                        n_units=[50, 15, ], rho=0.3, n_iter=50, xeta=0.01, n_batch=500,
        #                        modelfile=[base_path + '/intermediate/enc_model.json',
        #                                   base_path + '/intermediate/dec_model.json'],
        #                        weightfile=[base_path + '/intermediate/enc_weights.hdf5',
        #                                    base_path + '/intermediate/dec_weights.hdf5']))
    return models
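# The EMEDDINGS enum referenced above is not shown in the snippet; a
# hypothetical minimal definition matching the member names used there could
# look like this (the class and member names are copied from the code,
# including their spelling; the values are arbitrary assumptions).
from enum import Enum, auto

class EMEDDINGS(Enum):
    GRAPH_FACTORIZATIONE_MBEDDINGS = auto()
    LAPLACIAN_EIGENMAPS_EMBEDDINGS = auto()
    LOCALLY_LINEAR_EMBEDDING = auto()
    HOPE_EMBEDDING = auto()
    NODE2VEC_EMBEDDING_EMBEDDINGS = auto()
    SDNE_EMBEDDING_EMBEDDINGS = auto()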
        AP_LE[it2][it1] = AP
        ROC_LE[it2][it1] = ROC

    print("evaluating for DEEPWALK")
    for it2 in range(len(dimensions)):
        print(it1, it2)
        dim = dimensions[it2]
        file_name = 'SAVER/' + fig_name[fig] + str(it1 + 1) + '/DEEPWALK_' + str(dim)
        parameter_file = open(file_name, 'rb')
        X = pickle.load(parameter_file)
        parameter_file.close()
        embedding = node2vec(d=dim, max_iter=1, walk_len=80, num_walks=10,
                             con_size=10, ret_p=1, inout_p=1)
        # Inject the precomputed embedding so evaluation skips re-training.
        embedding._X = X
        AP, ROC = evaluation_measures.calc_aproc_us(
            embedding, X, train_digraph, test_digraph, sample_edges)
        AP_DEEPWALK[it2][it1] = AP
        ROC_DEEPWALK[it2][it1] = ROC

    print("evaluating for n2vA")
    for it2 in range(len(dimensions)):
        print(it1, it2)
        dim = dimensions[it2]
        file_name = 'SAVER/' + fig_name[fig] + str(it1 + 1) + '/n2vA_' + str(dim)
# Create an empty list of models.
models = []
# Using an if/elif statement, load the model for this task.
# The end result is a list that is one element long.
if use_model_type == "HOPE":
    models.append(HOPE(d=dims * 2, beta=0.01))
elif use_model_type == "LapEig":
    models.append(LaplacianEigenmaps(d=dims))
elif use_model_type == "LLE":
    models.append(LocallyLinearEmbedding(d=dims))
elif use_model_type == "node2vec":
    models.append(
        node2vec(d=2, max_iter=1, walk_len=80, num_walks=10,
                 con_size=10, ret_p=1, inout_p=1))
else:
    # This logically has to be SDNE as there are no other options.
    models.append(
        SDNE(d=dims * 2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
             n_units=[50, 15,
# weightfile=['intermediate/enc_weights.hdf5', 'intermediate/dec_weights.hdf5']))

tasks.buildEmbedding(dataset, G, methods)
# exit()

# LOAD THE BUILT EMBEDDING AND RUN THE NODE CLASSIFICATION TASK
dataset = "lastfm"
method = "LaplacianEigenmaps"
dim = 128
print("\nDataset:", dataset, "- Method:", method,
      "- Embedding dimensions:", repr(dim))
embsLike = np.load('embs/embsLike_' + dataset + '_' + method + '_' +
                   repr(dim) + '.npy', allow_pickle=True)
tasks.executeNodeClassification(dataset, embsLike)
exit()

# RUN THE LINK PREDICTION TASK
dataset = "lastfm"
# METHOD, DATASET AND DIMENSIONS TO TEST FOR THE LINK PREDICTION TASK
method = node2vec(dataset, 128, 1, 80, 10, 10, 1, 1)
tasks.executeLinkPrediction(dataset, True, method)
exit()
    )
except UnicodeDecodeError:
    node_colors = pickle.load(
        open('data/sbm_node_labels.pickle', 'rb'), encoding='latin1')

node_colors_arr = [None] * node_colors.shape[0]
for idx in range(node_colors.shape[0]):
    node_colors_arr[idx] = np.where(node_colors[idx, :].toarray() == 1)[1][0]

models = []
# Load the models you want to run
models.append(GraphFactorization(d=128, max_iter=1000, eta=1 * 10**-4,
                                 regu=1.0, data_set='sbm'))
models.append(HOPE(d=256, beta=0.01))
models.append(LaplacianEigenmaps(d=128))
models.append(LocallyLinearEmbedding(d=128))
models.append(node2vec(d=128, max_iter=1, walk_len=80, num_walks=10,
                       con_size=10, ret_p=1, inout_p=1, data_set='sbm'))
models.append(SDNE(d=128, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
                   n_units=[500, 300, ], rho=0.3, n_iter=30, xeta=0.001,
                   n_batch=500,
                   modelfile=['enc_model.json', 'dec_model.json'],
                   weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']))

# For each model, learn the embedding and evaluate on graph reconstruction
# and visualization.
for embedding in models:
    print('Num nodes: %d, num edges: %d' % (G.number_of_nodes(),
                                            G.number_of_edges()))
    t1 = time()
    # Learn embedding - accepts a networkx graph or file with edge list
    Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                     is_weighted=True, no_python=True)
    print(embedding._method_name + ':\n\tTraining time: %f' % (time() - t1))
    # Evaluate on graph reconstruction
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, embedding, Y, None)
    # -------------------------------------------------------------------
    print(("\tMAP: {} \t precision curve: {}\n\n\n\n" + '-' * 100).format(
        MAP, prec_curv[:5]))
    # -------------------------------------------------------------------
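# node_colors_arr is computed above but never used in the reconstruction
# loop; a minimal sketch of the visualization step it was presumably prepared
# for, using GEM's visualize_embedding helper on the last trained model (this
# plots the first two embedding dimensions).
from gem.evaluation import visualize_embedding as viz
import matplotlib.pyplot as plt

viz.plot_embedding2D(embedding.get_embedding(),
                     di_graph=G, node_colors=node_colors_arr)
plt.show()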
weight = nextlinks['weight'].to_list()
edge_tuple = list(zip(from_id, to_id, weight))
print('adding')
G.add_weighted_edges_from(edge_tuple)
G = G.to_directed()
print('finish create graph!')

print('start train n2v')
look_back = list(G.nodes())
embeddings = {}
models = []
models.append(node2vec(d=128, max_iter=10, walk_len=80, num_walks=10,
                       con_size=10, ret_p=1, inout_p=1))
for embedding in models:
    Y, t = embedding.learn_embedding(graph=G, edge_f=None,
                                     is_weighted=True, no_python=True)
    # Map each node id back to its learned vector (the inner loop variable is
    # renamed so it no longer shadows the model object).
    for i, emb_vec in enumerate(embedding.get_embedding()):
        embeddings[look_back[i]] = emb_vec
np.save(root_path + 'graph_embeddings_retp1.npy', embeddings)
print('nextlink graph embedding retp 1 finish!')
del models
gc.collect()
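# A dict saved with np.save is wrapped in a 0-d object array; a small sketch
# of reading it back (root_path and the file name are from the snippet above).
loaded = np.load(root_path + 'graph_embeddings_retp1.npy',
                 allow_pickle=True).item()
print(len(loaded), 'node embeddings loaded')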
def benchmark(x, cv=5):
    """This function automatically runs through a series of benchmarks for
    unsupervised learning (MAP), semi-supervised learning, and supervised
    learning (cross-validation accuracy) for the provided input dataset.

    # Arguments:
        x (NEGraph): A NeuroEmbed graph.
        cv (int): Optional. Number of cross-validation folds to use.

    # Returns:
        dict: A result dictionary with all models and results.
    """
    all_results = {}
    G, X, y, S, names = x.G, x.X, x.y, x.S, x.names
    out_metrics = {}
    model = ASEEmbedding()
    model.fit(X)
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
        G, model, model.H, is_undirected=False, is_weighted=True)
    out_metrics['MAP'] = MAP
    d = model.H.shape[1] // 2
    # Note: this overwrites the MAP entry computed just above.
    out_metrics = generate_metrics(G, model, model.H, y, model.y, S, cv=cv)
    all_results['ASE'] = out_metrics
    raw_model = RawEmbedding()
    raw_model.fit(X, n_components=d)
    out_metrics = generate_metrics(G, raw_model, raw_model.H, y,
                                   raw_model.y, S, cv=cv)
    all_results['Raw'] = out_metrics
    G = nx.from_numpy_matrix(X, create_using=nx.DiGraph)
    Gd = nx.from_numpy_matrix(X + 1e-9, create_using=nx.DiGraph)
    models = {}
    if N2VC_available:
        models['node2vec'] = node2vec(d=d, max_iter=10, walk_len=80,
                                      num_walks=10, con_size=10,
                                      ret_p=1, inout_p=1)
    models['HOPE'] = HOPE(d=d, beta=0.01)
    models['Laplacian Eigenmaps'] = LaplacianEigenmaps(d=d)
    for model_name, embedding in models.items():
        if model_name == 'node2vec':
            Xh, t = embedding.learn_embedding(graph=Gd, edge_f=None,
                                              is_weighted=True, no_python=True)
            MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
                Gd, embedding, Xh, is_undirected=False, is_weighted=False)
        else:
            Xh, t = embedding.learn_embedding(graph=G, edge_f=None,
                                              is_weighted=True, no_python=True)
            MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(
                G, embedding, Xh, is_undirected=False, is_weighted=False)
        Xh = np.real(Xh)
        if y is not None:
            # Note: the RandomForestClassifier named in the docstring is
            # immediately overwritten; the MLP is what actually runs.
            clf = RandomForestClassifier(n_estimators=200)
            clf = MLPClassifier(alpha=1, max_iter=100000)
            clusterer = GaussianMixture(n_components=Xh.shape[1])
            clusterer.fit(Xh)
            predict_labels = clusterer.predict(Xh)
            scores = cross_val_score(clf, Xh, y, cv=cv)
            out_metrics['CV'] = scores.mean()
            if S is not None:
                scores = cross_val_score(clf, np.hstack((Xh, S)), y, cv=cv)
                out_metrics['CVAnatomy+Graph'] = scores.mean()
                scores = cross_val_score(clf, S, y, cv=cv)
                out_metrics['CVAnatomyOnly'] = scores.mean()
            out_metrics['ARC Clustering'] = metrics.adjusted_rand_score(
                y, predict_labels)
            out_metrics['AMI Clustering'] = metrics.adjusted_mutual_info_score(
                y, predict_labels)
        out_metrics['MAP'] = MAP
        print(model_name, out_metrics)
        all_results[model_name] = out_metrics
    return all_results
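# A hypothetical invocation of benchmark(); constructing the NEGraph is part
# of the surrounding NeuroEmbed codebase and is not shown in the snippet, so
# my_graph is a placeholder name.
results = benchmark(my_graph, cv=5)
for method_name, metrics_dict in results.items():
    print(method_name, metrics_dict.get('MAP'))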
# Optionally, you can add weights as a third column: source target weight
edge_f = 'karate.edgelist'
# Specify whether the edges are directed
isDirected = True

# Load graph
G = graph_util.loadGraphFromEdgeListTxt(edge_f, directed=isDirected)
G = G.to_directed()

models = []
# You can comment out the methods you don't want to run
models.append(GraphFactorization(2, 100000, 1 * 10**-4, 1.0))
models.append(HOPE(4, 0.01))
models.append(LaplacianEigenmaps(2))
models.append(LocallyLinearEmbedding(2))
models.append(node2vec(2, 1, 80, 10, 10, 1, 1))
models.append(
    SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
         n_units=[50, 15, ],
         rho=0.3, n_iter=50, xeta=0.01, n_batch=500,
models = []
# Load the models you want to run
models.append(GraphFactorization(d=64, max_iter=50000, eta=1 * 10**-4,
                                 regu=1.0, data_set=gfile))
models.append(HOPE(d=64, beta=0.01))
models.append(LaplacianEigenmaps(d=64))
models.append(LocallyLinearEmbedding(d=64))
models.append(node2vec(d=64, max_iter=100, walk_len=80, num_walks=10,
                       con_size=10, ret_p=1, inout_p=1, data_set=gfile))
models.append(
    SDNE(d=64, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,
         n_units=[500, 300, ],
         rho=0.3,