def node2vec(netData, **kwargs):
    """Build a GEM node2vec embedder from keyword options and export it.

    Recognised keyword options (default used when the key is absent):
        d (2), max_iter (1), walk_len (80), num_walks (10),
        con_size (10), ret_p (1), inout_p (1).
    Each value is forwarded unchanged to the constructor of
    ``gem.embedding.node2vec.node2vec``. Unrecognised keys are ignored.

    # Arguments:
        netData: Passed through untouched to ``attMethods.GEMexport``.
        **kwargs: Optional hyper-parameters listed above.

    # Returns:
        Whatever ``attMethods.GEMexport(netData, emb)`` returns.
    """
    # Imported lazily and under an alias so the GEM class does not shadow
    # this wrapper's own name inside the function body.
    from gem.embedding.node2vec import node2vec as gem_node2vec

    emb = gem_node2vec(
        d=kwargs.get('d', 2),
        max_iter=kwargs.get('max_iter', 1),
        walk_len=kwargs.get('walk_len', 80),
        num_walks=kwargs.get('num_walks', 10),
        con_size=kwargs.get('con_size', 10),
        ret_p=kwargs.get('ret_p', 1),
        # Forward the caller's value; a literal 1 here would ignore it.
        inout_p=kwargs.get('inout_p', 1),
    )
    # NOTE(review): the embedder is only constructed here, not trained;
    # presumably GEMexport runs the training - confirm in attMethods.
    return attMethods.GEMexport(netData, emb)
Exemplo n.º 2
0
 def __init__(self, d=2, max_iter=10, walk_len=80, num_walks=10,
              con_size=10, ret_p=1, inout_p=1):
     """Create the wrapped node2vec model, or fail if it is unavailable.

     All seven arguments are forwarded by keyword, unchanged, to the
     ``node2vec`` constructor; the resulting object is stored on
     ``self.model``.

     # Raises:
         RuntimeError: If ``N2VC_available`` is falsy.
     """
     if not N2VC_available:
         missing_binary_msg = 'node2vec binary not found on PATH. Please compile and install from https://github.com/snap-stanford/snap/tree/master/examples/node2vec'
         raise RuntimeError(missing_binary_msg)

     hyperparams = {
         'd': d,
         'max_iter': max_iter,
         'walk_len': walk_len,
         'num_walks': num_walks,
         'con_size': con_size,
         'ret_p': ret_p,
         'inout_p': inout_p,
     }
     self.model = node2vec(**hyperparams)
Exemplo n.º 3
0
    def _get_embeddings(self, embedding_space):
        """Instantiate one model per supported entry of ``self.embeddings``.

        Each entry is compared against the ``EMEDDINGS`` constants with
        independent checks; every match appends a freshly built model.
        Laplacian Eigenmaps and LLE are built with ``embedding_space``,
        while HOPE and node2vec are built with fixed arguments.

        # Returns:
            list: The constructed models, in iteration order.
        """
        selected = []
        for method in self.embeddings:
            # Graph Factorization is currently switched off:
            # if method == EMEDDINGS.GRAPH_FACTORIZATIONE_MBEDDINGS:
            #     selected.append(GraphFactorization(embedding_space, 100000, 1 * 10 ** -4, 1.0))
            if method == EMEDDINGS.LAPLACIAN_EIGENMAPS_EMBEDDINGS:
                selected.append(LaplacianEigenmaps(embedding_space))
            if method == EMEDDINGS.LOCALLY_LINEAR_EMBEDDING:
                selected.append(LocallyLinearEmbedding(embedding_space))
            if method == EMEDDINGS.HOPE_EMBEDDING:
                # Fixed size of 2 + 1 rather than embedding_space.
                selected.append(HOPE(2 + 1, 0.01))
            if method == EMEDDINGS.NODE2VEC_EMBEDDING_EMBEDDINGS:
                selected.append(node2vec(2, 1, 80, 10, 10, 1, 1))
            # SDNE is not working yet (original author's note: it seems that
            # HOPE converts k to k+1 for some reason):
            # if method == EMEDDINGS.SDNE_EMBEDDING_EMBEDDINGS:
            #     selected.append(SDNE(d=2, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3,n_units=[50, 15,], rho=0.3, n_iter=50, xeta=0.01,n_batch=500,
            #                modelfile=[base_path + '/intermediate/enc_model.json', base_path + '/intermediate/dec_model.json'],
            #                weightfile=[base_path + '/intermediate/enc_weights.hdf5', base_path + '/intermediate/dec_weights.hdf5']))
        return selected
Exemplo n.º 4
0
                AP_LE[it2][it1] = AP
                ROC_LE[it2][it1] = ROC

            print("evaluating for DEEPWALK")
            for it2 in xrange(len(dimensions)):
                print(it1, it2)
                dim = dimensions[it2]
                file_name = 'SAVER/' + fig_name[fig] + str(
                    it1 + 1) + '/DEEPWALK_' + str(dim)
                parameter_file = open(file_name, 'rb')
                X = pickle.load(parameter_file)
                parameter_file.close()
                embedding = node2vec(d=dim,
                                     max_iter=1,
                                     walk_len=80,
                                     num_walks=10,
                                     con_size=10,
                                     ret_p=1,
                                     inout_p=1)
                embedding._X = X
                AP, ROC = evaluation_measures.calc_aproc_us(
                    embedding, X, train_digraph, test_digraph, sample_edges)
                AP_DEEPWALK[it2][it1] = AP
                ROC_DEEPWALK[it2][it1] = ROC

            print("evaluating for n2vA")
            for it2 in xrange(len(dimensions)):
                print(it1, it2)
                dim = dimensions[it2]
                file_name = 'SAVER/' + fig_name[fig] + str(
                    it1 + 1) + '/n2vA_' + str(dim)
Exemplo n.º 5
0
# create an empty list of models
models = []
# using and else if statement load the model for this task
# The end result is a list that is 1 long
if use_model_type == "HOPE":
    models.append(HOPE(d=dims * 2, beta=0.01))
elif use_model_type == "LapEig":
    models.append(LaplacianEigenmaps(d=dims))
elif use_model_type == "LLE":
    models.append(LocallyLinearEmbedding(d=dims))
elif use_model_type == "node2vec":
    models.append(
        node2vec(d=2,
                 max_iter=1,
                 walk_len=80,
                 num_walks=10,
                 con_size=10,
                 ret_p=1,
                 inout_p=1))
else:
    # This logically has to be SDNE as there are no other options
    models.append(
        SDNE(d=dims * 2,
             beta=5,
             alpha=1e-5,
             nu1=1e-6,
             nu2=1e-6,
             K=3,
             n_units=[
                 50,
                 15,
Exemplo n.º 6
0
    #               weightfile=['intermediate/enc_weights.hdf5', 'intermediate/dec_weights.hdf5']))

    tasks.buildEmbedding(dataset, G, methods)

#exit()

# Load the previously built embedding and run the node classification task.

dataset = "lastfm"
method = "LaplacianEigenmaps"
dim = 128

# Python 2 print statement; the emitted message text is left in Italian.
print "\nDataset:", dataset, "- Metodo:", method, "- Dimensioni degli Embedding:", repr(
    dim)
# The .npy file name is assembled from the dataset, method and dimension.
embsLike = np.load('embs/embsLike_' + dataset + '_' + method + '_' +
                   repr(dim) + '.npy',
                   allow_pickle=True)
tasks.executeNodeClassification(dataset, embsLike)

# NOTE(review): this exit() stops the script here, so the link prediction
# section below is not reached unless this call is removed - confirm intent.
exit()

# Run the link prediction task.

dataset = "lastfm"
# NOTE(review): the dataset name is passed as the first positional argument,
# ahead of the seven numeric values - confirm against the node2vec signature
# actually in scope here.
method = node2vec(
    dataset, 128, 1, 80, 10, 10, 1, 1
)  # Method, dataset and dimensions to test for the link prediction task
tasks.executeLinkPrediction(dataset, True, method)

exit()
Exemplo n.º 7
0
    )
except UnicodeDecodeError:
    node_colors = pickle.load(
        open('data/sbm_node_labels.pickle', 'rb'), encoding='latin1'
    )
# For every row of the label matrix, take the column index of the first
# entry equal to 1 (presumably a one-hot sparse matrix - verify upstream).
node_colors_arr = [
    np.where(node_colors[row, :].toarray() == 1)[1][0]
    for row in range(node_colors.shape[0])
]

# Embedding models to run; delete the entries you do not need.
# NOTE(review): node2vec uses d=182 while the others use 128/256 - confirm
# this is intentional.
models = [
    GraphFactorization(d=128, max_iter=1000, eta=1 * 10**-4, regu=1.0, data_set='sbm'),
    HOPE(d=256, beta=0.01),
    LaplacianEigenmaps(d=128),
    LocallyLinearEmbedding(d=128),
    node2vec(d=182, max_iter=1, walk_len=80, num_walks=10, con_size=10, ret_p=1, inout_p=1, data_set='sbm'),
    SDNE(d=128, beta=5, alpha=1e-5, nu1=1e-6, nu2=1e-6, K=3, n_units=[500, 300,], rho=0.3, n_iter=30, xeta=0.001, n_batch=500,
         modelfile=['enc_model.json', 'dec_model.json'],
         weightfile=['enc_weights.hdf5', 'dec_weights.hdf5']),
]
# Train each model on G, then score it on graph reconstruction.
for embedding in models:
    graph_size = (G.number_of_nodes(), G.number_of_edges())
    print('Num nodes: %d, num edges: %d' % graph_size)
    t1 = time()
    # learn_embedding accepts a networkx graph or an edge-list file.
    Y, t = embedding.learn_embedding(graph=G, edge_f=None, is_weighted=True, no_python=True)
    elapsed = time() - t1
    print(embedding._method_name + ':\n\tTraining time: %f' % elapsed)
    # Graph reconstruction quality for this embedding.
    MAP, prec_curv, err, err_baseline = gr.evaluateStaticGraphReconstruction(G, embedding, Y, None)
    # Report MAP and the first five precision-curve values, then a rule.
    report = "\tMAP: {} \t preccision curve: {}\n\n\n\n" + '-' * 100
    print(report.format(MAP, prec_curv[:5]))
    #---------------------------------------------------------------------------------
    weight = nextlinks['weight'].to_list()
    edge_tuple = list(zip(from_id, to_id, weight))
    # edge_tuple = tuple(from_id,to_id,weight)
    print('adding')
    G.add_weighted_edges_from(edge_tuple)
    G = G.to_directed()
    print('finish create graph!')
    print('start train n2v')
    look_back = list(G.nodes())
    embeddings = {}
    models = []
    models.append(
        node2vec(d=128,
                 max_iter=10,
                 walk_len=80,
                 num_walks=10,
                 con_size=10,
                 ret_p=1,
                 inout_p=1))
    for embedding in models:
        Y, t = embedding.learn_embedding(graph=G,
                                         edge_f=None,
                                         is_weighted=True,
                                         no_python=True)
        for i, embedding in enumerate(embedding.get_embedding()):
            embeddings[look_back[i]] = embedding
    np.save(root_path + 'graph_embeddings_retp1.npy', embeddings)
    print('nextlink graph embedding retp 1 finish!')  # displays "world"
    del models
    gc.collect()
Exemplo n.º 9
0
def benchmark(x, cv=5):
    """Benchmark several graph embeddings on one NeuroEmbed graph.

    ``ASEEmbedding`` and ``RawEmbedding`` are fitted on ``x.X`` and scored
    with ``generate_metrics``. node2vec (only when ``N2VC_available``), HOPE
    and Laplacian Eigenmaps are then trained on directed graphs built from
    ``x.X``. For each of those the graph-reconstruction MAP is recorded and,
    when labels ``x.y`` are present, the mean cross-validation score of an
    MLP classifier plus the adjusted Rand / adjusted mutual information of a
    Gaussian-mixture clustering against the labels.

    # Arguments:
        x (NEGraph): A NeuroEmbed graph exposing ``G``, ``X``, ``y``, ``S``
            and ``names``.
        cv (int): Optional. Number of cross-validation folds to use.

    # Returns:
        dict: Maps model name ('ASE', 'Raw', 'node2vec', 'HOPE',
            'Laplacian Eigenmaps') to that model's own metrics dictionary.
    """
    all_results = {}
    G, X, y, S, names = x.G, x.X, x.y, x.S, x.names

    # --- Baseline 1: ASE embedding -----------------------------------------
    model = ASEEmbedding()
    model.fit(X)
    MAP, _prec_curv, _err, _err_baseline = gr.evaluateStaticGraphReconstruction(
        G, model, model.H, is_undirected=False, is_weighted=True)
    # Half of the ASE width is reused as the dimension for every other model.
    d = model.H.shape[1] // 2
    out_metrics = generate_metrics(G, model, model.H, y, model.y, S, cv=cv)
    # Keep the MAP computed above unless generate_metrics already reported
    # one; rebinding out_metrics would otherwise drop it.
    if 'MAP' not in out_metrics:
        out_metrics['MAP'] = MAP
    all_results['ASE'] = out_metrics

    # --- Baseline 2: raw embedding -----------------------------------------
    raw_model = RawEmbedding()
    raw_model.fit(X, n_components=d)
    all_results['Raw'] = generate_metrics(G,
                                          raw_model,
                                          raw_model.H,
                                          y,
                                          raw_model.y,
                                          S,
                                          cv=cv)

    # --- GEM models --------------------------------------------------------
    # From here on G is a directed graph rebuilt from X; Gd is the same
    # matrix with 1e-9 added to it and is used only for node2vec.
    G = nx.from_numpy_matrix(X, create_using=nx.DiGraph)
    Gd = nx.from_numpy_matrix(X + 1e-9, create_using=nx.DiGraph)
    models = {}
    if N2VC_available:
        models['node2vec'] = node2vec(d=d,
                                      max_iter=10,
                                      walk_len=80,
                                      num_walks=10,
                                      con_size=10,
                                      ret_p=1,
                                      inout_p=1)
    models['HOPE'] = HOPE(d=d, beta=0.01)
    models['Laplacian Eigenmaps'] = LaplacianEigenmaps(d=d)

    for model_name, embedding in models.items():
        graph = Gd if model_name == 'node2vec' else G
        Xh, _ = embedding.learn_embedding(graph=graph,
                                          edge_f=None,
                                          is_weighted=True,
                                          no_python=True)
        MAP, _prec_curv, _err, _err_baseline = gr.evaluateStaticGraphReconstruction(
            graph, embedding, Xh, is_undirected=False, is_weighted=False)
        Xh = np.real(Xh)

        # Fresh dictionary per model: reusing one dict would make every
        # entry of all_results (including 'Raw') alias the same object.
        out_metrics = {}
        if y is not None:
            clf = MLPClassifier(alpha=1, max_iter=100000)
            clusterer = GaussianMixture(n_components=Xh.shape[1])
            clusterer.fit(Xh)
            predict_labels = clusterer.predict(Xh)
            scores = cross_val_score(clf, Xh, y, cv=cv)
            out_metrics['CV'] = scores.mean()
            if S is not None:
                scores = cross_val_score(clf, np.hstack((Xh, S)), y, cv=cv)
                out_metrics['CVAnatomy+Graph'] = scores.mean()
                scores = cross_val_score(clf, S, y, cv=cv)
                out_metrics['CVAnatomyOnly'] = scores.mean()
            out_metrics['ARC Clustering'] = metrics.adjusted_rand_score(
                y, predict_labels)
            out_metrics['AMI Clustering'] = metrics.adjusted_mutual_info_score(
                y, predict_labels)
        out_metrics['MAP'] = MAP
        print(model_name, out_metrics)
        all_results[model_name] = out_metrics
    return all_results
Exemplo n.º 10
0
# Edge-list input file. A third column may optionally carry weights, i.e.
# each line reads: source target weight
edge_f = 'karate.edgelist'
# Whether the edges in the file are directed
isDirected = True

# Read the graph and convert it to a directed graph in one step
G = graph_util.loadGraphFromEdgeListTxt(edge_f,
                                        directed=isDirected).to_directed()

# Embedding models to run; delete the entries you do not need
models = [
    GraphFactorization(2, 100000, 1 * 10**-4, 1.0),
    HOPE(4, 0.01),
    LaplacianEigenmaps(2),
    LocallyLinearEmbedding(2),
    node2vec(2, 1, 80, 10, 10, 1, 1),
]
models.append(
    SDNE(d=2,
         beta=5,
         alpha=1e-5,
         nu1=1e-6,
         nu2=1e-6,
         K=3,
         n_units=[
             50,
             15,
         ],
         rho=0.3,
         n_iter=50,
         xeta=0.01,
         n_batch=500,
Exemplo n.º 11
0
 # Embedding models to run, all with 64 dimensions; delete the entries you
 # do not need.
 models = [
     GraphFactorization(d=64,
                        max_iter=50000,
                        eta=1 * 10**-4,
                        regu=1.0,
                        data_set=gfile),
     HOPE(d=64, beta=0.01),
     LaplacianEigenmaps(d=64),
     LocallyLinearEmbedding(d=64),
     node2vec(d=64,
              max_iter=100,
              walk_len=80,
              num_walks=10,
              con_size=10,
              ret_p=1,
              inout_p=1,
              data_set=gfile),
 ]
 models.append(
     SDNE(d=64,
          beta=5,
          alpha=1e-5,
          nu1=1e-6,
          nu2=1e-6,
          K=3,
          n_units=[
              500,
              300,
          ],
          rho=0.3,