Exemplo n.º 1
0
def deep_walk_run():
    """Run DeepWalk on the Wiki edge list and plot the resulting embeddings.

    The graph is read from ``wiki/Wiki_edgelist.txt``, embeddings are obtained
    from ``DeepWalk.transform()`` and then passed, together with the label
    file ``wiki/Wiki_labels.txt``, to ``evaluate_tools`` for plotting.
    """
    wiki_graph = read_graph('wiki/Wiki_edgelist.txt')

    # Hyper-parameters forwarded verbatim to the DeepWalk constructor.
    walker_options = dict(
        per_vertex=80,
        walk_length=10,
        window_size=5,
        dimension_size=128,
        work=4,
    )
    walker = DeepWalk(Graph=wiki_graph, **walker_options)

    node_vectors = walker.transform()

    # Named ``evaluator`` rather than ``eval`` so the builtin is not shadowed.
    evaluator = evaluate_tools(embeddings=node_vectors,
                               label_path='wiki/Wiki_labels.txt')
    evaluator.plot_embeddings()
Exemplo n.º 2
0
def get_embeds(g):
    """Return DeepWalk embeddings for ``g`` keyed by integer node id.

    Nodes are relabelled to their ``str`` form before training, and the keys
    of the resulting embedding mapping are converted back with ``int``.
    Walk and training hyper-parameters are read from ``args`` and
    ``action_dim``, which are defined outside this function.

    :param g: graph whose node labels survive a ``str`` -> ``int`` round trip.
    :return: dict mapping ``int(node)`` to that node's embedding.
    """
    string_labels = {node: str(node) for node in g.nodes}
    relabelled_graph = nx.relabel_nodes(g, string_labels)

    # A non-positive ``args.cpu`` falls back to ``cpu_count()``.
    if args.cpu > 0:
        worker_count = args.cpu
    else:
        worker_count = cpu_count()

    walker = DeepWalk(relabelled_graph,
                      num_walks=args.num_walks,
                      walk_length=args.walk_len,
                      workers=worker_count)
    walker.train(window_size=args.win,
                 iter=args.emb_iters,
                 embed_size=action_dim)

    string_keyed = walker.get_embeddings()
    return {int(label): string_keyed[label] for label in string_keyed.keys()}
Exemplo n.º 3
0
def main(args):
    """Learn label embeddings with the selected method and run ``Compare``.

    :param args: namespace providing ``filepath`` (input for
        ``get_x_y_v_e``), ``embedding`` (``'deepwalk'`` or ``'node2vec'``)
        and ``labelfile`` (passed to ``Compare``).
    :raises NotImplementedError: if ``args.embedding`` names an unsupported
        method.
    """
    # Validate the selection up front so an unsupported method fails before
    # any data is loaded. ``NotImplemented`` is a comparison sentinel, not an
    # exception class; raising it only produces a confusing TypeError.
    if args.embedding not in ('deepwalk', 'node2vec'):
        raise NotImplementedError(
            'Unsupported embedding method: {!r}'.format(args.embedding))

    # Only the last of the seven returned values is used here.
    _, _, _, _, _, _, E_tr = get_x_y_v_e(args.filepath)

    print('Learning embeddings with {}...'.format(args.embedding))

    if args.embedding == 'deepwalk':
        ## DeepWalk default values: number_walks=10,representation_size=64,seed=0,walk_length=40,window_size=5,workers=1
        label_emb = DeepWalk().transform(E_tr, 'edgedict')
    else:
        ## Node2Vec default values: num_walks=10,dimensions=64,walk_length=40,window_size=5,workers=1,p=1,q=1,
        ## weighted=False,directed=False,iter=1
        label_emb = Node2Vec().transform(E_tr, 'edgedict')

    label_emb_wv = label_emb.wv

    print('Calling compare...')

    compare = Compare(args.labelfile, label_emb_wv)
    compare.invoke()

    # NOTE(review): this drops into the interactive debugger on every run and
    # looks like a debugging leftover -- confirm before removing.
    import pdb
    pdb.set_trace()

    print('end')
Exemplo n.º 4
0
def get_rule(args):
    """Instantiate the embedding model named by ``args.rule``.

    :param args: object with a ``rule`` attribute.
    :return: a default-constructed ``DeepWalk`` for ``'deepwalk'`` or a
        default-constructed ``Node2Vec`` for ``'node2vec'``.
    :raises NotImplementedError: for any other rule name.
    """
    if args.rule == 'deepwalk':
        return DeepWalk()
    if args.rule == 'node2vec':
        return Node2Vec()
    # ``NotImplemented`` is a comparison sentinel, not an exception class;
    # raising it results in a TypeError instead of the intended error.
    raise NotImplementedError('Unsupported rule: {!r}'.format(args.rule))
Exemplo n.º 5
0
 def model_fn():
   """Build a ``DeepWalk`` model from the enclosing ``graph`` and ``config``."""
   # Config entries passed positionally, in constructor order.
   positional_keys = ('walk_len', 'window_size', 'node_count',
                      'hidden_dim', 'neg_num', 'batch_size')
   # Config entries passed by keyword under the same name.
   keyword_keys = ('s2h', 'ps_hosts', 'temperature')

   positional = [config[key] for key in positional_keys]
   keywords = {key: config[key] for key in keyword_keys}
   return DeepWalk(graph, *positional, **keywords)
Exemplo n.º 6
0
        total_embeddings.append(model.get_embeddings())
    result = pd.concat(total_embeddings, axis=0)
    dim_log.write(name + 'NetRA number instances :' + str(result.shape[0]) +
                  '\n')
    result.to_pickle('./Result/' + name + 'netra_embedding' + str(DIM) +
                     '.pickle')
    print('NetRA algorithm successful conducted')
except BaseException:
    print('Net RA error ')
try:
    # 1. deep walk
    print('beging deep walk algorithm ')
    print('\n' * 3)
    model = DeepWalk(G,
                     global_dimension,
                     walk_length=10,
                     num_walks=80,
                     workers=4)
    model.train(window_size=5, workers=4, iter=3)
    model.save_embeddings('./Result/' + name + 'deepwalk_embeddings' +
                          str(DIM) + '.pickle')
    print('deep walk algorithm successful conducted')
except Exception as exc:
    # Best-effort step: record the failure and carry on with the next
    # algorithm. ``Exception`` (not ``BaseException``) is caught so that
    # KeyboardInterrupt and SystemExit still stop the script.
    error_file.write('deep walk algorithm error!\n')
    # Keep the cause so the failure can be diagnosed from the log alone.
    error_file.write('deep walk algorithm error detail: ' + repr(exc) + '\n')
else:
    error_file.write('deep walk algorithm success \n')
# 2. line

try:
    print('beging LINE algorithm ')
    print('\n' * 3)
Exemplo n.º 7
0
def gen_embs(E,
             emb_file_name='label_emb',
             emb_file_path='',
             nrl='deepwalk',
             format='edgedict',
             walks=20,
             vec_len=VECTOR_LENGTH,
             seed=0,
             walk_len=20,
             window=7,
             work=8,
             p=1,
             q=1,
             weight=False,
             directed=False,
             iter=1):
    """
    Generate node embeddings with DeepWalk or Node2Vec, cached on disk.

    The output file name is derived from ``emb_file_name`` plus several of the
    hyper-parameters. If that file already exists it is loaded with
    ``KeyedVectors.load_word2vec_format``; otherwise the embeddings are
    trained, saved in word2vec format and returned.

    :param E: Graph data handed to ``transform`` (interpreted according to
        ``format``).
    :param emb_file_name: Base name of the embedding file; the parameter
        values are appended to it.
    :param emb_file_path: Directory in which the embedding file is stored.
        An empty string means the current working directory.
    :param nrl: Which method to use: ``'deepwalk'`` or ``'node2vec'``.
    :param format: Input format of the graph; default: ``'edgedict'``.
    :param walks: Number of walks per node.
    :param vec_len: Length of generated vectors.
    :param seed: Seed forwarded to DeepWalk (not passed to Node2Vec).
    :param walk_len: Length of each random walk.
    :param window: Window size for the skip-gram model.
    :param work: Number of workers to use.
    :param p: Node2Vec ``p`` parameter (Node2Vec only).
    :param q: Node2Vec ``q`` parameter (Node2Vec only).
    :param weight: Flag to denote if graph is weighted (Node2Vec only).
    :param directed: Flag to denote if graph is directed (Node2Vec only).
    :param iter: ``iter`` value forwarded to Node2Vec (Node2Vec only).
    :return: The embeddings' word vectors (``.wv`` of the trained model, or
        the vectors loaded from the cached file).
    :raises NotImplementedError: If ``nrl`` is not a supported method and no
        cached file exists for it.
    """

    ## renaming output file with DeepWalk param values
    # NOTE(review): seed, p, q and iter are not part of the file name, so runs
    # differing only in those values share one cache file. Left unchanged to
    # keep existing cache files valid -- confirm whether that is intended.
    name_parts = [emb_file_name] + [
        str(value)
        for value in (nrl, walks, vec_len, walk_len, window, work, weight,
                      directed)
    ]
    emb_file_name = os.path.join(emb_file_path, '_'.join(name_parts))

    if os.path.exists(emb_file_name):
        ## checks if embeddings were generated previously
        print('Embeddings already exist at:', emb_file_name)
        from gensim.models import KeyedVectors

        return KeyedVectors.load_word2vec_format(emb_file_name)

    if nrl == 'deepwalk':
        ## DeepWalk default values: number_walks=10,representation_size=64,seed=0,walk_length=40,window_size=5,workers=1
        label_emb = DeepWalk(number_walks=walks,
                             representation_size=vec_len,
                             seed=seed,
                             walk_length=walk_len,
                             window_size=window,
                             workers=work).transform(E, format)
    elif nrl == 'node2vec':
        ## Node2Vec default values: num_walks=10,dimensions=64,walk_length=40,window_size=5,workers=1,p=1,q=1,
        ## weighted=False,directed=False,iter=1
        label_emb = Node2Vec(num_walks=walks,
                             dimensions=vec_len,
                             walk_length=walk_len,
                             window_size=window,
                             workers=work,
                             p=p,
                             q=q,
                             weighted=weight,
                             directed=directed,
                             iter=iter).transform(E, format)
    else:
        # ``NotImplemented`` is a comparison sentinel, not an exception
        # class; raising it results in a TypeError.
        raise NotImplementedError(
            'Unsupported nrl method: {!r}'.format(nrl))

    label_emb_wv = label_emb.wv

    # With the default ``emb_file_path=''`` the directory part is empty and
    # ``os.makedirs('')`` would raise, so only create a real directory.
    directory = os.path.dirname(emb_file_name)
    if directory:
        os.makedirs(directory, exist_ok=True)

    label_emb_wv.save_word2vec_format(emb_file_name)
    # label_emb.save(emb_file_name) # saves in non-human readable format
    print('Saved generated vectors at:', emb_file_name)

    # FYI
    # (pdb) label_emb_wv.__dict__.keys()
    # => dict_keys(['vectors', 'vocab', 'vector_size', 'index2word', 'vectors_norm'])

    return label_emb_wv
Exemplo n.º 8
0
import numpy as np
import random
from sklearn.metrics import roc_auc_score

from deepwalk import DeepWalk

# Seed Python's RNG with a fixed value.
random.seed(616)

# Build the training graph. Node ids 0..169208 are registered first so that
# every id exists in the graph even if it appears in no training edge.
edges = np.load('../tencent/train_edges.npy')
G = nx.Graph()
G.add_nodes_from(range(169209))
G.add_edges_from(edges)

deepwalk = DeepWalk(
    G,
    emb_size=128,
    length_walk=50,
    num_walks=10,
    window_size=10,
    num_iters=2,
)
w2v = deepwalk.train(workers=4, is_loadmodel=False, is_loaddata=False)

# Held-out positive and negative edges used for evaluation.
pos_test = np.load('../tencent/test_edges.npy')
neg_test = np.load('../tencent/test_edges_false.npy')

# Positives are labelled True and stacked before the negatives.
y_true = [True] * pos_test.shape[0] + [False] * neg_test.shape[0]
X = np.vstack((pos_test, neg_test))

print('Testing...')
# Score each candidate edge by the similarity of its endpoints' embeddings;
# the model is queried with the string form of each node id.
y_score = [w2v.wv.similarity(str(src), str(dst)) for src, dst in X]
Exemplo n.º 9
0
    X, Y = read_node_label('../data/wiki_labels.txt')

    emb_list = []
    for k in X:
        emb_list.append(embeddings[k])
    emb_list = np.array(emb_list)

    model = TSNE(n_components=2)
    node_pos = model.fit_transform(emb_list)

    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], [])
        color_idx[Y[i][0]].append(i)

    for c, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()

if __name__ == '__main__':
    # Read the Wiki edge list into a directed graph; each edge carries a
    # "weight" field converted with ``int``.
    edge_fields = [("weight", int)]
    G = nx.read_edgelist(
        '../data/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=edge_fields,
    )
    node_total = len(G.nodes())
    print(node_total)

    # Train DeepWalk and fetch the embeddings for all nodes.
    model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    model.train(w_size=5, iter_num=3)
    embeddings = model.get_embedding_all()

    # Evaluate the embeddings, then plot them.
    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
Exemplo n.º 10
0
   Author :       haxu
   date:          2019/4/3
-------------------------------------------------
   Change Activity:
                   2019/4/3:
-------------------------------------------------
"""
__author__ = 'haxu'

import networkx as nx
from deepwalk import DeepWalk
from classify import read_node_label, Classifier
from sklearn.linear_model import LogisticRegression

if __name__ == '__main__':
    # Read the Wiki edge list into a directed graph; each edge carries a
    # 'weight' field converted with ``int``.
    read_options = dict(
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )
    G = nx.read_edgelist('../data/Wiki_edgelist.txt', **read_options)

    # Train DeepWalk and collect the learned node embeddings.
    model = DeepWalk(G, walk_length=30, num_walks=80, workers=4)
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    X, Y = read_node_label('../data/wiki_labels.txt')

    # Fit a logistic-regression classifier on the embeddings; ``tr_frac`` is
    # passed to ``split_train_evaluate`` as the split fraction.
    tr_frac = 0.8
    base_classifier = LogisticRegression()
    clf = Classifier(embeddings=embeddings, clf=base_classifier)
    clf.split_train_evaluate(X, Y, tr_frac)
Exemplo n.º 11
0
def train_embedding(subgraph):
    """Wrap ``subgraph`` in a ``DeepWalk`` instance and return it.

    No training method is called here; the caller receives whatever the
    ``DeepWalk`` constructor produces.
    """
    walker = DeepWalk(subgraph)
    return walker
Exemplo n.º 12
0
    plot_MSE(MSE)
    plot_SC(SC)

# Classify the customers and split them into clusters.
def customer_vote(G_list, G_vector, n_clusters=50):
    """Cluster the customer vectors with k-means and return the labels.

    :param G_list: Customer/node identifiers. Currently unused; kept so
        existing callers keep working.
    :param G_vector: One embedding vector per customer, passed to
        ``KMeans.fit``.
    :param n_clusters: Number of clusters to form (default 50, the value that
        was previously hard-coded).
    :return: The fitted model's ``labels_``, one cluster label per row of
        ``G_vector``. Earlier versions computed these and discarded them.
    """
    kmean_model = KMeans(n_clusters=n_clusters)
    kmean_model.fit(G_vector)
    return kmean_model.labels_

if __name__ == '__main__':
    # Earlier pipeline stages, left disabled; re-enable as needed.
    # Preprocess('../91ForNTUDataSet/OrderSlaveData.csv')

    #Translate('dataset/dataset.txt')

    #Trans_format('dataset/confidence.txt')

    # NOTE(review): ``DeepWalk`` is used here as something that returns an
    # object exposing ``nodes()`` -- presumably the graph; confirm.
    G = DeepWalk('dataset/result.txt')

    model = Word2Vec.load('graph_embedding_model')

    # Look up the stored embedding vector of every node. (The loop body used
    # to be tab-indented inside a space-indented block; spaces only now.)
    G_list = G.nodes()
    G_vector = [model.wv[item] for item in G_list]

    #find_best_k(G_vector)

    customer_vote(G_list,G_vector)