Example #1
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    # original graph nx_G
    nx_G = read_graph()

    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)

    # simplify graph
    simplify_threshold = [2.67, 4.15, 10.66]
    # meeting_freq threshold
    # simplify_threshold = [7,10,24]

    for threshold in simplify_threshold:
        newG_edge = []
        for n, nbrs in nx_G.adj.items():
            for nbr, eattr in nbrs.items():
                wt = eattr['weight']
                if wt > threshold: newG_edge.append((n, nbr, wt))
        new_nx_G = nx.DiGraph()
        new_nx_G.add_weighted_edges_from(newG_edge)
        new_nx_G = new_nx_G.to_undirected()  # to_undirected() returns a new graph rather than modifying in place
        new_G = node2vec.Graph(new_nx_G, args.directed, args.p, args.q)
        new_G.preprocess_transition_probs()
        walks += new_G.simulate_walks(args.num_walks, args.walk_length)

    learn_embeddings(walks)
Example #2
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    embeddings = {}
    edgelists = os.listdir(args.input)
    for idx, edgelist in enumerate(edgelists):
        nx_G = read_graph(args.input + edgelist, args)
        G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
        G.preprocess_transition_probs()
        walks = G.simulate_walks(args.num_walks, args.walk_length)
        n2v = learn_embeddings(args, walks)
        for key in n2v.vocab.keys():
            assert int(key) not in embeddings.keys(), (int(key),
                                                       embeddings.keys())
            embeddings[int(key)] = n2v.word_vec(key)
        if idx % 10000 == 0:
            print(idx, len(list(embeddings.keys())))
    keys = np.array(list(embeddings.keys()))
    order = np.argsort(keys)
    keys = keys[order]
    values = np.array(list(embeddings.values()))
    values = values[order]

    emb_old = np.load('data/embeddings_base.npy')
    emb = np.concatenate([emb_old, values], axis=1)
    np.save('data/embeddings_new.npy', emb)
Example #3
def main(args, c_args):
    """
    Pipeline for representational learning for all nodes in a graph.
    """

    #generate corpus

    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    sentences = ""

    # create raw data
    # _walks = [map(str, walk) for walk in walks]
    print("walks converted")
    for i in range(len(walks)):
        sentences += str(walks[i][0])
        for j in range(1, len(walks[i])):
            sentences += (" " + str(walks[i][j]))
        sentences += '\n'
    with open('../corpus/' + source_name + '.txt', 'w+') as corpus_file:
        corpus_file.write(sentences)  # flush and close the corpus file before the external word2vec call reads it

    #learn_embeddings(walks)
    call(
        "./../Modified_DIVE/word2vec -train ../corpus/" + source_name +
        ".txt -output ../emb/" + source_name + ".emb " + source_name +
        "-vocab -alpha 0.10 -window 10 -cbow 0 -sample 1e-5 -threads 20 -binary 0 -iter 15",
        shell=True)

    save_word_emb_with_name("../emb/" + source_name + ".emb",
                            "../json/" + source_name + "_with_name.json",
                            "../graph/" + source_name + "_id_name.net")
Example #4
def generate_node2vec_embeddings(A,
                                 emd_size=128,
                                 negative_injection=False,
                                 train_neg=None):
    if negative_injection:
        row, col = train_neg
        A = A.copy()
        A[row, col] = 1  # inject negative train
        A[col, row] = 1  # inject negative train
    nx_G = nx.from_scipy_sparse_matrix(A)
    G = node2vec.Graph(nx_G, is_directed=False, p=1, q=1)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(num_walks=10, walk_length=80)
    walks = [[str(node) for node in walk] for walk in walks]  # lists of strings, not map objects, so gensim can iterate the corpus more than once
    model = Word2Vec(walks,
                     size=emd_size,
                     window=10,
                     min_count=0,
                     sg=1,
                     workers=8,
                     iter=1)
    wv = model.wv
    embeddings = np.zeros([A.shape[0], emd_size], dtype='float32')
    sum_embeddings = 0
    empty_list = []
    for i in range(A.shape[0]):
        if str(i) in wv:
            embeddings[i] = wv.word_vec(str(i))
            sum_embeddings += embeddings[i]
        else:
            empty_list.append(i)
    mean_embedding = sum_embeddings / (A.shape[0] - len(empty_list))
    embeddings[empty_list] = mean_embedding
    return embeddings
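Example #4 assumes the pre-4.0 gensim keyword names (size, iter) and nx.from_scipy_sparse_matrix, which later NetworkX releases replace with from_scipy_sparse_array. A minimal, purely illustrative usage sketch under those assumptions, with a toy 4-node cycle as the adjacency matrix:

import numpy as np
import scipy.sparse as ssp

# adjacency matrix of an undirected 4-cycle: 0-1-2-3-0
row = np.array([0, 1, 1, 2, 2, 3, 3, 0])
col = np.array([1, 0, 2, 1, 3, 2, 0, 3])
A = ssp.csr_matrix((np.ones(len(row)), (row, col)), shape=(4, 4))

emb = generate_node2vec_embeddings(A, emd_size=16)
print(emb.shape)  # expected: (4, 16)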
Example #5
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    print("Args: {}".format(args))
    try:
        #walks = load_walks(args.cached_walks_path)
        walks = Walks(args.cached_walks_path)
    except IOError as e:
        print("Found no cached walks at {}".format(args.cached_walks_path))
        print("Generating walks from graph.")
        nx_G = read_graph()
        G = node2vec.Graph(nx_G,
                           args.directed,
                           args.p,
                           args.q,
                           n_jobs=args.workers)
        print("Loaded graph: {}".format(G))
        print("Processing transition probabilities.")
        G.preprocess_transition_probs()
        print("Simulating walks.")
        walks = G.simulate_walks(args.num_walks, args.walk_length)
        walks = walks2str_list(walks)
        print("Saving walks at {}".format(args.cached_walks_path))
        save_walks(walks, args.cached_walks_path)
        print("Loading walks")
        walks = load_walks(args.cached_walks_path)
    print("Learning embeddings.")
    learn_embeddings(walks)
Example #6
def main(args):
    # Pipeline for representational learning for all nodes in a graph.
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(walks)
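Nearly every main() above calls read_graph() and learn_embeddings() without showing them. For orientation only, here is a rough sketch of what such helpers commonly look like in node2vec pipelines of this kind, assuming a module-level argparse namespace args with fields such as input, weighted, directed, dimensions, window_size, workers, iter and output (all assumed names, not taken from the snippets) and a pre-4.0 gensim:

import networkx as nx
from gensim.models import Word2Vec

def read_graph():
    # Sketch: load an edge list into a (di)graph; the args fields are assumptions.
    if args.weighted:
        G = nx.read_edgelist(args.input, nodetype=int,
                             data=(('weight', float),), create_using=nx.DiGraph())
    else:
        G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
        for edge in G.edges():
            G[edge[0]][edge[1]]['weight'] = 1
    if not args.directed:
        G = G.to_undirected()
    return G

def learn_embeddings(walks):
    # Sketch: train skip-gram over the walks and save the vectors (gensim < 4.0 keywords).
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(walks, size=args.dimensions, window=args.window_size,
                     min_count=0, sg=1, workers=args.workers, iter=args.iter)
    model.wv.save_word2vec_format(args.output)
    return model

Individual examples above deviate from this (extra parameters, different outputs), so treat it as a template rather than the actual helpers.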
Example #7
def main(args):
    """
	Pipeline for representational learning for all nodes in a graph.
	"""
    print("Reading the graph...")
    nx_G = read_graph()
    print("Preprocessing the graph...")
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    print("Simulating random walks...")
    startTime = time.time()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    endTime = time.time()
    print("\n---- Random walks generation completed in " +
          str(round(endTime - startTime, 3)) + " seconds; average of " +
          str(round((endTime - startTime) / args.num_walks, 3)) +
          " seconds per set of walks\n")
    print("Learning node embedding function for {} SGD iterations...".format(
        args.iter))
    startTime = time.time()
    learn_embeddings(walks)
    endTime = time.time()
    print("\n ---- Embedding learning completed in " +
          str(round(endTime - startTime, 3)) + " seconds; average of " +
          str(round((endTime - startTime) / args.iter, 3)) +
          " seconds per SGD iteration\n")
Example #8
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    if not args.resume:
        print("Reading graph.")
        nx_G = read_graph()
        print("Passthrough graph and construct class.")
        G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
        print("Generate probs.")
        G.preprocess_transition_probs()
        if args.end2end:
            print("Generate path.")
            walks = G.simulate_walks(args.num_walks, args.walk_length)
        else:
            print("Dump probs of nodes.")
            pickle.dump(G.alias_nodes, open(args.probs_graph + ".nodes", "wb"))
            print("Dump probs of edges.")
            pickle.dump(G.alias_edges, open(args.probs_graph + ".edges", "wb"))
            # nx.write_weighted_edgelist(G, args.probs_graph)

    if args.end2end or args.resume:
        if args.resume:
            walks = pickle.load(open(args.walk_list, "rb"))
        print("Pass to word2vec.")
        learn_embeddings(walks)
Example #9
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.
    """
    print("==========Read Network and Semantic!===========")
    nx_G = read_graph()
    print("The number of nodes in network is {}".format(len(nx_G.nodes())))
    index = read_index(args.semantic)

    print("==========Sampling for Fact!===========")
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.epochs, args.walk_length)

    print("==========Semantic Alignment!===========")
    for walks_length in tqdm(range(len(walks))):
        for walk_len in range(len(walks[walks_length])):
            walks[walks_length][walk_len] = index[walks[walks_length]
                                                  [walk_len]]

    print(
        "==========Jointly Learning Fact Embeddings in Word-Level and Fact-Level!==========="
    )
    word_model = J_ME.learn_embeddings(args.mode, walks, args.dimensions,
                                       args.window_size, args.workers,
                                       args.iter, index)

    print(
        "==========Combine Two Embeddings to Obtain the Final Fact Embeddings!==========="
    )
    fact_vec_from_word = S_O.self_organization(word_model, nx_G, index,
                                               args.dimensions)
    fact_vec_from_node = read_embeddings("temp/fact_embeddings.txt")
    sentenceemb_with_wordemb(fact_vec_from_word, fact_vec_from_node)
Example #10
def main(datapath):
    '''
    Read the input graph given on the command line, build node sequences with random walks, and train a Skip-Gram model on them.
    '''
    print('Reading edgelist...')
    nx_G = nx.read_edgelist(datapath, nodetype=str, create_using=nx.DiGraph())
    for edge in nx_G.edges():
        nx_G[edge[0]][edge[1]]['weight'] = 1

    print('Organizing biased-weighted graph...')
    G = node2vec.Graph(
        nx_G, is_directed=True, p=1, q=1
    )  # instantiate the Graph class from node2vec.py; read_graph sets weight=1 even for unweighted input, so args.is_unweighted is ignored
    G.preprocess_transition_probs_deepwalk(
    )  # precompute, per node, transition probabilities that follow the weights and directions of the input graph's edges

    print('Start random walk...')
    walks = G.simulate_deepwalks(20, 120)  # run the biased random walks

    print('Training the skip-gram model...')
    dataname = datapath.replace('.edgelist', '')
    model = learn_embeddings(
        walks, dataname)  # train a skip-gram model on the node sequences produced above

    del walks, model
    gc.collect()
Example #11
def node_fun(args, nx_graph):
    G = node2vec.Graph(nx_graph, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    walks = node_num(walks)
    vocabulary_size = len(nx_graph.nodes())
    return walks, vocabulary_size
Example #12
def get_n2v_embeddings(emb_dim, nx_G):
    """
    Pipeline for representational learning for all nodes in a graph.
    """
    G = node2vec.Graph(nx_G, is_directed=False, p=1, q=1)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(10, 80)
    learn_embeddings(walks, emb_dim)
Example #13
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    np.savetxt(args.output, walks)
Example #14
def main(args):
    '''
    Pipeline for representational learning for all nodes in a graph.
    '''
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.h)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.r, args.l)
    learn_embeddings(walks)
Example #15
def node_walk(args):
    nx_g = read_graph(args)
    g = n2v.Graph(nx_g, args.directed, args.p,
                  args.q)  # args.directed is a bool marking directed vs. undirected;
    # args.p and args.q are the parameters p and q; this step builds the graph object
    g.preprocess_transition_probs()  # build the transition probability table for every node
    walks = g.simulate_walks(args.num_walks, args.walk_length)  # biased random walks
    # walks is the list of node sequences produced by the walks; they are fed to Word2Vec as sentences to learn the vectors
    return walks
Example #16
def main(nx_G, args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    # nx_G = read_graph()
    G = node2vec.Graph(nx_G, args["is_directed"], args["p"], args["q"])
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args["num_walks"], args["walk_length"])
    return learn_embeddings(walks, args)
Example #17
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    for i in range(0, args.num_walks):
        G.simulate_walks(1, args.walk_length, i)
Example #18
def main(args):

    # Pipeline for representational learning for all nodes in a graph.
    nx_G = read_graph()
    # nx_G = snap.LoadEdgeList(snap.PNGraph, '/home/huwenxin/文档/project/CA-AstroPh.txt', 0, 1)
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(walks)
Example #19
    def graph2walks(self, method="", params={}):

        self.params = params

        if method == "deepwalk":
            number_of_walks = self.params['number_of_walks']
            walk_length = self.params['walk_length']
            alpha = self.params['alpha']

            # Temporarily generate the edge list
            with open("./temp/graph.edgelist", 'w') as f:
                for line in nx.generate_edgelist(self.graph, data=False):
                    f.write("{}\n".format(line))

            dwg = deepwalk.load_edgelist("./temp/graph.edgelist",
                                         undirected=True)
            corpus = deepwalk.build_deepwalk_corpus(G=dwg,
                                                    num_paths=number_of_walks,
                                                    path_length=walk_length,
                                                    alpha=alpha,
                                                    rand=random.Random(0))

        elif method == "node2vec":

            number_of_walks = self.params['number_of_walks']
            walk_length = self.params['walk_length']
            p = self.params['p']
            q = self.params['q']

            for edge in self.graph.edges():
                self.graph[edge[0]][edge[1]]['weight'] = 1
            G = node2vec.Graph(nx_G=self.graph, p=p, q=q, is_directed=False)
            G.preprocess_transition_probs()
            corpus = G.simulate_walks(num_walks=number_of_walks,
                                      walk_length=walk_length)

        else:
            raise ValueError("Invalid method name!")
        """
        new_corpus = []
        line_counter = 0
        line = []
        for walk in corpus:
            if line_counter < self.params['number_of_walks']:
                line.extend(walk)
                line_counter += 1
            else:
                line_counter = 0
                new_corpus.append(line)
                line = []

        corpus = new_corpus
        """
        self.corpus = corpus

        return self.corpus
Example #20
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, True, 1, 10)  # is_directed=True, p=1, q=10
    G.preprocess_transition_probs()

    walks = G.simulate_walks(10, 80)
    learn_embeddings(walks)
Example #21
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    nx_G = read_graph('data/citations')
    nx_G = get_gaint_comp(nx_G)
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(walks)
Example #22
def doNode2VecStuff(nx_graph, filename = 'sample.txt', p = 1, q = 2, num_walks = 10, walk_length = 80,
                   dimensions = 2, window_size = 10, num_workers = 10, num_iters = 5):
    Graph_n2v = node2vec.Graph(nx_graph, False, p, q) # p and q param
    Graph_n2v.preprocess_transition_probs()
    walks = Graph_n2v.simulate_walks(num_walks, walk_length)
    walks = [[str(node) for node in walk] for walk in walks]  # lists of strings, not map objects, so gensim can iterate the corpus more than once
    
    model = Word2Vec(walks, size=dimensions, window=window_size, min_count=0, sg=1, workers=num_workers, iter=num_iters)
    
    model.wv.save_word2vec_format(filename, binary = False)
Example #23
def generate_random_walks(input, num_walks, walk_length):
    '''
    Pipeline for representational learning for all nodes in a graph.
    '''
    nx_G = read_graph(input)
    G = node2vec.Graph(nx_G, is_directed=False, p=1, q=1)  #DeepWalk
    G.preprocess_transition_probs()
    walks = G.simulate_walks(num_walks, walk_length)

    return np.array(walks)
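Example #23 stops at the raw walk array, so a caller still has to turn each walk into a sentence of string tokens before training, as the other examples here do. A sketch under the same pre-4.0 gensim assumption; the file name and hyperparameters are placeholders:

from gensim.models import Word2Vec

walks = generate_random_walks('graph.edgelist', num_walks=10, walk_length=80)
# each walk becomes a "sentence" of string node IDs for skip-gram training
sentences = [[str(node) for node in walk] for walk in walks]
model = Word2Vec(sentences, size=128, window=10, min_count=0, sg=1, workers=4, iter=5)
model.wv.save_word2vec_format('nodes.emb', binary=False)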
Example #24
def ns(nx_G, num_walks=100, walk_length=3):
    G = node2vec.Graph(nx_G, False, 1, 1)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(num_walks, walk_length, False)

    p_visit = calc_pvisit(walks)
    graph = nx_G.copy()
    for u,v in graph.edges():
        graph[u][v]['weight'] = cossim(p_visit[u], p_visit[v])
    return graph
Example #25
def main(args):
    nx_G = read_graph(file=args.input,
                      get_connected_graph=True,
                      remove_selfloops=True,
                      get_directed=False)

    print('Original Graph: nodes: {}, edges: {}'.format(
        nx_G.number_of_nodes(), nx_G.number_of_edges()))
    print()
    train_pos = pickle.load(open(config.train_pos, 'rb'))
    test_pos = pickle.load(open(config.test_pos, 'rb'))
    train_neg = pickle.load(open(config.train_neg, 'rb'))
    test_neg = pickle.load(open(config.test_neg, 'rb'))

    print('Number of positive training samples: ', len(train_pos))
    print('Number of negative training samples: ', len(train_neg))
    print('Number of positive testing samples: ', len(test_pos))
    print('Number of negative testing samples: ', len(test_neg))
    train_graph = read_graph(file=config.train_graph,
                             get_connected_graph=False,
                             remove_selfloops=False,
                             get_directed=False)

    print('Train graph created: {} nodes, {} edges'.format(
        train_graph.number_of_nodes(), train_graph.number_of_edges()))
    print('Number of connected components: ',
          nx.number_connected_components(train_graph))
    if config.train:
        if config.resume_training:
            _ = learn_embeddings(walks=None)
        else:
            G = node2vec.Graph(train_graph, args.directed, args.p, args.q)
            G.preprocess_transition_probs()
            walks = G.simulate_walks(args.num_walks, args.walk_length)
            # learn the embeddings
            _ = learn_embeddings(walks)

    embeddings_file = None
    checkpoint_file = None
    if config.evaluate:

        if config.model != 'rnn':
            embeddings_file = config.embeddings_dir + config.output_file
        else:
            checkpoint_file = config.checkpoint_dir + config.checkpoint_name
            print(checkpoint_file)
        # evaluate embeddings in link prediction
        _ = learn_embeddings(walks=None,
                             train_pos=train_pos,
                             train_neg=train_neg,
                             test_pos=test_pos,
                             test_neg=test_neg,
                             eval_bool=True,
                             embeddings_file=embeddings_file,
                             checkpoint_file=checkpoint_file)
Example #26
def ce(nx_G, k=3, num_walks=10, walk_length=3):
    newgraph = nx_G.copy()
    for u,v in tqdm(nx_G.edges()):
        s_u = set(nx.single_source_shortest_path_length(nx_G, u, cutoff=k).keys())  # nodes within k hops of u
        s_v = set(nx.single_source_shortest_path_length(nx_G, v, cutoff=k).keys())  # nodes within k hops of v
        sg = node2vec.Graph(nx_G.subgraph(s_u.union(s_v)), False, 1, 1)
        sg.preprocess_transition_probs()
        walks = sg.simulate_walks(num_walks, walk_length, False)
        probs = calc_ce(walks)
        newgraph[u][v]['weight'] = newgraph[v][u]['weight'] = 0.5 * (probs[(u,v)] + probs[(v,u)])
    return newgraph
Example #27
def main(args, Samplegraph):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    nx_G = Samplegraph
    print(nx_G.number_of_nodes())
    print(nx_G.edges(data=True))
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(walks)
Example #28
def main():
    prepareInput.createInput(logName)

    scores=[]
    #----------start Trace2Vec
    Trace2Vec.learn(logName,vectorsize)
    y=Trace2Vec.getY(logName)
    vectors, corpus=Trace2Vec.startCluster(logName, vectorsize)
    printMatrix(vectors, "Trace2Vec", "vectors")
    for alg in clustering:
        assigned_clusters=cluster(alg, vectors, y)
        printVector(assigned_clusters, "Trace2Vec", "clusters", alg)
        Trace2Vec.endCluster(logName, assigned_clusters, vectorsize, alg, corpus)
    #----------end Trace2Vec

    #----------start Node2Vec
    args=Node2Vec.parse_args()
    args.input="input/"+logName+".graph"
    args.output="output/"+logName+"N2VVS"+str(vectorsize)+".node2vec"
    nx_G = Node2Vec.read_graph(args)
    G = node2vec.Graph(nx_G, True, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    Node2Vec.learn_embeddings(args, logName, vectorsize, walks)
    Node2Vec.extract(logName, vectorsize)
    
    y=Node2Vec.getY(logName)
    vectors, corpus=Node2Vec.startCluster(logName, vectorsize)
    printMatrix(vectors, "Node2Vec", "vectors")
    for alg in clustering:
        assigned_clusters=cluster(alg, vectors, y)
        printVector(assigned_clusters, "Node2Vec", "clusters", alg)
        Node2Vec.endCluster(logName, assigned_clusters, vectorsize, alg, corpus)
    #----------end Node2Vec

    #----------start NGrams
    vectors, y=NGrams.ngrams_BPI_2015(logName, vectorsize)
    printMatrix(vectors, "NGrams", "vectors")
    for alg in clustering:
        assigned_clusters=cluster(alg, vectors, y)
        printVector(assigned_clusters, "NGrams", "clusters", alg)
        NGrams.endCluster(logName, assigned_clusters, vectorsize, alg, [0]*len(vectors))
    #----------end NGrams

    scores.append(get_scores("Trace2Vec"))
    scores.append(get_scores("Node2Vec"))
    scores.append(get_scores("NGrams"))

    for score in scores:
        print_scores(score)
    
    if vectorsize==2:
        for emb in embed:
            myPlot.plot(emb)
Example #29
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    nx_G = read_graph()  # read the graph from the input file
    G = node2vec.Graph(
        nx_G, args.directed, args.p, args.q
    )  # args.directed is a bool marking directed vs. undirected; args.p and args.q are the parameters p and q; this step builds the graph object
    G.preprocess_transition_probs()  # build the transition probability table for every node
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(walks)  # walks is the list of node sequences produced by the random walks; they are fed to Word2Vec as sentences to learn the vectors
Example #30
def main(args):
    '''
	Pipeline for representational learning for all nodes in a graph.
	'''
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)
    weights = learn_embeddings(walks)

    weight_dict = {"node_embs": weights}
    torch.save(weight_dict, args.output)
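The dictionary written by Example #30 can later be reloaded with torch.load; a minimal sketch, assuming args.output pointed at embeddings.pt:

import torch

weight_dict = torch.load('embeddings.pt')  # path is whatever was passed as args.output
node_embs = weight_dict['node_embs']       # same key the snippet above saved under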