def detect_hubs(self):
    """Collect the unusually well-connected nodes of ``self.G`` in ``self.hubs``.

    A node is treated as a hub when its degree is strictly greater than the
    mean degree plus one standard deviation (``np.std`` with its default
    settings), both computed over every node of the graph.

    ``self.hubs`` is rebuilt from scratch on each call; nothing is returned.
    For a graph without nodes ``self.hubs`` is left empty and no statistics
    are logged.
    """
    self.hubs = []

    # Iterating the degree view yields (node, degree) pairs.
    node_degrees = list(self.G.degree())
    if not node_degrees:
        # Mean and standard deviation are undefined without any nodes, and
        # unpacking the empty pair list would raise IndexError.
        return

    degrees = [deg for _, deg in node_degrees]
    avg_degree = np.mean(degrees)
    std_degree = np.std(degrees)

    threshold = avg_degree + std_degree
    self.hubs = [node for node, deg in node_degrees if deg > threshold]

    debug('Hubs: ', self.hubs, ", avg degree: ", avg_degree,
          ", std degree: ", std_degree)
def run(opts: Options,
        method: Node2Vec,
        title=None,
        cluster2color=('green', 'red', 'blue', 'orange', 'purple', 'cyan'),
        num_clusters=6,
        zero_indexed=False,
        tsne_perplexity=12,
        draw=("graph", "tsne"),
        save=False,
        save_path=None):
    """Run the pipeline via ``main`` and optionally draw and/or export the result.

    Args:
        opts: Options forwarded unchanged to ``main``.
        method: Embedding method forwarded to ``main``; its ``model``
            attribute is read afterwards for colouring and plotting.
        title: Title passed to the drawing helpers and to the export.
            Required when ``save`` is truthy.
        cluster2color: Colours handed to ``cluster.cluster_colors``.
        num_clusters: Cluster count handed to ``cluster.cluster_colors``.
        zero_indexed: Forwarded to ``utils.color_by_index`` and
            ``utils.save_gefx``.
        tsne_perplexity: Perplexity passed to ``tsne.model``.
        draw: Collection of plot names; ``"graph"`` and ``"tsne"`` are
            recognised. A falsy value disables drawing.
        save: When truthy, ``utils.save_gefx`` is called at the end.
        save_path: Destination passed to ``utils.save_gefx``. Required when
            ``save`` is truthy.

    Returns:
        The classification results returned by ``main``.

    Raises:
        AssertionError: If ``save`` is truthy but ``title`` or ``save_path``
            is missing.
    """
    # Fail fast: check the export preconditions before the (potentially long)
    # training and drawing steps instead of after them. Kept as ``assert`` so
    # the exception type seen by callers does not change.
    if save:
        assert title, "title must be set if save=True"
        assert save_path, "save_path must be set if save=True"

    # The default is an immutable tuple so it cannot be shared and mutated
    # across calls; downstream code still receives a fresh list.
    if isinstance(cluster2color, tuple):
        cluster2color = list(cluster2color)

    G, clf_results = main(opts, method)
    model = method.model

    if not (draw or save):
        return clf_results

    debug("Generating colors...")
    colors = cluster.cluster_colors(graph=G,
                                    model=model,
                                    cluster2color=cluster2color,
                                    num_clusters=num_clusters)
    colors_idx = utils.color_by_index(colors, list(model.wv.vocab),
                                      zero_indexed)

    if draw:
        debug("Drawing...")
        if "graph" in draw:
            debug("Drawing graph...")
            layout = nx.drawing.layout.spring_layout(G)
            drawing.graph(graph=G,
                          model=model,
                          colors=colors,
                          layout=layout,
                          title=title)
        if "tsne" in draw:
            debug("Drawing TSNE...")
            tsne_model = tsne.model(model, perplexity=tsne_perplexity)
            drawing.tsne(tsne_model, colors_idx, title=title)

    if save:
        utils.save_gefx(G, colors, title, save_path, zero_indexed)

    return clf_results
def preprocess_transition_probs(self):
    '''
    Preprocessing of transition probabilities for guiding the random walks.

    Nodes: for every node the 'weight' attributes of its edges to the
    (sorted) neighbours are normalised by their sum and passed to
    self.alias_setup; the result is stored per node.

    Edges: the edges of the graph are split into chunks with
    self.split_chunks and handed to a process pool that runs
    self.preprocess_edge_chunk on each chunk; the dicts returned by the
    workers are merged into one.

    The results are stored in self.alias_nodes and self.alias_edges;
    nothing is returned.

    NOTE: a node whose neighbour weights sum to zero raises
    ZeroDivisionError during normalisation.
    '''
    G = self.G

    debug("Preprocessing nodes...")
    alias_nodes = {}
    for node in tqdm(G.nodes(), disable=(not DEBUG)):
        # get all weights of node's neighbors
        unnormalized_probs = [
            G[node][nbr]['weight'] for nbr in sorted(G.neighbors(node))
        ]

        # normalize weights with sum
        norm_const = sum(unnormalized_probs)
        normalized_probs = [
            float(u_prob) / norm_const for u_prob in unnormalized_probs
        ]
        alias_nodes[node] = self.alias_setup(normalized_probs)

    debug("Preprocessing edges...")
    alias_edges = {}

    # Split edges up in chunks.
    # os.cpu_count() returns None when the count cannot be determined;
    # fall back to a single worker instead of failing on `None ** 2`.
    workers = os.cpu_count() or 1
    edge_divisor = workers**2
    # "+ 1" keeps the chunk size at least 1, even for very small graphs.
    chunk_size = int(G.size() / edge_divisor) + 1
    chunks = list(self.split_chunks(G.edges(), chunk_size))

    # Creating worker pool & running
    with mp.Pool(processes=workers) as pool:
        debug(
            f"Workers {workers}, chunks {len(chunks)} of size {chunk_size}"
        )
        # list() drains the imap iterator, so every chunk has been processed
        # once `result` exists.
        result = list(
            tqdm(pool.imap(self.preprocess_edge_chunk, chunks),
                 total=len(chunks),
                 disable=(not DEBUG)))

        # Combining results into one
        for d in result:
            alias_edges.update(d)
        # Drop the per-chunk dicts now that they have been merged.
        del result

        # Shut the workers down gracefully before leaving the context.
        pool.close()
        pool.join()

    self.alias_nodes = alias_nodes
    self.alias_edges = alias_edges
def run(self):
    """Train the embedding model and store it in ``self.model``.

    Three steps, each announced via ``debug``:
      1. let the walker preprocess its transition probabilities,
      2. simulate ``opts.num_walks`` walks of length ``opts.walk_length``,
      3. fit a ``Word2Vec`` model on the walks using the settings in
         ``self.opts`` (workers, dim, min_count, window) with ``sg=1``.
    """
    debug("Preprocessing transition probs...")
    self.walker.preprocess_transition_probs()

    debug("Simulating walks...")
    walks = self.walker.simulate_walks(self.opts.num_walks,
                                       self.opts.walk_length)

    debug("Learning model...")
    # Word2Vec works on "words", so every node of every walk is turned into
    # its string form first.
    sentences = []
    for walk in walks:
        sentences.append([str(node) for node in walk])
    walks_str = tqdm(sentences, disable=(not DEBUG))

    w2v_settings = dict(
        workers=self.opts.workers,
        size=self.opts.dim,
        min_count=self.opts.min_count,
        window=self.opts.window,
        sg=1,
    )
    self.model = Word2Vec(walks_str, **w2v_settings)
def __init__(self, opts: MethodOpts, h=1):
    """Remember the method options and the ``h`` setting on the instance.

    Args:
        opts: Options object; its ``__dict__`` is logged via ``debug`` and
            the object is kept as ``self.opts``.
        h: Stored unchanged as ``self.h`` (default 1). Its meaning is not
            visible from this constructor -- see where ``self.h`` is used.
    """
    debug("Node2Vec opts: ", opts.__dict__)
    self.opts, self.h = opts, h
def main(opts: Options, model: node2vec.Node2Vec):
    """Read the graph, obtain embeddings and optionally evaluate a classifier.

    Steps:
      1. Seed ``random`` and ``numpy.random`` with 32.
      2. Read the graph from ``opts.input`` according to
         ``opts.graph_format`` (``'adjlist'`` or ``'edgelist'``).
      3. If ``opts.output`` names an existing file, load the embeddings
         from it; otherwise train ``model`` on the graph and, when
         ``opts.output`` is set, save the embeddings there.
      4. If ``opts.label_file`` is set, train and evaluate a logistic
         regression classifier on the embeddings.

    Args:
        opts: Run options (see the attributes read above).
        model: Embedding method; must provide ``load_embeddings``,
            ``init_walker``, ``run``, ``save_embeddings`` and
            ``get_vectors``.

    Returns:
        A ``(G, clf_results)`` tuple. ``clf_results`` is ``None`` when no
        label file was given.

    Raises:
        ValueError: If ``opts.graph_format`` is not a supported format.
    """
    debug("Options: ", opts.__dict__)
    random.seed(32)
    np.random.seed(32)

    # Reading graph
    debug("Reading graph...")
    if opts.graph_format == 'adjlist':
        G = graph.read_adjlist(filename=opts.input)
    elif opts.graph_format == 'edgelist':
        G = graph.read_edgelist(filename=opts.input, weighted=opts.weighted)
    else:
        # Previously G stayed None here and the next line failed with an
        # opaque AttributeError; report the actual problem instead.
        raise ValueError(
            f"Unsupported graph_format {opts.graph_format!r}; "
            "expected 'adjlist' or 'edgelist'")
    debug(f"Graph: {len(G.nodes())} nodes, {G.size()} edges")

    # Loading/learning embeddings
    if opts.output and os.path.isfile(opts.output):
        debug(f"Model {opts.output} exists, loading from file...")
        model.load_embeddings(opts.output)
    else:
        debug("Training model...")
        model.init_walker(G)
        model.run()
        if opts.output:
            debug("Saving embeddings...")
            model.save_embeddings(opts.output)

    # Classification
    clf_results = None
    if opts.label_file:
        X, Y = classify.read_node_label(opts.label_file)
        debug(f"Training classifier using {opts.training_ratio * 100:.2f}% "
              "nodes...")
        clf = classify.Classifier(vectors=model.get_vectors(),
                                  clf=LogisticRegression(solver='liblinear'))
        clf_results = clf.split_train_evaluate(X,
                                               Y,
                                               opts.training_ratio,
                                               iter=opts.clf_iterations)

    return G, clf_results
def __init__(self, opts: MethodOpts, p=1.0, q=1.0):
    """Remember the method options and the ``p`` / ``q`` settings.

    Args:
        opts: Options object; its ``__dict__`` is logged via ``debug`` and
            the object is kept as ``self.opts``.
        p: Stored unchanged as ``self.p`` (default 1.0).
        q: Stored unchanged as ``self.q`` (default 1.0).

    NOTE(review): ``p`` and ``q`` are presumably the node2vec return and
    in-out parameters -- confirm against the walker that consumes them.
    """
    debug("Node2Vec opts: ", opts.__dict__)
    self.opts, self.p, self.q = opts, p, q
def __init__(self, opts: MethodOpts, jump_prob=0):
    """Remember the method options and the ``jump_prob`` setting.

    Args:
        opts: Options object; its ``__dict__`` is logged via ``debug`` and
            the object is kept as ``self.opts``.
        jump_prob: Stored unchanged as ``self.jump_prob`` (default 0).
            Presumably a probability used during the walks -- confirm
            where ``self.jump_prob`` is read.
    """
    debug("Node2Vec opts: ", opts.__dict__)
    self.opts, self.jump_prob = opts, jump_prob