def main(): parser = argparse.ArgumentParser() parser.add_argument('graph') args = parser.parse_args() #vertices, edges = read_graph_from_file(args.graph) G = nx.read_edgelist(args.graph) n = G.number_of_nodes() print "nodes:", n print "edges:", G.number_of_edges() core_exponent = 0.5 core_vertices = filter(lambda v: G.degree(v) >= n**core_exponent, G.nodes()) print "core vertices:", len(core_vertices) core = G.subgraph(core_vertices) print "number of connected components in core:", nx.number_connected_components( core) # BFS-traversal fringe_fraction = 0.1 max_fringe_size = int(n * fringe_fraction) core_vertices = set(core_vertices) for i in range(int(1 / fringe_fraction) + 1): fringe_vertices = set( sorted(fringe(G, core_vertices), key=lambda v: -G.degree(v))[:max_fringe_size]) if not fringe_vertices: break print "{}: core={}, fringe={}".format(i + 1, len(core_vertices), len(fringe_vertices)) core_vertices |= fringe_vertices
def main(): parser = argparse.ArgumentParser() parser.add_argument('graph') args = parser.parse_args() #vertices, edges = read_graph_from_file(args.graph) G = nx.read_edgelist(args.graph) n = G.number_of_nodes() print "nodes:", n print "edges:", G.number_of_edges() core_exponent = 0.5 core_vertices = filter(lambda v: G.degree(v) >= n**core_exponent, G.nodes()) print "core vertices:", len(core_vertices) core = G.subgraph(core_vertices) print "number of connected components in core:", nx.number_connected_components(core) # BFS-traversal fringe_fraction = 0.1 max_fringe_size = int(n * fringe_fraction) core_vertices = set(core_vertices) for i in range(int(1/fringe_fraction)+1): fringe_vertices = set(sorted(fringe(G, core_vertices), key=lambda v: -G.degree(v))[:max_fringe_size]) if not fringe_vertices: break print "{}: core={}, fringe={}".format(i+1, len(core_vertices), len(fringe_vertices)) core_vertices |= fringe_vertices
def find_embeddings(
    vertices,
    edges,
    mode,
    learning_rate=0.1,
    n_epoch=100,
    ratio_to_second=2.0,
    ratio_between_first=1.0,
    ratio_random=1.0,
    silent=False,
):
    """Find hyperbolic coordinates (r, phi) for every vertex.

    mode selects the strategy:
      "random"  -- uniform random r in [0, R], phi in [0, 2*pi)
      "degrees" -- r = 2*log(n/degree), random phi
      "fit*"    -- minimize objective Q over edges plus sampled non-edges;
                   "fit_random"/"fit_degrees" control how the negative
                   (absent) edges are sampled, "fit_degrees_sgd" runs an
                   incremental SGD fit over a growing core, every other
                   fit mode uses BFGS over all non-edges.

    Returns {vertex: (r, phi)} for all modes except "fit_degrees_sgd",
    which returns (embedding_model.embedding["vertices"], {"core": edges})
    -- presumably a per-vertex coordinate dict plus the edge list; confirm
    against PoincareModel (defined elsewhere).
    Raises Exception for an unrecognized mode.
    """
    vertices = list(vertices)
    n = len(vertices)
    # Disk radius bound used for the "random" mode.
    R = 2 * np.log(n)
    print "mode: {}".format(mode)
    np.random.seed(0)  # deterministic random angles/radii across runs
    degrees = defaultdict(int)
    print "count degrees"
    # Count vertex degrees straight from the edge list.
    for v1, v2 in edges:
        degrees[v1] += 1
        degrees[v2] += 1
    if mode == "random":
        # phi = rand(0, 2pi), r = rand(0, R)
        return {v: (np.random.uniform(0.0, R), np.random.uniform(0.0, 2 * np.pi)) for v in vertices}
    elif mode == "degrees":
        # phi = rand(0, 2pi), r = 2*log(n/k) -- higher degree => smaller radius
        return {v: (2 * np.log(n / degrees[v]), np.random.uniform(0.0, 2 * np.pi)) for v in vertices}
    elif mode.startswith("fit"):
        # Initial optimization point x0 = [r_0, phi_0, r_1, phi_1, ...]:
        # degree-based radii paired with random angles.
        x0 = []
        for (r, phi) in zip(
            [2 * np.log(n / degrees[v]) for v in vertices],
            [np.random.uniform(0.0, 2 * np.pi) for v in vertices]
        ):
            x0.append(r)
            x0.append(phi)
        x0 = np.array(x0)
        # nedges: the sampled negative (absent) edges fed to the objective;
        # all_nedges: every non-adjacent pair (quadratic in n).
        nedges = set()
        all_nedges = set()
        for (v1, v2) in combinations(vertices, 2):
            # make_edge presumably canonicalizes endpoint order -- defined elsewhere
            e = make_edge(v1, v2)
            if e not in edges:
                all_nedges.add(e)
        if mode == "fit_random":
            # Sample as many random non-edges as there are real edges.
            a = list(all_nedges)
            random.shuffle(a)
            nedges = set(a[: len(edges)])
        elif mode == "fit_degrees":
            K = float(ratio_to_second)  # ratio of nedges to second neighbour
            L = float(ratio_between_first)  # ratio of nedges between first neighbours
            M = float(ratio_random)  # ratio of random nedges
            G = nx.Graph()
            G.add_edges_from(edges)
            # Visit vertices from highest to lowest degree; a shuffled copy
            # supplies the pool for random negative edges.
            srt_vertices = sorted(degrees.keys(), key=lambda v: -degrees[v])
            shuf_vertices = srt_vertices[:]
            random.shuffle(shuf_vertices)
            for v in srt_vertices:
                # get first neighbours
                first_neigh = set(G.neighbors(v))
                # get second neighbours (neighbours of neighbours, minus v)
                second_neigh = set()
                for neigh in first_neigh:
                    second_neigh.update(G.neighbors(neigh))
                second_neigh.remove(v)
                n_vertex_nedges = 0
                # from v to at most degrees[v]*K second neighbours
                # NOTE(review): second_neigh may still contain direct
                # neighbours of v (triangles) and there is no
                # `e not in edges` check here, unlike the random section
                # below -- confirm real edges are not meant to be excluded.
                for i, sec_n in enumerate(second_neigh):
                    if i + 1 > degrees[v] * K:
                        continue
                    e = make_edge(v, sec_n)
                    if e not in nedges:
                        nedges.add(e)
                        n_vertex_nedges += 1
                # between at most degrees[v]*L pairs of first neighbours
                for j, pair in enumerate(combinations(first_neigh, 2)):
                    if j + 1 > degrees[v] * L:
                        continue
                    v1, v2 = pair
                    e = make_edge(v1, v2)
                    if e not in nedges:
                        nedges.add(e)
                # random edges: at most degrees[v]*M extra negatives
                max_n_random_vertices = int(degrees[v] * M)
                n_random_vertices = 0
                for rand_v in shuf_vertices:
                    if n_random_vertices >= max_n_random_vertices:
                        break
                    e = make_edge(v, rand_v)
                    if e not in nedges and e not in edges:
                        nedges.add(e)
                        n_random_vertices += 1
        else:
            # plain "fit" (and any other fit_*): use every non-edge
            nedges = all_nedges.copy()
        print "number of nedges={}".format(len(nedges))
        # Objective and analytic gradient over the flat (r, phi) vector;
        # Q and GradQ are defined elsewhere in this module.
        q = Q(vertices, edges, nedges)
        grad_q = GradQ(vertices, edges, nedges)
        if mode == "fit_degrees_sgd":
            print "Learning rate: {}".format(learning_rate)
            print "Ratio to second: {}".format(ratio_to_second)
            print "Ratio between first: {}".format(ratio_between_first)
            print "Ratio random: {}".format(ratio_random)
            G = nx.Graph()
            G.add_edges_from(edges)
            # construct connected(!) core of high-degree vertices
            core_exponent = 0.4
            core_vertices, fringe_vertices = [], []
            # one-pass split by condition (conditional expression used
            # purely for its append side effects)
            for v in vertices:
                core_vertices.append(v) if degrees[v] >= n ** core_exponent else fringe_vertices.append(v)
            # add highest-degree fringe vertices until the core subgraph
            # is connected
            fringe_vertices.sort(key=lambda v: -degrees[v])
            while not nx.is_connected(G.subgraph(core_vertices)):
                core_vertices.append(fringe_vertices.pop(0))
            print "Core size: {}".format(len(core_vertices))
            G_core = G.subgraph(core_vertices)
            print "Is core connected:", nx.is_connected(G_core)
            # loss_function = MSE(binary_edges=True)
            loss_function = LogLoss(binary_edges=True)
            optimizer = SGD(n_epoch=n_epoch, learning_rate=learning_rate, verbose=not silent)
            # Incremental fit: embed the core first, then repeatedly embed
            # the next fringe layer while keeping prior vertices fixed.
            FRINGE_FRACTION = 0.1
            max_fringe_size = int(G.number_of_nodes() * FRINGE_FRACTION)
            curr_graph = G.subgraph(core_vertices)
            curr_core_vertices = set(core_vertices)
            curr_embedding_model = PoincareModel(curr_graph, fit_radius=False)
            curr_pair_generator = BinaryPairGenerator(curr_graph, batch_size=1)
            optimizer.optimize_embedding(curr_embedding_model, loss_function, curr_pair_generator)
            for i in range(int(1 / FRINGE_FRACTION) + 1):
                total_fringe = fringe(G, curr_core_vertices)
                # take the max_fringe_size highest-degree fringe vertices
                fringe_vertices = set(sorted(total_fringe, key=lambda v: -G.degree(v))[:max_fringe_size])
                if not fringe_vertices:
                    break
                curr_graph = G.subgraph(curr_core_vertices | fringe_vertices)
                # warm-start from the previous model; only the new layer moves
                curr_embedding_model = PoincareModel(curr_graph, fit_radius=False, init_embedding=curr_embedding_model)
                curr_pair_generator = BinaryPairGenerator(curr_graph, batch_size=1)
                optimizer.optimize_embedding(
                    curr_embedding_model, loss_function, curr_pair_generator, fixed_vertices=curr_core_vertices
                )
                curr_core_vertices |= fringe_vertices
            embedding_model = curr_embedding_model
            """
            core_embedding_model = PoincareModel(G_core, fit_radius=False)
            core_pair_generator = BinaryPairGenerator(G_core, batch_size=1)
            optimizer.optimize_embedding(core_embedding_model, loss_function, core_pair_generator)
            #optimizer = SGD(n_epoch=n_epoch, learning_rate=learning_rate, verbose=not silent)
            embedding_model = PoincareModel(G, fit_radius=False, init_embedding=core_embedding_model)
            pair_generator = BinaryPairGenerator(G, batch_size=1)
            optimizer.optimize_embedding(embedding_model, loss_function, pair_generator, fixed_vertices=core_vertices)
            #print "Radius before: {}".format(embedding_model.embedding['radius'])
            #print "Radius after: {}".format(embedding_model.embedding['radius'])
            """
            # NOTE(review): the "core" entry holds the FULL graph's edges
            # (G, not G_core) -- confirm the key name is intentional.
            return (embedding_model.embedding["vertices"], {"core": list(G.edges())})
        else:
            # batch fit: BFGS over the full flat coordinate vector
            print "Check gradient: ", check_grad(q, grad_q, x0)
            res = minimize(q, x0, method="BFGS", jac=grad_q)
            x = res.x
            # unpack flat [r0, phi0, r1, phi1, ...] back into a dict
            retval = {}
            for i in range(len(vertices)):
                r = x[2 * i]
                phi = x[2 * i + 1]
                retval[vertices[i]] = (r, phi)
            return retval
    else:
        raise Exception("unknown mode")
def find_embeddings(vertices,
                    edges,
                    mode,
                    learning_rate=0.1,
                    n_epoch=100,
                    ratio_to_second=2.,
                    ratio_between_first=1.,
                    ratio_random=1.,
                    silent=False):
    """Compute hyperbolic coordinates (r, phi) for each vertex.

    The mode string picks the method: 'random' places vertices uniformly
    on the disk, 'degrees' derives the radius from the vertex degree, and
    any mode starting with 'fit' optimizes objective Q over the edges plus
    a set of sampled non-edges ('fit_random' and 'fit_degrees' differ in
    how negatives are sampled; 'fit_degrees_sgd' fits incrementally with
    SGD over a growing core, all other fit modes use BFGS).

    Returns a {vertex: (r, phi)} dict, except 'fit_degrees_sgd' which
    returns (embedding['vertices'], {'core': edge list}) -- the first item
    is presumably a coordinate dict produced by PoincareModel (defined
    elsewhere). Raises Exception when mode is not recognized.
    """
    vertices = list(vertices)
    n = len(vertices)
    # Radius bound for the 'random' mode.
    R = 2 * np.log(n)
    print "mode: {}".format(mode)
    np.random.seed(0)  # fixed seed: reproducible random components
    degrees = defaultdict(int)
    print "count degrees"
    # Degree of every endpoint, counted from the raw edge list.
    for v1, v2 in edges:
        degrees[v1] += 1
        degrees[v2] += 1
    if mode == 'random':
        # phi=rand(0, 2pi), r = rand(0,R)
        return {
            v: (np.random.uniform(0.0, R), np.random.uniform(0.0, 2 * np.pi))
            for v in vertices
        }
    elif mode == 'degrees':
        # phi=rand(0,2pi), r = 2log(n/k): hubs end up near the centre
        return {
            v: (2 * np.log(n / degrees[v]), np.random.uniform(0.0, 2 * np.pi))
            for v in vertices
        }
    elif mode.startswith('fit'):
        # Starting point for the optimizer, flattened as
        # [r_0, phi_0, r_1, phi_1, ...].
        x0 = []
        for (r, phi) in zip([2 * np.log(n / degrees[v]) for v in vertices],
                            [np.random.uniform(0.0, 2 * np.pi) for v in vertices]):
            x0.append(r)
            x0.append(phi)
        x0 = np.array(x0)
        # nedges: sampled negative edges; all_nedges: every absent pair
        # (O(n^2) -- costly on large graphs).
        nedges = set()
        all_nedges = set()
        for (v1, v2) in combinations(vertices, 2):
            # make_edge presumably normalizes the pair's order -- defined elsewhere
            e = make_edge(v1, v2)
            if e not in edges:
                all_nedges.add(e)
        if mode == 'fit_random':
            # keep a uniformly random sample of |edges| non-edges
            a = list(all_nedges)
            random.shuffle(a)
            nedges = set(a[:len(edges)])
        elif mode == 'fit_degrees':
            K = float(ratio_to_second)  # ratio of nedges to second neighbour
            L = float(ratio_between_first)  # ratio of nedges between first neighbours
            M = float(ratio_random)  # ratio of random nedges
            G = nx.Graph()
            G.add_edges_from(edges)
            # high-degree vertices first; shuffled copy feeds random negatives
            srt_vertices = sorted(degrees.keys(), key=lambda v: -degrees[v])
            shuf_vertices = srt_vertices[:]
            random.shuffle(shuf_vertices)
            for v in srt_vertices:
                # get first neighbours
                first_neigh = set(G.neighbors(v))
                # get second neighbours (v itself removed afterwards)
                second_neigh = set()
                for neigh in first_neigh:
                    second_neigh.update(G.neighbors(neigh))
                second_neigh.remove(v)
                n_vertex_nedges = 0
                # from v to second neighbours, capped at degrees[v]*K
                # NOTE(review): no `e not in edges` filter here, and
                # second_neigh can include direct neighbours of v when the
                # graph has triangles -- verify real edges should be allowed.
                for i, sec_n in enumerate(second_neigh):
                    if i + 1 > degrees[v] * K:
                        continue
                    e = make_edge(v, sec_n)
                    if e not in nedges:
                        nedges.add(e)
                        n_vertex_nedges += 1
                # between first neighbours, capped at degrees[v]*L pairs
                for j, pair in enumerate(combinations(first_neigh, 2)):
                    if j + 1 > degrees[v] * L:
                        continue
                    v1, v2 = pair
                    e = make_edge(v1, v2)
                    if e not in nedges:
                        nedges.add(e)
                # random edges, capped at degrees[v]*M
                max_n_random_vertices = int(degrees[v] * M)
                n_random_vertices = 0
                for rand_v in shuf_vertices:
                    if n_random_vertices >= max_n_random_vertices:
                        break
                    e = make_edge(v, rand_v)
                    if e not in nedges and e not in edges:
                        nedges.add(e)
                        n_random_vertices += 1
        else:
            # bare 'fit' or any other fit_* mode: use all non-edges
            nedges = all_nedges.copy()
        print "number of nedges={}".format(len(nedges))
        # Q / GradQ (objective and gradient) are defined elsewhere.
        q = Q(vertices, edges, nedges)
        grad_q = GradQ(vertices, edges, nedges)
        if mode == 'fit_degrees_sgd':
            print "Learning rate: {}".format(learning_rate)
            print "Ratio to second: {}".format(ratio_to_second)
            print "Ratio between first: {}".format(ratio_between_first)
            print "Ratio random: {}".format(ratio_random)
            G = nx.Graph()
            G.add_edges_from(edges)
            # construct connected(!) core
            core_exponent = 0.4
            core_vertices, fringe_vertices = [], []
            # one-pass split by condition; the conditional expression is
            # evaluated only for its append side effects
            for v in vertices:
                core_vertices.append(v) if degrees[
                    v] >= n**core_exponent else fringe_vertices.append(v)
            # add vertices to ensure connectivity of core
            fringe_vertices.sort(key=lambda v: -degrees[v])
            while not nx.is_connected(G.subgraph(core_vertices)):
                core_vertices.append(fringe_vertices.pop(0))
            print "Core size: {}".format(len(core_vertices))
            G_core = G.subgraph(core_vertices)
            print "Is core connected:", nx.is_connected(G_core)
            #loss_function = MSE(binary_edges=True)
            loss_function = LogLoss(binary_edges=True)
            optimizer = SGD(n_epoch=n_epoch,
                            learning_rate=learning_rate,
                            verbose=not silent)
            # Incremental scheme: fit the core, then repeatedly fit one
            # fringe layer at a time with earlier vertices held fixed.
            FRINGE_FRACTION = 0.1
            max_fringe_size = int(G.number_of_nodes() * FRINGE_FRACTION)
            curr_graph = G.subgraph(core_vertices)
            curr_core_vertices = set(core_vertices)
            curr_embedding_model = PoincareModel(curr_graph, fit_radius=False)
            curr_pair_generator = BinaryPairGenerator(curr_graph, batch_size=1)
            optimizer.optimize_embedding(curr_embedding_model, loss_function,
                                         curr_pair_generator)
            for i in range(int(1 / FRINGE_FRACTION) + 1):
                total_fringe = fringe(G, curr_core_vertices)
                # highest-degree fringe vertices, at most max_fringe_size
                fringe_vertices = set(
                    sorted(total_fringe,
                           key=lambda v: -G.degree(v))[:max_fringe_size])
                if not fringe_vertices:
                    break
                curr_graph = G.subgraph(curr_core_vertices | fringe_vertices)
                # warm-started model; fixed_vertices keeps old coords frozen
                curr_embedding_model = PoincareModel(
                    curr_graph,
                    fit_radius=False,
                    init_embedding=curr_embedding_model)
                curr_pair_generator = BinaryPairGenerator(curr_graph,
                                                          batch_size=1)
                optimizer.optimize_embedding(curr_embedding_model,
                                             loss_function,
                                             curr_pair_generator,
                                             fixed_vertices=curr_core_vertices)
                curr_core_vertices |= fringe_vertices
            embedding_model = curr_embedding_model
            '''
            core_embedding_model = PoincareModel(G_core, fit_radius=False)
            core_pair_generator = BinaryPairGenerator(G_core, batch_size=1)
            optimizer.optimize_embedding(core_embedding_model, loss_function, core_pair_generator)
            #optimizer = SGD(n_epoch=n_epoch, learning_rate=learning_rate, verbose=not silent)
            embedding_model = PoincareModel(G, fit_radius=False, init_embedding=core_embedding_model)
            pair_generator = BinaryPairGenerator(G, batch_size=1)
            optimizer.optimize_embedding(embedding_model, loss_function, pair_generator, fixed_vertices=core_vertices)
            #print "Radius before: {}".format(embedding_model.embedding['radius'])
            #print "Radius after: {}".format(embedding_model.embedding['radius'])
            '''
            # NOTE(review): the 'core' entry contains edges of the FULL
            # graph G, not G_core -- confirm that is intended.
            return (embedding_model.embedding['vertices'], {
                'core': list(G.edges())
            })
        else:
            # all remaining fit modes: one BFGS run over the flat vector
            print "Check gradient: ", check_grad(q, grad_q, x0)
            res = minimize(q, x0, method='BFGS', jac=grad_q)
            x = res.x
            # rebuild {vertex: (r, phi)} from the flat solution vector
            retval = {}
            for i in range(len(vertices)):
                r = x[2 * i]
                phi = x[2 * i + 1]
                retval[vertices[i]] = (r, phi)
            return retval
    else:
        raise Exception('unknown mode')