import math
import random

import numpy as np

import easygraph as eg


def sum_of_shortest_paths(G, S):
    r"""Returns the difference between the sum of lengths of all-pairs
    shortest paths in G \ S and the one in G, i.e. C(G \ S) - C(G).
    This is the experiment metric used in [1]_.

    Parameters
    ----------
    G : easygraph.Graph or easygraph.DiGraph

    S : list of int
        A list of nodes which are structural hole spanners.

    Returns
    -------
    differ_between_sum : int
        The difference between the sum of lengths of all-pairs shortest
        paths in G \ S and the one in G, C(G \ S) - C(G).

    Examples
    --------
    >>> G_t = eg.datasets.get_graph_blogcatalog()
    >>> S_t = eg.AP_Greedy(G_t, 10000)
    >>> diff = sum_of_shortest_paths(G_t, S_t)
    >>> print(diff)

    References
    ----------
    .. [1] https://dl.acm.org/profile/81484650642
    """
    mat_G = eg.Floyd(G)
    sum_G = 0
    # Unreachable pairs contribute a large finite constant instead of infinity.
    inf_const_G = math.ceil((G.number_of_nodes() ** 3) / 3)
    for i in mat_G.values():
        for j in i.values():
            if math.isinf(j):
                j = inf_const_G
            sum_G += j

    G_S = G.copy()
    G_S.remove_nodes(S)
    mat_G_S = eg.Floyd(G_S)
    sum_G_S = 0
    inf_const_G_S = math.ceil((G_S.number_of_nodes() ** 3) / 3)
    for i in mat_G_S.values():
        for j in i.values():
            if math.isinf(j):
                j = inf_const_G_S
            sum_G_S += j

    return sum_G_S - sum_G

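
# A minimal usage sketch (illustrative only, not part of the library): build a
# hypothetical 4-node path graph, treat node 3 as the spanner set S, and
# compare the all-pairs shortest-path sums before and after removing it.
# Relies on the module-level imports above.
#
#     G_demo = eg.Graph()
#     G_demo.add_edge(1, 2)
#     G_demo.add_edge(2, 3)
#     G_demo.add_edge(3, 4)
#     print(sum_of_shortest_paths(G_demo, [3]))
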
def CombineNodes(records, G, label_dict, score_dict, node_dict,
                 Next_label_dict, nodes, degrees, distance_dict):
    """Collapse every current community into a single super-node.

    Helper for HANP when hier_open == 1 and combine_open == 1: the current
    label assignment is appended to records, a new graph is built whose nodes
    are the communities and whose edge weights sum the weights of the original
    edges between two communities, and all bookkeeping structures (labels,
    scores, node maps, degrees, distances) are rebuilt for the new graph.
    """
    # Group nodes by their current label: label -> list of member nodes.
    onerecord = dict()
    for node, label in label_dict.items():
        if label in onerecord:
            onerecord[label].append(node)
        else:
            onerecord[label] = [node]
    records.append(onerecord)

    # One new node per community, keeping the old community label around.
    Gx = eg.Graph()
    label_dictx = dict()
    score_dictx = dict()
    node_dictx = dict()
    nodesx = []
    cnt = 0
    for record_label in onerecord:
        nodesx.append(cnt)
        label_dictx[cnt] = record_label
        score_dictx[record_label] = score_dict[record_label]
        node_dictx[record_label] = cnt
        cnt += 1

    # Accumulate inter-community edge weights from the original adjacency.
    record_labels = list(onerecord.keys())
    edge = dict()
    adj = G.adj
    for i in range(0, len(record_labels)):
        edge[i] = dict()
        for j in range(0, len(record_labels)):
            if i == j:
                continue
            inodes = onerecord[record_labels[i]]
            jnodes = onerecord[record_labels[j]]
            for unode in inodes:
                for vnode in jnodes:
                    if unode in adj and vnode in adj[unode]:
                        if j not in edge[i]:
                            edge[i][j] = 0
                        edge[i][j] += adj[unode][vnode].get("weight", 1)
    for unode in edge:
        for vnode, w in edge[unode].items():
            if unode < vnode:
                Gx.add_edge(unode, vnode, weight=w)

    # Rebuild the working state on the collapsed graph.
    G = Gx
    label_dict = label_dictx
    score_dict = score_dictx
    node_dict = node_dictx
    Next_label_dict = label_dictx
    nodes = nodesx
    degrees = G.degree()
    distance_dict = eg.Floyd(G)
    return (records, G, label_dict, score_dict, node_dict, Next_label_dict,
            nodes, degrees, distance_dict)

def bounded_inverse_closeness_centrality(G, v, l):
    """Return the sum of shortest-path distances from v to the nodes reached
    by a breadth-first search that stops once a vertex at distance l + 1 is
    dequeued, normalized by len(G) - 1.

    The bound compares shortest-path length with l + 1, so the function is
    meant for unweighted graphs, where shortest-path length equals hop count.
    """
    queue = [v]
    seen = set()
    seen.add(v)
    # All-pairs shortest paths; only the row of v is actually used.
    shortest_path = eg.Floyd(G)
    result = 0
    while len(queue) > 0:
        vertex = queue.pop(0)
        if shortest_path[v][vertex] == l + 1:
            break
        nodes = G.neighbors(node=vertex)
        for w in nodes:
            if w not in seen:
                queue.append(w)
                seen.add(w)
                result += shortest_path[v][w]
    return result / (len(G) - 1)

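
# A minimal usage sketch (illustrative only): on a hypothetical path graph
# 1-2-3-4, the call below accumulates the shortest-path distances from node 1
# discovered by the bounded BFS and divides by (n - 1). Relies on the
# module-level imports above.
#
#     G_demo = eg.Graph()
#     G_demo.add_edge(1, 2)
#     G_demo.add_edge(2, 3)
#     G_demo.add_edge(3, 4)
#     print(bounded_inverse_closeness_centrality(G_demo, 1, 2))
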
def kamada_kawai_layout(G, dist=None, pos=None, weight="weight", scale=1,
                        center=None, dim=2):
    """Position nodes using the Kamada-Kawai path-length cost-function.

    Parameters
    ----------
    G : graph or list of nodes
        A position will be assigned to every node in G.

    dist : dict (default=None)
        A two-level dictionary of optimal distances between nodes,
        indexed by source and destination node.
        If None, the distances are computed with eg.Floyd().

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        random_position() for dim >= 3, circular_position() for dim == 2
        and a linear layout for dim == 1.

    weight : string or None  optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node.

    Examples
    --------
    >>> pos = eg.kamada_kawai_layout(G)
    """
    nNodes = len(G)
    if nNodes == 0:
        return {}

    # Build the full distance matrix; unreachable pairs keep a large constant.
    if dist is None:
        dist = dict(eg.Floyd(G))
    dist_mtx = 1e6 * np.ones((nNodes, nNodes))
    for row, nr in enumerate(G):
        if nr not in dist:
            continue
        rdist = dist[nr]
        for col, nc in enumerate(G):
            if nc not in rdist:
                continue
            dist_mtx[row][col] = rdist[nc]

    # Initial positions, chosen according to the layout dimension.
    if pos is None:
        if dim >= 3:
            pos = eg.random_position(G, dim=dim)
        elif dim == 2:
            pos = eg.circular_position(G)
        else:
            pos = {n: pt for n, pt in zip(G, np.linspace(0, 1, len(G)))}
    pos_arr = np.array([pos[n] for n in G])

    pos = _kamada_kawai_solve(dist_mtx, pos_arr, dim)

    if center is None:
        center = np.zeros(dim)
    else:
        center = np.asarray(center)
    if len(center) != dim:
        msg = "length of center coordinates must match dimension of layout"
        raise ValueError(msg)

    pos = eg.rescale_position(pos, scale=scale) + center
    return dict(zip(G, pos))

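
# A minimal usage sketch (illustrative only): lay out a hypothetical 4-node
# cycle in 2-D. Relies on the module-level imports above and on the
# _kamada_kawai_solve helper defined elsewhere in this module; the result is
# a dict mapping each node to a length-2 numpy coordinate array.
#
#     G_demo = eg.Graph()
#     G_demo.add_edge("a", "b")
#     G_demo.add_edge("b", "c")
#     G_demo.add_edge("c", "d")
#     G_demo.add_edge("d", "a")
#     pos = kamada_kawai_layout(G_demo)
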
def HANP(G, m, delta, threshod=1, hier_open=0, combine_open=0):
    """Detect communities by the Hop Attenuation & Node Preference (HANP) algorithm.

    Returns the detected communities; note that the result is randomized.

    Implements the basic HANP algorithm and gives more freedom through the
    parameters, e.g., you can use threshod to set the condition for node
    updating. If the network is known to contain hierarchical and overlapping
    communities, it is recommended to use geodesic distance as the score
    measure (instead of receiving the current hop scores from the neighborhood
    and carrying out a subtraction) and, when an equilibrium is reached, to
    treat newly combined communities as single nodes. Because Floyd is used to
    obtain the shortest distances, the time complexity of that mode is rather
    high.

    Parameters
    ----------
    G : graph
        An easygraph graph.

    m : float
        Used to calculate the score. When m > 0, more preference is given to
        nodes with more neighbors; when m < 0, less.

    delta : float
        Hop attenuation.

    threshod : float
        Between 0 and 1. Only update a node whose fraction of neighbors
        sharing the maximal label is less than the threshold, e.g.,
        threshod == 1 means updating all nodes.

    hier_open : int
        1 means using geodesic distance as the score measure, 0 means not.

    combine_open : int
        Valid only when hier_open == 1. 1 means that when an equilibrium is
        reached, newly combined communities are treated as single nodes;
        0 means not.

    Returns
    -------
    communities : dict
        key: serial number of the community, value: nodes in the community.

    Examples
    --------
    >>> HANP(G,
    ...      m=0.1,
    ...      delta=0.05,
    ...      threshod=1,
    ...      hier_open=0,
    ...      combine_open=0
    ...      )

    References
    ----------
    .. [1] Ian X. Y. Leung, Pan Hui, Pietro Liò, and Jon Crowcroft:
       Towards real-time community detection in large networks
    """
    nodes = list(G.nodes.keys())
    if len(nodes) == 1:
        return {1: [nodes[0]]}

    label_dict = dict()
    score_dict = dict()
    node_dict = dict()
    Next_label_dict = dict()
    cluster_community = dict()
    degrees = G.degree()
    records = []
    loop_count = 0
    i = 0
    old_score = 1
    ori_G = G
    if hier_open == 1:
        distance_dict = eg.Floyd(G)

    # Every node starts in its own community with score 1.
    for node in nodes:
        label_dict[node] = i
        score_dict[i] = 1
        node_dict[i] = node
        i = i + 1

    while True:
        loop_count += 1
        random.shuffle(nodes)
        score = 1
        for node in nodes:
            labels = SelectLabels_HANP(G, node, label_dict, score_dict,
                                       degrees, m, threshod)
            if labels == []:
                Next_label_dict[node] = label_dict[node]
                continue
            old_label = label_dict[node]
            Next_label_dict[node] = random.choice(labels)
            # Asynchronous updates. If you want to use synchronous updates,
            # comment the line below.
            label_dict[node] = Next_label_dict[node]
            if hier_open == 1:
                score_dict[Next_label_dict[node]] = UpdateScore_Hier(
                    G, node, label_dict, node_dict, distance_dict)
                score = min(score, score_dict[Next_label_dict[node]])
            else:
                if old_label == Next_label_dict[node]:
                    cdelta = 0
                else:
                    cdelta = delta
                score_dict[Next_label_dict[node]] = UpdateScore(
                    G, node, label_dict, score_dict, cdelta)

        if hier_open == 1 and combine_open == 1:
            if old_score - score > 1 / 3:
                old_score = score
                (
                    records,
                    G,
                    label_dict,
                    score_dict,
                    node_dict,
                    Next_label_dict,
                    nodes,
                    degrees,
                    distance_dict,
                ) = CombineNodes(
                    records,
                    G,
                    label_dict,
                    score_dict,
                    node_dict,
                    Next_label_dict,
                    nodes,
                    degrees,
                    distance_dict,
                )

        label_dict = Next_label_dict
        if estimate_stop_cond_HANP(G, label_dict, score_dict, degrees, m,
                                   threshod) is True:
            break
        # As mentioned in the paper, the number of iterations required is
        # suggested to be independent of the number of nodes, and after five
        # iterations 95% of the nodes are already accurately clustered.
        if loop_count > 20:
            break

    print("After %d iterations, HANP complete." % loop_count)

    for node in label_dict.keys():
        label = label_dict[node]
        if label not in cluster_community.keys():
            cluster_community[label] = [node]
        else:
            cluster_community[label].append(node)

    if hier_open == 1 and combine_open == 1:
        records.append(cluster_community)
        cluster_community = ShowRecord(records)
    result_community = CheckConnectivity(ori_G, cluster_community)
    return result_community

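
# A minimal usage sketch (illustrative only): run HANP on a hypothetical graph
# made of two triangles joined by one bridge edge. Relies on the module-level
# imports above and on the helpers (SelectLabels_HANP, UpdateScore, ...)
# defined elsewhere in this module; community membership may vary between
# runs because the update order and tie-breaking are random.
#
#     G_demo = eg.Graph()
#     for u, v in [(1, 2), (2, 3), (1, 3), (4, 5), (5, 6), (4, 6), (3, 4)]:
#         G_demo.add_edge(u, v)
#     print(HANP(G_demo, m=0.1, delta=0.05))
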