def cluster_sp():
    similaritymatrix = get_matrix("datasets/similaritymatrix.csv")
    graph = add_graph(similaritymatrix, 1000)
    dendo = get_dendrogram(graph)
    bestpartition = best_partition(graph)
    result = dictionaries_to_dataframe(bestpartition, dendo[0],
                                       'SP ID', 'State', 'City')
    result.to_csv('datasets/result.csv', sep=',', index=False)
    cities_graph = community.induced_graph(dendo[0], graph).edges(data='weight')
    list_to_csv(cities_graph, 'cities_graph.csv')
    states_graph = community.induced_graph(bestpartition, graph).edges(data='weight')
    list_to_csv(states_graph, 'states_graph.csv')
def makeCommunityInducedGraph(graph, partition, weight=True):
    """ Get Community Induced Graph of the partition """
    if weight:
        induced_subgraph = community.induced_graph(partition, graph, weight='weight')
    else:
        induced_subgraph = community.induced_graph(partition, graph, weight=None)
    return induced_subgraph
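# A minimal usage sketch for the wrapper above, assuming python-louvain is
# importable as `community` (as the body implies) and NetworkX is installed.
import networkx as nx
import community  # python-louvain

g = nx.karate_club_graph()
partition = community.best_partition(g)         # {node: community id}
meta = makeCommunityInducedGraph(g, partition)  # one node per community
print(meta.nodes(), meta.size(weight='weight'))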
def test_weight(self):
    """
    Test that total edge weight does not change
    """
    graph = nx.erdos_renyi_graph(50, 0.1)
    part = dict([])
    for node in graph.nodes():
        part[node] = node % 5
    self.assertEqual(graph.size(weight='weight'),
                     co.induced_graph(part, graph).size(weight='weight'))
    for e1, e2 in graph.edges_iter():
        graph[e1][e2]["test_weight"] = 2.
    self.assertEqual(graph.size(weight='test_weight'),
                     co.induced_graph(part, graph, "test_weight").size(weight='test_weight'))
def plot_community_graph(graph, partition, figsize=(8, 8), node_size=200,
                         plot_overlaps=False, plot_labels=False, cmap=None,
                         top_k=None, min_size=None):
    """
    Plot a community graph with node color coding for communities.

    :param graph: NetworkX/igraph graph
    :param partition: NodeClustering object
    :param figsize: the figure size; a pair of floats, default (8, 8)
    :param node_size: int, default 200
    :param plot_overlaps: bool, default False. Flag to control if multiple community memberships are plotted.
    :param plot_labels: bool, default False. Flag to control if node labels are plotted.
    :param cmap: str or Matplotlib colormap for mapping node intensities. If set to None, the default colormap is used.
    :param top_k: int, show only the top K influential communities. Zero or a negative value means all.
    :param min_size: int, exclude communities below the specified minimum size.

    Example:

    >>> from cdlib import algorithms, viz
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> viz.plot_community_graph(g, coms)
    """
    cms = __filter(partition.communities, top_k, min_size)

    node_to_com = {}
    for cid, com in enumerate(cms):
        for node in com:
            if node not in node_to_com:
                node_to_com[node] = cid
            else:
                # duplicating overlapped node
                alias = "%s_%s" % (node, cid)
                node_to_com[alias] = cid
                edges = [(alias, y) for y in graph.neighbors(node)]
                graph.add_edges_from(edges)

    # handling partial coverage
    s = nx.subgraph(graph, node_to_com.keys())

    # community graph construction
    c_graph = induced_graph(node_to_com, s)
    node_cms = [[node] for node in c_graph.nodes()]

    return plot_network_clusters(c_graph, NodeClustering(node_cms, None, ""),
                                 nx.spring_layout(c_graph), figsize=figsize,
                                 node_size=node_size, plot_overlaps=plot_overlaps,
                                 plot_labels=plot_labels, cmap=cmap)
def external_ec_coarsening(graph, sfdp_path, coarsening_scheme=2, c_type='original'):
    if c_type == 'louvain':
        print("Coarsening with Louvain")
        matrix = magicgraph.to_adjacency_matrix(graph)
        nx_graph = nx.from_scipy_sparse_matrix(matrix)
        dendro = community.generate_dendrogram(nx_graph)
        coarse_graphs = [DoubleWeightedDiGraph(graph)]
        merges = []
        i = 0
        for l in range(len(dendro)):
            level = community.partition_at_level(dendro, l)
            induced = community.induced_graph(level, nx_graph)
            filename = 'induced' + str(l) + '.edgelist'
            #nx.write_edgelist(induced, filename)
            # write the weighted graph to file
            f = open(filename, 'w')
            for u, v, a in induced.edges.data('weight', default=1):
                line = ' '.join([str(u), str(v), str(a)])
                f.write(line + '\n')
            f.close()
            m_graph = magicgraph.load_weighted_edgelist(filename, undirected=True)
            coarse_graphs.append(DoubleWeightedDiGraph(m_graph))
            merges.append(level)
            print('Level: ', i, 'N nodes: ', m_graph.number_of_nodes())
            i += 1
        return coarse_graphs, merges
    elif c_type == 'original':
        return original_coarsening(graph, sfdp_path, coarsening_scheme)
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    nx_G = nx.from_numpy_matrix(adj_matrix)
    dendro = louvain.generate_dendrogram(nx_G, randomize=False)  # maybe set randomize=True

    if len(dendro) - level - 1 < 0:
        raise Exception("The given level is too deep. The maximum is: " +
                        str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, len(dendro) - level - 1)
    number_communities = max(communities.values()) + 1  # highest community id + 1

    # Maybe unnecessary after some code rework and unification
    community_list = []
    for i in range(number_communities):
        grp_list = []
        for grp in communities:
            if communities[grp] == i:
                grp_list.append(grp)
        else:
            if grp_list:
                community_list.append(grp_list)

    community_level_G = louvain.induced_graph(communities, nx_G)

    if return_c_graph:
        c_level_graph = nx.adjacency_matrix(community_level_G)
    else:
        c_level_graph = None

    return community_list, c_level_graph
def test_weight(self):
    """
    Test that total edge weight does not change
    """
    graph = nx.erdos_renyi_graph(50, 0.1)
    part = dict([])
    for node in graph.nodes():
        part[node] = node % 5
    self.assertEqual(graph.size(weight='weight'),
                     co.induced_graph(part, graph).size(weight='weight'))
    for src, dst in graph.edges_iter():
        graph[src][dst]["test_weight"] = 2.
    induced = co.induced_graph(part, graph, "test_weight")
    self.assertEqual(graph.size(weight='test_weight'),
                     induced.size(weight='test_weight'))
def test_nodes(self):
    """
    Test that result nodes are the communities
    """
    g = nx.erdos_renyi_graph(50, 0.1)
    part = dict([])
    for node in g.nodes():
        part[node] = node % 5
    self.assertSetEqual(set(part.values()),
                        set(co.induced_graph(part, g).nodes()))
def test_weight(self):
    """
    Test that total edge weight does not change
    """
    g = nx.erdos_renyi_graph(50, 0.1)
    part = dict([])
    for node in g.nodes():
        part[node] = node % 5
    self.assertEqual(g.size(weight='weight'),
                     co.induced_graph(part, g).size(weight='weight'))
def test_weight(self):
    """
    Test that total edge weight does not change
    """
    graph = nx.erdos_renyi_graph(50, 0.1)
    part = dict([])
    for node in graph.nodes():
        part[node] = node % 5
    self.assertEqual(graph.size(weight='weight'),
                     co.induced_graph(part, graph).size(weight='weight'))
    for e1, e2 in graph.edges():
        graph[e1][e2]["test_weight"] = 2.
    self.assertEqual(
        graph.size(weight='test_weight'),
        co.induced_graph(part, graph, "test_weight").size(weight='test_weight'))
def ppl(diary):
    # make people-event DataFrame
    ppl_evt = diary[['Event', 'Participants']].dropna(subset=['Participants'])
    evtXie = pd.DataFrame({
        "Event": ppl_evt['Event'].unique(),
        "Participants": "謝蘭生"
    })
    ppl_evt = ppl_evt.append(evtXie, ignore_index=True)

    # make edgelist from people-event DF
    el = ppl_evt.merge(ppl_evt, on="Event")
    el = el.drop("Event", axis=1).rename(columns={
        "Participants_x": "Source",
        "Participants_y": "Target"
    })
    el = el.query("Source<Target")
    el["Weight"] = 1

    # calculate edge weights with groupby
    edgelist = el.groupby(["Source", "Target"]).sum().reset_index()

    # export edges into a graphml file
    G = nx.from_pandas_dataframe(edgelist, source="Source", target="Target",
                                 edge_attr="Weight")
    nx.set_node_attributes(G, "k-core", nx.core_number(G))
    communityDict = community.best_partition(G)
    nx.set_node_attributes(G, "community", communityDict)
    nx.set_node_attributes(
        G, "betweenness",
        nx.betweenness_centrality(G, 850, normalized=True, weight="Weight"))
    nx.write_graphml(G, "Graph/Network/ppl.graphml", encoding="utf-8")

    # export a graph of the relationships between communities
    G_commun = community.induced_graph(communityDict, G)
    nx.write_graphml(G_commun, "Graph/Network/pplCommunity.graphml",
                     encoding="utf-8")

    # export nodes with attributes into a csv
    idx, attr = zip(*G.nodes(data=True))
    core = [d['k-core'] for d in attr]
    commun = [d['community'] for d in attr]
    betwn = [d['betweenness'] for d in attr]
    nodes_attr = pd.DataFrame(
        {
            'k-core': core,
            'community': commun,
            'betweenness centrality': betwn
        },
        index=idx)
    nodes_attr.to_csv("csv/pplCoreCommunity.csv", encoding='utf-8')
    return
def test_unique(self):
    """
    Test that the induced graph is the same when all nodes are alone
    """
    graph = nx.erdos_renyi_graph(50, 0.1)
    part = dict([])
    for node in graph.nodes():
        part[node] = node
    ind = co.induced_graph(part, graph)
    self.assertTrue(nx.is_isomorphic(graph, ind))
def test_uniq(self):
    """
    Test that the induced graph is the same when all nodes are alone
    """
    g = nx.erdos_renyi_graph(50, 0.1)
    part = dict([])
    for node in g.nodes():
        part[node] = node
    ind = co.induced_graph(part, g)
    self.assert_(nx.is_isomorphic(g, ind))
def test_nodes(self):
    """
    Test that result nodes are the communities
    """
    graph = nx.erdos_renyi_graph(50, 0.1)
    part = dict([])
    for node in graph.nodes():
        part[node] = node % 5
    self.assertSetEqual(set(part.values()),
                        set(co.induced_graph(part, graph).nodes()))
def plot_community_graph(graph, partition, figsize=(8, 8), node_size=200,
                         plot_overlaps=False, plot_labels=False):
    """
    Plot a community graph with node color coding for communities.

    :param graph: NetworkX/igraph graph
    :param partition: NodeClustering object
    :param figsize: the figure size; a pair of floats, default (8, 8)
    :param node_size: int, default 200
    :param plot_overlaps: bool, default False. Flag to control if multiple community memberships are plotted.
    :param plot_labels: bool, default False. Flag to control if node labels are plotted.

    Example:

    >>> from cdlib import algorithms, viz
    >>> import networkx as nx
    >>> g = nx.karate_club_graph()
    >>> coms = algorithms.louvain(g)
    >>> viz.plot_community_graph(g, coms)
    """
    cms = partition.communities

    node_to_com = {}
    for cid, com in enumerate(cms):
        for node in com:
            if node not in node_to_com:
                node_to_com[node] = cid
            else:
                # duplicating overlapped node
                alias = "%s_%s" % (node, cid)
                node_to_com[alias] = cid
                edges = [(alias, y) for y in graph.neighbors(node)]
                graph.add_edges_from(edges)

    # handling partial coverage
    s = nx.subgraph(graph, node_to_com.keys())

    # community graph construction
    c_graph = induced_graph(node_to_com, s)
    node_cms = [[node] for node in c_graph.nodes()]

    return plot_network_clusters(c_graph, NodeClustering(node_cms, None, ""),
                                 nx.spring_layout(c_graph), figsize=figsize,
                                 node_size=node_size, plot_overlaps=plot_overlaps,
                                 plot_labels=plot_labels)
def setup(g, num_players, num_seeds):
    # first compute the best partition
    partition = community.best_partition(g)
    induced_graph = community.induced_graph(partition, g)

    # Play around with picking the "best" community
    # node boundary?
    #print nx.current_flow_closeness_centrality(induced_graph)  # not better
    #print nx.katz_centrality(induced_graph)  # doesn't converge
    #print nx.eigenvector_centrality(induced_graph)  # not as good
    #print nx.communicability_centrality(induced_graph)  # not as good
    # sample outputs of the centrality measures above:
    #{0: 8.451771641899612, 1: 9.041654401534407, 2: 9.321830560246685, 3: 8.79634625159723, 4: 7.512000387517644, 5: 9.319261339431147, 6: 8.635502364748598, 7: 9.182167514276696, 8: 8.812816793986622, 9: 5.955242238035001, 10: 7.224124906314186, 11: 8.598864555204745, 12: 1.3780813983087927, 13: 8.574141188778002, 14: 1.4894068385674029}
    #{0: 0.03170498456257798, 1: 0.03351885293616147, 2: 0.982004394865475, 3: 0.009750044520081363, 4: 0.012642119637055598, 5: 0.08211419419246402, 6: 0.013202397926046897, 7: 0.15814666928657686, 8: 0.026268239793024895, 9: 0.0005523351650465954, 10: 0.0009839216844465231, 11: 0.019821817113884598, 12: 4.399697547690089e-05, 13: 0.016495461620553098, 14: 0.00022120782918811697}
    #{0: 1670.2226290285078, 1: 3648.298186716118, 2: 4153.05229512053, 3: 3214.282455755265, 4: 561.0349179323383, 5: 4068.320908838754, 6: 2977.2760610270666, 7: 3474.488922208751, 8: 3493.8811964338947, 9: 1521.5720949300896, 10: 2520.2823105797784, 11: 1385.0884502097147, 12: 281.6674672972596, 13: 2306.8136315883607, 14: 358.98207498678886}

    # viewer.draw_graph(induced_graph)
    # try:
    #     plt.show()
    # except:
    #     plt.hide()

    # Choose the community with the most number of outgoing edges
    #weights = nx.communicability_centrality(induced_graph)  # weight='weight'
    weights = nx.degree(induced_graph, weight='weight')
    #print weights
    best_com = max(weights, key=weights.__getitem__)

    com = defaultdict(list)
    for node, c in partition.iteritems():
        com[c].append(node)
    selected_comm = g.subgraph(com[best_com])

    # get one node from every clique
    #print selected_comm.number_of_nodes()
    # max_size_clique = nx.graph_clique_number(selected_comm)
    # print max_size_clique
    # lst = []
    # for cl in nx.find_cliques(selected_comm):
    #     if len(cl) >= max_size_clique/2:
    #         lst.append(r.choice(cl))
    #         #return cl
    #     #print len(cl), cl
    # return lst
    #print nx.find_cliques(selected_comm)
    #setup = largest_clique.setup(selected_comm, num_players, num_seeds)
    #return setup
    return strat.setup(selected_comm, num_players, num_seeds)
def test_clique(self):
    """
    Test that a complete graph of size 2*n has the right behavior
    when split in two
    """
    n = 5
    g = nx.complete_graph(2 * n)
    part = dict([])
    for node in g.nodes():
        part[node] = node % 2
    ind = co.induced_graph(part, g)
    goal = nx.Graph()
    goal.add_weighted_edges_from([(0, 1, n * n),
                                  (0, 0, n * (n - 1) / 2),
                                  (1, 1, n * (n - 1) / 2)])
    self.assert_(nx.is_isomorphic(ind, goal))
def ppl_plc(diary):
    # make people-place DataFrame by merging on event
    evt_plc = diary[['Event', 'Place']].dropna()
    evt_ppl = diary[['Event', 'Participants']]
    Xie = pd.DataFrame({'Event': diary['Event'], 'Participants': '謝蘭生'})
    evt_ppl = evt_ppl.append(Xie, ignore_index=True)
    ppl_plc = pd.merge(evt_plc, evt_ppl, how='left').dropna().drop('Event', axis=1)
    ppl_plc['Weight'] = 1

    # make edgelist
    edges = ppl_plc.groupby(['Place', 'Participants']).sum().reset_index()
    ppl_dict = pd.Series('People', index=edges['Participants'].unique()).to_dict()
    plc_dict = pd.Series('Place', index=edges['Place'].unique()).to_dict()
    type_dict = {**ppl_dict, **plc_dict}
    edges.rename(columns={
        'Participants': 'Source',
        'Place': 'Target'
    }, inplace=True)

    # make the people-place network
    G = nx.from_pandas_dataframe(edges, source="Source", target="Target",
                                 edge_attr="Weight")
    nx.set_node_attributes(G, 'Type', type_dict)
    nx.set_node_attributes(G, "k-core", nx.core_number(G))
    communityDict = community.best_partition(G)
    nx.set_node_attributes(G, "community", communityDict)
    nx.write_graphml(G, "Graph/Network/ppl_plc.graphml", encoding="utf-8")

    # make the community network
    G_commun = community.induced_graph(communityDict, G)
    nx.write_graphml(G_commun, "Graph/Network/ppl_plc_Community.graphml",
                     encoding="utf-8")

    # export nodes with attributes into a csv
    idx, attr = zip(*G.nodes(data=True))
    core = [d['k-core'] for d in attr]
    commun = [d['community'] for d in attr]
    nodes_attr = pd.DataFrame({'k-core': core, 'community': commun}, index=idx)
    nodes_attr.to_csv("csv/ppl_plc_CoreCommunity.csv", encoding='utf-8')
    return
def test_clique(self):
    """
    Test that a complete graph of size 2*graph_size has the right behavior
    when split in two
    """
    graph_size = 5
    graph = nx.complete_graph(2 * graph_size)
    part = dict([])
    for node in graph.nodes():
        part[node] = node % 2
    ind = co.induced_graph(part, graph)
    goal = nx.Graph()
    edges = [(0, 1, graph_size ** 2),
             (0, 0, graph_size * (graph_size - 1) / 2),
             (1, 1, graph_size * (graph_size - 1) / 2)]
    goal.add_weighted_edges_from(edges)
    self.assertTrue(nx.is_isomorphic(ind, goal))
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    nx_G = nx.from_numpy_array(adj_matrix)
    dendro = louvain.generate_dendrogram(nx_G, randomize=False,
                                         random_state=0)  # maybe set randomize=True
    #print(dendro)

    level = len(dendro) - level - 1
    if level < 0:
        raise Exception("The given level is too deep. The maximum is: " +
                        str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, level)
    number_communities = max(communities.values()) + 1  # highest community id + 1

    # Maybe unnecessary after some code rework and unification
    community_list = []
    for i in range(number_communities):
        grp_list = []
        for grp in communities:
            if communities[grp] == i:
                grp_list.append(grp)
        else:
            if grp_list:
                community_list.append(grp_list)

    community_level_G = louvain.induced_graph(communities, nx_G)

    if return_c_graph:
        c_level_graph = nx.adjacency_matrix(community_level_G)
    else:
        c_level_graph = None

    inv_dendro = []
    for dct in dendro:
        inv_dct = {}
        for k, v in dct.items():
            inv_dct.setdefault(v, []).append(k)
        inv_dendro.append(inv_dct)

    return community_list, c_level_graph, dendro, inv_dendro
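# A quick smoke test for calc_louvain above -- a sketch assuming python-louvain
# is imported as `louvain` and NetworkX as `nx`, as the function body implies.
import numpy as np

# tiny path graph 0-1-2-3; Louvain typically splits it into {0,1} and {2,3}
adj = np.array([[0., 1., 0., 0.],
                [1., 0., 1., 0.],
                [0., 1., 0., 1.],
                [0., 0., 1., 0.]])
comms, c_graph, dendro, inv_dendro = calc_louvain(adj, level=0, return_c_graph=True)
print(comms)              # e.g. [[0, 1], [2, 3]]
print(c_graph.todense())  # community-level adjacency (scipy sparse matrix)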
def drawInducedGraph(g, subgraph, partition, algorithm, comm_to_plot, community_d):
    # colors
    comm_to_color_zip = zip(COLORS, comm_to_plot)
    comm_to_color = [color for color, com in comm_to_color_zip]

    fig, ax = plt.subplots()

    # induced graph scoring
    score = community.modularity(partition, subgraph)
    print "Induced Community Score: ", score

    # draw induced graph
    new_er_partition = {node: comm for node, comm in partition.items()
                        if comm in comm_to_plot}
    comm_graph = community.induced_graph(new_er_partition, subgraph)
    new_partition = {comm: comm for comm in comm_graph.nodes()}
    new_pos = community_layout(comm_graph, new_partition,
                               comm_scale=2000, node_scale=50)
    weights = [comm_graph[u][v]['weight'] / 15.0 for u, v in comm_graph.edges()]
    # node_degs = [val*35 for node, val in nx.degree(comm_graph)]
    community_self_loops = [community_d[comm] for comm in comm_graph.nodes()]
    nx.draw(comm_graph, new_pos, cmap=plt.get_cmap('jet'), with_labels=False,
            arrows=True, node_color=comm_to_color,
            node_size=community_self_loops, width=weights)
    plt.title("Supernodes from Community Detection Using " + algorithm +
              " Algorithm \n Modularity Score: " + str(score))
    plt.savefig('new_plots/induced_community_' + algorithm + '_' +
                LAYOUT_TYPE + '.png')
def induced_graph(original_graph, partition, induced_graph=None,
                  rescale_node_size=1., draw=True, cmap=None,
                  words_map_inv=None, pos=None, betweenness_scaled=None):
    """
    Returns the graph induced from the community partition of the graph
    """
    if induced_graph is None:
        induced_graph = community.induced_graph(partition, original_graph,
                                                weight="weight")
    if draw and cmap:
        if pos is None:
            pos = nx.spring_layout(induced_graph)
        w = induced_graph.degree(weight="weight")
        sizes = [w[node] * rescale_node_size for node in induced_graph.nodes()]
        nx.draw(induced_graph, pos=pos, node_size=sizes,
                node_color=[cmap[n] for n in induced_graph.nodes()])

        labels = {}
        for com in induced_graph.nodes():
            # label each community by its highest-weighted-degree member
            rep = max([nodes for nodes in partition.keys()
                       if partition[nodes] == com],
                      key=lambda n: original_graph.degree(n, weight="weight"))
            labels[com] = words_map_inv[rep]
        nx.draw_networkx_labels(induced_graph, pos, labels, font_size=16)

    return induced_graph
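# A hypothetical driver for the drawing helper above. Note that `cmap` here is
# assumed to be a plain dict keyed by community id (the body indexes it as
# cmap[n]) and `words_map_inv` a node-to-label dict; both inputs are
# placeholders for this sketch, not a library API.
import networkx as nx
import community  # python-louvain
import matplotlib.pyplot as plt

g = nx.karate_club_graph()
part = community.best_partition(g)
labels = {n: str(n) for n in g.nodes()}                         # node -> label
colors = {c: plt.cm.tab10(c % 10) for c in set(part.values())}  # community -> RGBA
ig = induced_graph(g, part, cmap=colors, words_map_inv=labels)
plt.show()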
def draw_graph(self, output_file=None, npmi_threshhold=.7):
    """Draw a graph of used-with connections between packages

    @param output_file: file to save graph to; if None, then show graph
    """
    G1 = self.G.copy()
    for edge in self.G.edges_iter(data=True):
        if edge[2]["weight"] < npmi_threshhold:
            G1.remove_edge(edge[0], edge[1])
    G1 = nx.subgraph(G1, [node for node in G1.nodes()
                          if nx.degree(G1)[node] > 1])
    partition = community.best_partition(G1)
    # seed each node's position at its community's location in the induced graph
    partpos = nx.spring_layout(community.induced_graph(partition, G1),
                               iterations=100)
    forced_partpos = {n: partpos[partition[n]] for n in G1.nodes()}
    pos = nx.spring_layout(G1, pos=forced_partpos, iterations=200)
    plt.clf()
    plt.figure(figsize=(36, 36))
    plt.axis("off")
    plt.title('usedwith')
    labels = {node: self.names[node] for node in G1.nodes()}
    nx.draw_networkx_edges(G1, pos, edge_color="#cccccc")
    nx.draw_networkx_nodes(G1, pos, node_size=50,
                           node_color=[hashColor(partition[n]) for n in G1.nodes()])
    nx.draw_networkx_labels(G1, pos, labels=labels)
    if output_file is None:
        plt.show()
    else:
        plt.savefig(output_file, bbox_inches='tight')
def network_plots(l, partition, out, temp):
    # projection colored and sized by community
    pos = community_layout(l, partition)
    min_comm_size = get_top_comm_len(partition, 3)
    print('MIN COMM SIZE: ' + str(min_comm_size))
    c = _color_nodes(l, partition, min_comm_size)
    s = _size_nodes(l, partition, 3)
    nx.draw(l, pos, node_color=c, node_size=s, width=0.3, alpha=0.7)
    plt.savefig(out + '02_comms_projection.png')
    plt.close()

    # induced network of communities
    ind = induced_graph(partition, l)
    node_size = []
    for comm_node in ind.nodes:
        size = temp[temp['community'] == comm_node]['nodecount'].values[0]
        if size == 1:
            node_size.append(0)
        else:
            node_size.append(np.exp(size))
    nx.draw(ind, node_size=node_size, node_color='black', alpha=0.7, width=0.5)
    plt.savefig(out + 'induced_projection.png')
    plt.close()

    pos = nx.spring_layout(l, k=0.50)
    plt.axis('off')
    nx.draw_networkx(l, node_size=7, with_labels=False, node_color=c,
                     edge_color='black', width=0.3, alpha=0.7, pos=pos)
    plt.savefig(out + 'projection.png')
    plt.close()
def induced_graph_viz(l, partition, partition_df, name):
    partition_df = partition_df.reset_index()
    partition_df = partition_df.groupby([0]).count()
    print(partition_df.head())
    partition_df = partition_df.reset_index()
    print(partition_df.head())
    partition_df.columns = ['community', 'nodecount']

    # induced network of communities
    ind = induced_graph(partition, l)
    node_size = []
    for comm_node in ind.nodes:
        size = partition_df[partition_df['community'] == comm_node]['nodecount'].values[0]
        if size == 1:
            node_size.append(0)
        else:
            node_size.append(size ** 2)
    plt.figure(figsize=(14, 16))
    nx.draw(ind, node_size=node_size, alpha=0.7)
    plt.savefig(name + '_induced_projection.png')
    plt.close()
    return ind
def induce_graph_by_communities(graph: nx.Graph,
                                communities: Dict[Any, int],
                                weight_attribute: str = 'weight') -> nx.Graph:
    """
    Creates a community graph with nodes from the communities dictionary
    and using the edges of the original graph to form edges between
    communities.

    Weights are aggregated; you may need to normalize the resulting graph
    after calling this function.

    Note: logs a warning if the size of the community dictionary is less
    than the size of the provided graph's vertex set.

    :param networkx.Graph graph: The original graph that contains the edges that will be used to formulate a new induced community graph
    :param communities: The communities dictionary provides a mapping of original vertex ID to new community ID.
    :type communities: dict[Any, int]
    :param str weight_attribute: The weight attribute on the original graph's edges to use when aggregating the weights of the induced community graph. Default is `weight`.
    :return: The induced community graph.
    :rtype: networkx.Graph
    :raises ValueError: If the graph is None
    :raises ValueError: If the communities dictionary is None
    """
    logger = logging.getLogger(__name__)

    if graph is None:
        raise ValueError("graph cannot be None")
    if communities is None:
        raise ValueError("communities cannot be None")
    if len(communities) < len(graph.nodes()):
        logger.warning(
            f"Length of communities provided ({len(communities)}) is less than the "
            + f"total number of nodes in the graph ({len(graph.nodes())})")

    return community.induced_graph(communities, graph, weight_attribute)
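# A short usage sketch, assuming the function above is in scope and
# python-louvain is importable as `community`; the weight-conservation check
# mirrors the unit tests earlier in this section.
import networkx as nx
import community  # python-louvain

g = nx.karate_club_graph()
nx.set_edge_attributes(g, 1, 'weight')   # uniform weights: induced weights = edge counts
mapping = community.best_partition(g)    # {node: community id}
meta = induce_graph_by_communities(g, mapping)
assert meta.size(weight='weight') == g.size(weight='weight')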
def augment_graph_data(data, max_groups):
    total_nodes = len(data['nodes'])

    # lowering the necessary node count
    # since in some cases node count is greatly reduced after processing
    # first author kurtz,m goes from ~60 to 19 for instance
    if total_nodes < 15:
        # just get rid of the sets
        for i, l in enumerate(data["links"]):
            data["links"][i]["overlap"] = list(l["overlap"])
        return {"fullGraph": data}

    # create the networkx graph
    G = nx.Graph()
    for i, x in enumerate(data['nodes']):
        G.add_node(i, node_name=x["nodeName"], nodeWeight=x["nodeWeight"],
                   title=x["title"], citation_count=x["citation_count"],
                   first_author=x["first_author"], read_count=x["read_count"])

    for i, x in enumerate(data['links']):
        G.add_edge(x["source"], x["target"], weight=x["value"],
                   overlap=list(x["overlap"]))

    all_nodes = G.nodes()

    # partition is a dictionary with group names as keys
    # and individual node indexes as values
    partition = community.best_partition(G)

    for g in G.nodes():
        G.node[g]["group"] = partition[g]

    # with new group info, create the summary group graph
    summary_graph = community.induced_graph(partition, G)

    # title container
    titles = {}

    # enhance the information that will be in the json handed off to d3
    for x in summary_graph.nodes():
        summary_graph.node[x]["total_citations"] = sum(
            [G.node[paper].get("citation_count", 0) for paper in G.nodes()
             if G.node[paper]["group"] == x])
        summary_graph.node[x]["total_reads"] = sum(
            [G.node[paper].get("read_count", 0) for paper in G.nodes()
             if G.node[paper]["group"] == x])
        papers = sorted(
            [G.node[paper] for paper in G.nodes() if G.node[paper]["group"] == x],
            key=lambda x: x.get("nodeWeight", 0), reverse=True)
        titles[x] = [p["title"] for p in papers]
        summary_graph.node[x]["paper_count"] = len(papers)

    # attaching title 'word clouds' to the nodes
    significant_words = tf_idf.get_tf_idf_vals(titles)
    for x in summary_graph.nodes():
        # remove the ones with only 1 paper
        if summary_graph.node[x]["paper_count"] == 1:
            summary_graph.remove_node(x)
        else:
            # otherwise, give them a title
            # how many words should we show on the group?
            # max 6, otherwise 1 per every 2 papers
            summary_graph.node[x]["node_label"] = dict(
                sorted(significant_words[x].items(), key=lambda x: x[1],
                       reverse=True)[:6])

    # remove all but top n groups from summary graph
    # where top n is measured by total citations from a group
    top_nodes = sorted([n for n in summary_graph.nodes(data=True)],
                       key=lambda x: x[1]["total_citations"],
                       reverse=True)[:max_groups]
    top_nodes = [t for t in top_nodes
                 if t[1]["total_citations"] >= 1]  # keep groups with at least one citation
    top_node_ids = [n[0] for n in top_nodes]

    for group_id in summary_graph.nodes():
        if group_id not in top_node_ids:
            summary_graph.remove_node(group_id)

    # remove nodes from full graph that aren't in top group
    # this automatically takes care of edges, too
    for node in G.nodes(data=True):
        if node[1]["group"] not in top_node_ids:
            G.remove_node(node[0])

    # continuing to enhance the information: add to group info about the
    # most common co-references
    for x in summary_graph.nodes():
        # make a float so division later to get a percent makes sense
        num_papers = float(summary_graph.node[x]["paper_count"])
        references = {}
        # find all members of group x
        indexes = [paperIndex for paperIndex in G.nodes()
                   if G.node[paperIndex]["group"] == x]
        for edge in G.edges(data=True):
            # if it passes, it's an intra-group connection
            # [0] is source, [1] is target, [2] is data dict
            paper_one = edge[0]
            paper_two = edge[1]
            if paper_one in indexes and paper_two in indexes:
                for bib in edge[2]["overlap"]:
                    if bib in references:
                        references[bib].update([paper_one, paper_two])
                    else:
                        references[bib] = set([paper_one, paper_two])

        count_references = sorted(references.items(), key=lambda x: len(x[1]),
                                  reverse=True)[:5]
        top_common_references = [
            (tup[0], float("{0:.2f}".format(len(tup[1]) / num_papers)))
            for tup in count_references]
        top_common_references = dict(top_common_references)
        summary_graph.node[x]["top_common_references"] = top_common_references

    summary_json = json_graph.node_link_data(summary_graph)

    # giving groups node_names based on size of groups
    for i, n in enumerate(sorted(summary_json["nodes"],
                                 key=lambda x: x["paper_count"], reverse=True)):
        for possible_real_index, node in enumerate(summary_json["nodes"]):
            if node == n:
                real_index = possible_real_index
        summary_json["nodes"][real_index]["node_name"] = i + 1

    for i, n in enumerate(summary_json["nodes"]):
        # cache this so graph manipulation later is easier
        summary_json["nodes"][i]["stable_index"] = i

    final_data = {"summaryGraph": summary_json,
                  "fullGraph": json_graph.node_link_data(G)}
    return final_data
# Create network layout for visualizations
spring_pos = nx.spring_layout(G_karate)
plt.axis("off")
nx.draw_networkx(G_karate, pos=spring_pos, with_labels=False, node_size=80)

############### Community detection ###########
import community as com
parts = com.best_partition(G_karate)
values = [parts.get(node) for node in G_karate.nodes()]

plt.axis("off")
nx.draw_networkx(G_karate, pos=spring_pos, cmap=plt.get_cmap("jet"),
                 node_color=values, font_size=20, node_size=80,
                 with_labels=False)

## Calculate the modularity ##
com.modularity(parts, G_karate)

## Induced graph: each community is represented as one node ##
help(com)
G_induced = com.induced_graph(parts, G_karate)
plt.axis("off")
nx.draw_networkx(G_induced, cmap=plt.get_cmap("jet"), font_size=20,
                 node_size=80, with_labels=False)
import json

import pubmed
from utils import *
import community
import networkx as net
import matplotlib.pyplot as plot

#articles = pubmed.get_articles('compu')
#open('articles.json','wb').write(json.dumps(articles))
articles = json.loads(open('articles.json', 'rb').read())
#articles2 = [a for a in articles
#             if parser.parse(a['DP'].replace('-', ' ').split(' ')[0]) > datetime.datetime(2000, 1, 1)]

aunet = pubmed.make_author_network(articles)

## removes single links; now will separate into research groups.
components = net.connected_component_subgraphs(trim_edges(aunet, 2))

## separate the rest into communities. Plot the overall structure,
## individual clusters, and the macrostructure.
community.plot_community(components[0], filename='images/largest_community.pdf')
subgraphs = community.plot_partitions(components[0], filename='images/community')

ind = community.induced_graph(community.best_partition(components[0]), components[0])
net.draw(ind)
plot.savefig('images/macrostructure.pdf')
# Matrix2 = [[0 for x in range(size2)] for y in range(size2)]
#
# for i in range(0, size2):
#     for j in range(0, size2):
#         if j == i or j < i:
#             continue
#
#         for node in clusters[i]:
#             Matrix2[i][j] += helpers.num_edges(node, clusters[j], Matrix)
#             Matrix2[j][i] += helpers.num_edges(node, clusters[j], Matrix)
#
# Matrix3 = helpers.flip_edge_ratio(Matrix2, size2)
# Matrix4 = helpers.edge_ratio(Matrix2, size2)

G2 = community.induced_graph(labels, G)
size2 = G2.number_of_nodes()
Matrix2 = helpers.create_shortest_path_matrix(G2, size2, "weight")

dFrame2 = DataFrame(Matrix2)
dFrame2.to_csv("Matrix_Files/M2.csv")

# save matrix to file for output to dipha
dipha_utils.writeDistBin(Matrix2, infile2)
#os.system("run_dipha.bat")

# nx.draw(G)
# plt.savefig("G.png")
# nx.draw(G2)
# plt.savefig("G2.png")
import networkx as nx
import community as c
import matplotlib.pyplot as plt

# import the graph
G = nx.read_graphml("test1.graphml")

# find communities
dendo = c.generate_dendrogram(G)
for level in range(len(dendo) - 1):
    print("partition at level", level, "is", c.partition_at_level(dendo, level))
partition = c.best_partition(G)
m = c.modularity(partition, G)
print(m)
agglomerate = c.induced_graph(partition, G)

# draw the graph
colors = ["blue", "green", "yellow", "violet", "black",
          "orange", "cyan", "red", "indigo", "pink"]
plt.figure(1)
size = float(len(set(partition.values())))
pos = nx.spring_layout(G)
count = 0.
for community in set(partition.values()):
    count = count + 1.
    list_nodes = [nodes for nodes in partition.keys()
                  if partition[nodes] == community]
    nx.draw_networkx_nodes(G, pos, list_nodes, node_size=40,
                           node_color=colors[int(count) % 10])
nx.draw_networkx_edges(G, pos, alpha=0.5)
nx.draw_networkx_nodes(G_fb, spring_pos, list_nodes, node_size=15,
                       node_color=str(count / size))
nx.draw_networkx_edges(G_fb, spring_pos, alpha=0.5)
plt.show()

##### Dendrogram ######
dendo = community.generate_dendogram(G_fb)  # old python-louvain spelling
for level in range(len(dendo) - 1):
    print "partition at level", level, "is", community.partition_at_level(dendo, level)

##### Induced graph ####
G = community.induced_graph(parts, G_fb)
#nx.draw_networkx(G, pos=spring_pos, cmap=plt.get_cmap("jet"),
#                 node_color=values, node_size=15, with_labels=False)
nx.draw_networkx(G)
def build_json(hierarchy_dict, h5_data, dataset_name, graph, json, threshold):
    # data set dict
    ds_dict = {}
    # graph dict
    g_dict = {}
    # maximum hierarchy size
    hmax = len(hierarchy_dict["dendro"]) - 1
    # add pseudo entry to trigger single node dict creation
    hierarchy_dict[hmax + 1] = {}

    for hidx, hdict in hierarchy_dict.items():
        if not isinstance(hidx, int):
            continue
        # The dendrogram list is sorted inversely to the hierarchy dict.
        # Therefore, the dendrogram index has to be recalculated.
        didx = hmax - hidx
        # edge dict
        e_dict = {}
        # node dict
        n_dict = {}
        # hierarchy dict
        h_dict = {}

        if didx > -1:
            # Nodes
            for com, nodes in hierarchy_dict["inv_dendro"][didx].items():
                # attribute dict
                a_dict = {}
                a_dict["index"] = com
                a_dict["name"] = "h%in%i" % (hidx, com)
                a_dict["childs"] = nodes
                a_dict["mzs"] = list(h5_data.columns[hdict["communities"][com]])
                try:
                    a_dict["membership"] = hierarchy_dict["dendro"][didx + 1][com]
                except Exception as e:
                    print(e)
                n_dict["h%in%i" % (hidx, com)] = a_dict
        else:
            # single nodes are always the first entry in the dendrogram
            for node, com in hierarchy_dict["dendro"][0].items():
                a_dict = {}
                a_dict["index"] = node
                a_dict["name"] = h5_data.columns[node]
                a_dict["membership"] = com
                a_dict["mzs"] = [h5_data.columns[node]]
                n_dict["h%in%i" % (hidx, node)] = a_dict

        # Edges
        if didx > -1:
            community = louvain.partition_at_level(hierarchy_dict["dendro"], didx)
            edges = louvain.induced_graph(community, graph).edges(data=True)
        else:
            edges = graph.edges(data=True)

        idx = 0
        for source, target, weight in edges:
            # include source == target for inner edge weight
            #print(weight)
            if source != target:
                a_dict = {}
                a_dict["index"] = idx
                a_dict["name"] = "h%ie%i" % (hidx, idx)
                a_dict["source"] = "h%in%i" % (hidx, source)
                a_dict["target"] = "h%in%i" % (hidx, target)
                try:
                    count = weight["count"]
                except:
                    count = 1
                #print(count)
                a_dict["weight"] = weight["weight"] / count
                e_dict["h%ie%i" % (hidx, idx)] = a_dict
                idx += 1

        h_dict["nodes"] = n_dict
        h_dict["edges"] = e_dict
        g_dict["hierarchy%i" % (hidx)] = h_dict

    ds_dict["graph"] = g_dict
    ds_dict["dataset"] = dataset_name
    ds_dict["threshold"] = threshold

    #mzs = [x for x in np.round(h5_data.columns, 3)]
    mzs = [x for x in h5_data.columns]
    mzs_dict = {}
    for mz in mzs:
        mzs_dict[str(mz)] = {}
        for hy, vals in g_dict.items():
            for nid, props in vals["nodes"].items():
                try:
                    if mz in props["mzs"]:
                        mzs_dict[str(mz)][hy] = nid
                        break
                # the last hierarchy has no "mzs" prop
                except Exception as e:
                    print(e)
                    if mz == props["name"]:
                        mzs_dict[str(mz)][hy] = nid
    ds_dict["mzs"] = mzs_dict

    json["graphs"]["graph%i" % (hierarchy_dict["graph_idx"])] = ds_dict
    return json
# (b). 'coverage' - (note: have to turn partition into a list of sets.)
# ???? NOT WORKING AND NOT SURE WHY.
# partsList = []
# numParts = part.get(max(part, key=part.get))
# for p in range(numParts):
#     partsList.append(set([i for i, j in part.items() if j == p]))
# coverage[i] = nx.community.coverage(G, partsList)

# Looking further into the dendrogram. Makes sense.
try:
    # turning clusters into nodes defines a new graph
    G2 = c.induced_graph(dend[0], G, weight='weight')
    q_dend[1, i] = c.modularity(dend[1], G2, weight='weight')
    # express the partition at a given layer in terms of all nodes
    pp = c.partition_at_level(dend, 1)
    q_dend[2, i] = c.modularity(pp, G, weight='weight')
except:
    continue

# Plot the modularity metric for different partitions at different
# resolution parameters.
if False:
    plt.plot(res, q_bp, 'b')
    plt.plot(res, q_dend[0], 'r')
    plt.plot(res, q_dend[1], 'g')
    plt.plot(res, q_dend[2], 'k')
plt.axis("off") nx.draw_networkx(G_lesmis, pos=spring_pos, with_labels=False, node_size=15) ############### Community detection ########### import community as com parts = com.best_partition(G_lesmis) values = [parts.get(node) for node in G_lesmis.nodes()] plt.axis("off") nx.draw_networkx(G_lesmis, pos=spring_pos, cmap=plt.get_cmap("jet"), node_color=values, font_size=20, node_size=80, with_labels=False) ## Calculate the modularity ## com.modularity(parts, G_lesmis) ## induced graph : each community is represented as one node ## help(com) G_induced = com.induced_graph(parts, G_lesmis) plt.axis("off") nx.draw_networkx(G_induced, cmap=plt.get_cmap("jet"), font_size=20, node_size=80, with_labels=False)
def extraction(self):
    '''Extract adjacency lists, mats, user and community centrality and communities bags'''
    # Compute the first derivative and the point of timeslot separation
    firstderiv, mentionLimit = self.timeslotselection(self.authors, self.mentions, self.alltime)

    # Split time according to the first derivative of the users' activity
    sesStart, timeslot, timeLimit = 0, 0, [self.alltime[0]]
    print("Forming timeslots")
    for k in range(len(mentionLimit)):
        if firstderiv[k] < 0 and firstderiv[k + 1] >= 0:
            # make timeslot timelimit array
            timeLimit.append(self.alltime[int(mentionLimit[k])])
            fileNum = '{0}'.format(str(timeslot).zfill(2))
            # print("Forming Timeslot Data " + str(timeslot) + " at point " + str(k))
            sesEnd = int(mentionLimit[k] + 1)

            # Make pairs of users with weights
            usersPair = list(zip(self.authors[sesStart:sesEnd], self.mentions[sesStart:sesEnd]))

            # Create weighted adjacency list
            weighted = collections.Counter(usersPair)
            weighted = list(weighted.items())
            adjusrs, weights = zip(*weighted)
            adjauthors, adjments = zip(*adjusrs)
            adjList = list(zip(adjauthors, adjments, weights))

            # Write pairs of users to txt file for Gephi
            my_txt = open(self.dataset_path + "/data/GDD/results/forGephi/usersPairs_" + fileNum + ".txt", "w")
            my_txt.write("Source,Target,Weight" + "\n")
            for line in adjList:
                my_txt.write(",".join(str(x) for x in line) + "\n")
            my_txt.close()

            # Create dictionary of tags per user
            tmptags = self.tags[sesStart:sesEnd]
            self.tagBag[timeslot] = {}
            for authIdx, auth in enumerate(self.authors[sesStart:sesEnd]):
                if auth not in self.tagBag[timeslot]:
                    self.tagBag[timeslot][auth] = []
                elif tmptags[authIdx]:
                    self.tagBag[timeslot][auth].append(tmptags[authIdx])

            # Create dictionary of urls per user
            tmpUrls = self.tweetUrls[sesStart:sesEnd]
            self.urlBag[timeslot] = {}
            for authIdx, auth in enumerate(self.authors[sesStart:sesEnd]):
                if auth not in self.urlBag[timeslot]:
                    self.urlBag[timeslot][auth] = []
                elif tmpUrls[authIdx]:
                    self.urlBag[timeslot][auth].append(tmpUrls[authIdx])

            # Create dictionary of tweet ids per user
            tmptweetids = self.tweetIds[sesStart:sesEnd]
            self.tweetIdBag[timeslot] = {}
            for authIdx, auth in enumerate(self.authors[sesStart:sesEnd]):
                if auth not in self.tweetIdBag[timeslot]:
                    self.tweetIdBag[timeslot][auth] = []
                elif tmptweetids[authIdx]:
                    self.tweetIdBag[timeslot][auth].append(tmptweetids[authIdx])
            for mentIdx, ment in enumerate(self.mentions[sesStart:sesEnd]):
                if ment not in self.tweetIdBag[timeslot]:
                    self.tweetIdBag[timeslot][ment] = []
                elif tmptweetids[mentIdx]:
                    self.tweetIdBag[timeslot][ment].append(tmptweetids[mentIdx])

            # Create dictionary of text per user
            tmptweetText = self.twText[sesStart:sesEnd]
            self.tweetTextBag[timeslot] = {}
            for authIdx, auth in enumerate(self.authors[sesStart:sesEnd]):
                if auth not in self.tweetTextBag[timeslot]:
                    self.tweetTextBag[timeslot][auth] = []
                elif tmptweetText[authIdx]:
                    self.tweetTextBag[timeslot][auth].append(tmptweetText[authIdx])
            for mentIdx, ment in enumerate(self.mentions[sesStart:sesEnd]):
                if ment not in self.tweetTextBag[timeslot]:
                    self.tweetTextBag[timeslot][ment] = []
                elif tmptweetText[mentIdx]:
                    self.tweetTextBag[timeslot][ment].append(tmptweetText[mentIdx])

            # Construct networkX graph
            tempDiGraph = nx.DiGraph()
            tempDiGraph.add_weighted_edges_from(adjList)
            tempDiGraph.remove_edges_from(tempDiGraph.selfloop_edges())
            tempGraph = nx.Graph()
            tempGraph.add_weighted_edges_from(adjList)
            tempGraph.remove_edges_from(tempGraph.selfloop_edges())

            # Extract the centrality of each user using the PageRank algorithm
            tempUserPgRnk = nx.pagerank(tempDiGraph, alpha=0.85, max_iter=100, tol=0.001)
            maxPGR = max(pgr for k, (pgr) in tempUserPgRnk.items())
            for k in tempUserPgRnk.items():
                tempUserPgRnk[k[0]] /= maxPGR
            self.userPgRnkBag[timeslot] = tempUserPgRnk

            # Detect communities using the Louvain algorithm
            partition = community.best_partition(tempGraph)
            inv_partition = {}
            for k, v in partition.items():
                inv_partition[v] = inv_partition.get(v, [])
                inv_partition[v].append(k)
                inv_partition[v].sort()
            strComms = [inv_partition[x] for x in inv_partition]
            strComms.sort(key=len, reverse=True)

            # Construct communities of uniqueUsers indices and a new community
            # dict with size-sorted communities
            numComms, new_partition = [], {}
            for c1, comms in enumerate(strComms):
                numpart = []
                for ids in comms:
                    numpart.extend(self.uniqueUsers[ids])
                    new_partition[ids] = c1
                numComms.append(numpart)
            newinv_partition = {}
            for k, v in new_partition.items():
                newinv_partition[v] = newinv_partition.get(v, [])
                newinv_partition[v].append(k)
                newinv_partition[v].sort()

            # Construct a graph using the communities as users
            tempCommGraph = community.induced_graph(new_partition, tempDiGraph)

            # Detect the centrality of each community using the PageRank algorithm
            commPgRnk = nx.pagerank(tempCommGraph, alpha=0.85, max_iter=100, tol=0.001)
            maxCPGR = max(cpgr for k, (cpgr) in commPgRnk.items())
            commPgRnkList = []
            for key, value in commPgRnk.items():
                commPgRnkList.append(value / maxCPGR)
            self.commPgRnkBag[timeslot] = commPgRnkList

            '''Construct Community Dictionary'''
            self.commStrBag[timeslot] = strComms
            self.commNumBag[timeslot] = numComms
            sesStart = sesEnd
            timeslot += 1

    day_month = [datetime.datetime.fromtimestamp(int(x)).strftime('%d/%m') for x in timeLimit]
    self.day_month = day_month
    self.timeLimit = [time.ctime(int(x)) for x in timeLimit]
edge_colors = 'red'
pos = nx.spring_layout(G)
nx.draw(G, pos, width=0.5, alpha=0.5)
plt.savefig("First_Graph.png")
nx.draw_networkx_edges(G, pos, width=0.5, alpha=0.5)

partition = community.best_partition(G)
print len(partition)

partition_set = set()
for item in partition.values():
    partition_set.add(item)

induced_graph = community.induced_graph(partition, G)
print induced_graph.edges(data='weight')

pos = nx.spring_layout(induced_graph)
nx.draw(induced_graph, pos, width=0.5, alpha=0.5)
plt.savefig("Induced_Graph.png")
nx.draw_networkx_edges(induced_graph, pos, width=0.5, alpha=0.5)

cluster_size = []
for i in range(168):
    cluster_size.append(0)
for i in range(1, len(partition) + 1):
    cluster_size[partition[i]] += 1
    print partition[i]
plt.axis("off") nx.draw_networkx(G_karate, pos=spring_pos, with_labels=False, node_size=80) ############### Community detection ########### import community as com parts = com.best_partition(G_karate) values = [parts.get(node) for node in G_karate.nodes()] plt.axis("off") nx.draw_networkx(G_karate, pos=spring_pos, cmap=plt.get_cmap("jet"), node_color=values, font_size=20, node_size=80, with_labels=False) ## Calculate the modularity ## com.modularity(parts, G_karate) ## induced graph : each community is represented as one node ## help(com) G_induced = com.induced_graph(parts, G_karate) plt.axis("off") nx.draw_networkx(G_induced, cmap=plt.get_cmap("jet"), font_size=20, node_size=80, with_labels=False)
plt.axis("off") nx.draw_networkx(G_lesmis, pos = spring_pos, with_labels = False, node_size = 15) ############### Community detection ########### import community as com parts = com.best_partition(G_lesmis) values = [parts.get(node) for node in G_lesmis.nodes()] plt.axis("off") nx.draw_networkx(G_lesmis, pos = spring_pos, cmap = plt.get_cmap("jet"), node_color = values, font_size=20,node_size = 80, with_labels = False) ## Calculate the modularity ## com.modularity(parts, G_lesmis) ## induced graph : each community is represented as one node ## help(com) G_induced=com.induced_graph(parts, G_lesmis) plt.axis("off") nx.draw_networkx(G_induced, cmap = plt.get_cmap("jet"), font_size=20,node_size = 80, with_labels = False)
def extraction(self):
    '''Extract adjacency lists, mats, user and community centrality and communities bags'''
    import community

    # Compute the first derivative and the point of timeslot separation
    firstderiv, mentionLimit = self.timeslotselection(self.authors, self.mentions, self.alltime)
    self.commPgRnkBag = {}

    # Split time according to the first derivative of the users' activity
    sesStart, timeslot, timeLimit, commCount = 0, 0, [self.alltime[0]], 0
    print("Forming timeslots")
    for tmplim in mentionLimit:
        # make timeslot timelimit array
        timeLimit.append(self.alltime[int(tmplim)])
        fileNum = '{0}'.format(str(timeslot).zfill(2))
        # print("Forming Timeslot Data " + str(timeslot) + " at point " + str(tmplim))
        sesEnd = int(tmplim + 1)

        # Make pairs of users with weights
        usersPair = list(zip(self.authors[sesStart:sesEnd], self.mentions[sesStart:sesEnd]))

        # Create weighted adjacency list
        weighted = collections.Counter(usersPair)
        weighted = list(weighted.items())
        adjusrs, weights = zip(*weighted)
        adjauthors, adjments = zip(*adjusrs)
        adjList = list(zip(adjauthors, adjments, weights))

        '''Write pairs of users to txt file for Gephi'''
        my_txt = open(self.dataset_path + "/data/nonadaptive/results/forGephi/usersPairs_" + fileNum + ".txt", "w")
        my_txt.write("Source,Target,Weight" + "\n")
        for line in adjList:
            my_txt.write(",".join(str(x) for x in line) + "\n")
        my_txt.close()

        '''Create dictionaries of text per user, of urls per user,
        of tweet ids per user and of tags per user'''
        tmptweetText = self.twText[sesStart:sesEnd]
        self.tweetTextBag[timeslot] = {}
        tmpUrls = self.tweetUrls[sesStart:sesEnd]
        self.urlBag[timeslot] = {}
        tmptweetids = self.tweetIds[sesStart:sesEnd]
        self.tweetIdBag[timeslot] = {}
        tmptags = self.tags[sesStart:sesEnd]
        self.tagBag[timeslot] = {}
        for authIdx, auth in enumerate(self.authors[sesStart:sesEnd]):
            if auth not in self.tweetTextBag[timeslot]:
                self.tweetTextBag[timeslot][auth] = []
            if tmptweetText[authIdx]:
                self.tweetTextBag[timeslot][auth].append(tmptweetText[authIdx])
            if auth not in self.urlBag[timeslot]:
                self.urlBag[timeslot][auth] = []
            if tmpUrls[authIdx]:
                for multUrls in tmpUrls[authIdx]:
                    self.urlBag[timeslot][auth].append(multUrls)
            if auth not in self.tweetIdBag[timeslot]:
                self.tweetIdBag[timeslot][auth] = []
            if tmptweetids[authIdx]:
                self.tweetIdBag[timeslot][auth].append(tmptweetids[authIdx])
            if auth not in self.tagBag[timeslot]:
                self.tagBag[timeslot][auth] = []
            if tmptags[authIdx]:
                self.tagBag[timeslot][auth].append(tmptags[authIdx])
        for mentIdx, ment in enumerate(self.mentions[sesStart:sesEnd]):
            if ment not in self.tweetTextBag[timeslot]:
                self.tweetTextBag[timeslot][ment] = []
            if tmptweetText[mentIdx]:
                self.tweetTextBag[timeslot][ment].append(tmptweetText[mentIdx])
            if ment not in self.tweetIdBag[timeslot]:
                self.tweetIdBag[timeslot][ment] = []
            if tmptweetids[mentIdx]:
                self.tweetIdBag[timeslot][ment].append(tmptweetids[mentIdx])

        '''Construct networkX graph'''
        tempDiGraph = nx.DiGraph()
        tempDiGraph.add_weighted_edges_from(adjList)
        tempDiGraph.remove_edges_from(tempDiGraph.selfloop_edges())
        tempGraph = nx.Graph()
        tempGraph.add_weighted_edges_from(adjList)
        tempGraph.remove_edges_from(tempGraph.selfloop_edges())

        '''Extract the centrality of each user using the PageRank algorithm'''
        tempUserPgRnk = nx.pagerank(tempDiGraph, alpha=0.85, max_iter=100, tol=0.001)
        maxPGR = max(pgr for k, (pgr) in tempUserPgRnk.items())
        for k in tempUserPgRnk.items():
            tempUserPgRnk[k[0]] /= maxPGR
        self.userPgRnkBag[timeslot] = tempUserPgRnk

        '''Detect communities using the Louvain algorithm'''
        partition = community.best_partition(tempGraph)
        inv_partition = {}
        for k, v in partition.items():
            inv_partition[v] = inv_partition.get(v, [])
            inv_partition[v].append(k)
            inv_partition[v].sort()
        strComms = [inv_partition[x] for x in inv_partition]
        strComms.sort(key=len, reverse=True)
        commCount += len(strComms)

        '''Construct communities of uniqueUsers indices and a new community
        dict with size-sorted communities'''
        numComms, new_partition = [], {}
        for c1, comms in enumerate(strComms):
            numpart = []
            for ids in comms:
                numpart.extend(self.uniqueUsers[ids])
                new_partition[ids] = c1
            numpart.sort()
            numComms.append(numpart)
        newinv_partition = {}
        for k, v in new_partition.items():
            newinv_partition[v] = newinv_partition.get(v, [])
            newinv_partition[v].append(k)
            newinv_partition[v].sort()

        '''Construct a graph using the communities as users'''
        tempCommGraph = community.induced_graph(new_partition, tempDiGraph)
        self.commGraph = tempCommGraph

        '''Detect the centrality of each community using the PageRank algorithm'''
        commPgRnk = nx.pagerank(tempCommGraph, alpha=0.85, max_iter=100, tol=0.001)
        maxCPGR = max(cpgr for k, (cpgr) in commPgRnk.items())
        commPgRnkList = []
        for key, value in commPgRnk.items():
            commPgRnkList.append(value / maxCPGR)
        self.commPgRnkBag[timeslot] = commPgRnkList

        # #Detect the centrality of each community using the degree centrality algorithm
        # commDegreeness = nx.degree_centrality(tempCommGraph)
        # maxCDeg = max((cpgr for k, (cpgr) in commDegreeness.items()))
        # commDegreenessList = []
        # for key, value in commDegreeness.items():
        #     commDegreenessList.append(value/maxCDeg)
        # self.commDegreenessBag[timeslot] = commDegreenessList

        # #Detect the centrality of each community using the betweenness centrality algorithm
        # commBetweeness = nx.betweenness_centrality(tempCommGraph)
        # maxCBet = max((cpgr for k, (cpgr) in commBetweeness.items()))
        # commBetweennessList = []
        # for key, value in commBetweeness.items():
        #     commBetweennessList.append(value/maxCBet)
        # self.commBetweenessBag[timeslot] = commBetweennessList

        # #Extract community degree
        # degreelist = []
        # for k in range(len(tempCommGraph.edge)):
        #     tmpdeg = tempCommGraph.degree(k)
        #     degreelist.append(tmpdeg)
        # degreelist = [x/max(degreelist) for x in degreelist]
        # self.degreeBag[timeslot] = degreelist

        '''Construct Community Dictionary'''
        self.commStrBag[timeslot] = strComms
        self.commNumBag[timeslot] = numComms
        sesStart = sesEnd
        timeslot += 1

    day_month = [datetime.datetime.fromtimestamp(int(x)).strftime(self.labelstr) for x in timeLimit]
    self.day_month = day_month
    self.timeLimit = [datetime.datetime.fromtimestamp(int(x)).strftime(self.labelstr) for x in timeLimit]

    statement = '\nTotal # of communities is ' + str(commCount) + '\n'
    statsfile = open(self.dataset_path + "/data/nonadaptive/results/basicstats.txt", 'a')
    print(statement)
    statsfile.write(statement)
    statsfile.close()

    dataCommPck = open(self.dataset_path + '/data/nonadaptive/tmp/dataComm_' + str(self.fileTitle) + '.pck', 'wb')
    pickle.dump(self, dataCommPck, protocol=2)
    dataCommPck.close()
def run_comm(inputcID, subids):
    con = mdb.connect(user='******', passwd='X', db='X', unix_socket='X', charset='utf8')
    with con:
        cur = con.cursor()
        cur.execute("SELECT cID,tocID FROM CommenterSubs WHERE inputID=%s and tocID<>'NotShared'",
                    (inputcID,))  # (tocID<>'None' AND cID<>'None')")  # limit 1000")
        edges = cur.fetchall()
        cur.close()

    edges_pulled = [list(x) for x in set(tuple(x) for x in edges)]
    print "Number of edges pulled from the database", len(edges_pulled)

    uedges = []
    for i, edge_i in enumerate(edges_pulled):
        if edge_i[0] in subids:
            uedges.append(edge_i)
    print "Number of relevant edges for the inputcID", len(uedges)

    G = nx.Graph()
    for edge in uedges:
        G.add_edge(edge[0], edge[1], weight=0.5)
        #print edge[0], edge[1]
    print "Number of edges", G.number_of_edges(), ", number of nodes", G.number_of_nodes()

    import community as comm
    #dendo = comm.generate_dendogram(G)  # takes a long time and unnecessary
    part = comm.best_partition(G)
    modularity = comm.modularity(part, G)
    print "Number of communities found", max(part.values()) + 1, ", modularity:", modularity

    count = 0.
    commf = 0    # community to which the input channel belongs
    nodesf = []  # nodes of the community to which the input channel belongs
    nodepcom = []
    label_prep = ["" for x in range(len(part.values()))]
    for com in set(part.values()):
        count = count + 1.
        list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
        nodepcom.append(len(list_nodes))
        label_prep[int(com)] = str(len(list_nodes))
        if inputcID in list_nodes:
            print "Input Channel is in the community #", com
            commf = com
            nodesf = list_nodes
            label_prep[int(com)] = "Target community: " + str(len(list_nodes))
    labs = dict(zip(set(part.values()), label_prep))

    plt.figure()
    h1 = plt.hist(nodepcom, bins=20, normed=False, color='steelblue')
    h2 = plt.axvline(int(np.average(nodepcom)), 0, 1, color='navy', linewidth=10,
                     label='average community size = ' + str(int(np.average(nodepcom))))
    plt.legend(fontsize=22)
    plt.title('Network modularity Q = %.4f ' % (modularity), fontsize=30)
    plt.xlabel('Number of channels per community', fontsize=26)
    plt.ylabel('Frequency', fontsize=26)
    plt.savefig('./static/img/' + inputcID + '/' + inputcID + '_com_size_distrib_1.png',
                dpi=300, format='png')  # ,transparent=True)

    com1 = comm.induced_graph(part, G)
    # plt.figure()
    # pos = nx.spring_layout(com1)
    # nx.draw_networkx_edges(com1, pos, width=1.0, edge_color='g', style='solid', alpha=0.2)
    # nx.draw_networkx_labels(com1, pos, labels=labs, font_size=12, font_color='r',
    #                         font_family='sans-serif', font_weight='normal', alpha=1.0)
    # plt.draw()
    # plt.savefig('./static/img/'+inputcID+'/'+inputcID+'_v3_com_plot_all_1.png',
    #             dpi=300, format='png', transparent=True)

    # export to json for d3 graph plot
    i = 0
    comfin = com1
    for node in com1.nodes():
        comfin.node[i]['group'] = i
        comfin.node[i]['label'] = label_prep[i]
        i = i + 1
    comfin.nodes(data=True)
    nld = json_graph.node_link_data(comfin)
    json.dump(nld, open('./static/img/' + inputcID + '/' + inputcID + '_community_graph_comsub2.json', 'w'))
    return nodesf