def _write_largest_component(f, component, id_to_groups):
    """Write size, node ids and group names of one component graph to ``f``."""
    # Edge/node counts are integers, so format them like the header section.
    f.write('Edges: %d ' % component.GetEdges())
    f.write('Nodes: %d \n' % component.GetNodes())
    f.write('Node List: ')
    for node in component.Nodes():
        f.write('%d, ' % node.GetId())
    f.write('\n')
    for node in component.Nodes():
        f.write('%s, ' % id_to_groups[node.GetId()])


def _write_large_components(f, components, label, min_size=10):
    """Write the node ids of every component holding at least ``min_size`` nodes."""
    for i, component in enumerate(components):
        if component.Len() < min_size:
            continue
        f.write('\n%s%d: ' % (label, i))
        for node_id in component:
            f.write('%d, ' % node_id)


def processNetwork(Graph, id_to_groups):
    """Write a connectivity / clustering report for ``Graph`` to a text file.

    The report goes to ``../../data/fastinf_graph_noweights_features.txt`` and
    contains, in order: edge and node counts of the graph, the largest weakly
    connected component, all weakly connected components with at least 10
    nodes, the same two sections for strongly connected components, and the
    clustering coefficient, triad count and modularity.

    Args:
        Graph: snap graph to analyse.
        id_to_groups: mapping indexed by node id; the looked-up value is
            written next to the node lists of the largest components.
    """
    with open("../../data/fastinf_graph_noweights_features.txt", "w+") as f:
        f.write("RELATED GROUPS GRAPH:\n")
        f.write('Edges: %d\n' % Graph.GetEdges())
        f.write('Nodes: %d\n\n' % Graph.GetNodes())

        f.write("MAX WCC:\n")
        _write_largest_component(f, snap.GetMxWcc(Graph), id_to_groups)

        f.write("\n\nALL WCCs:")
        Components = snap.TCnComV()
        snap.GetWccs(Graph, Components)
        _write_large_components(f, Components, 'Wcc')

        f.write("\n\nMAX SCC:\n")
        _write_largest_component(f, snap.GetMxScc(Graph), id_to_groups)

        f.write("\n\nALL SCCs:")
        Components = snap.TCnComV()
        snap.GetSccs(Graph, Components)
        _write_large_components(f, Components, 'Scc')

        f.write('\n\nCLUSTERING AND COMMUNITIES:\n')
        f.write('Clustering coefficient: %f\n' % snap.GetClustCf(Graph, -1))
        f.write('Num Triads: %d\n' % snap.GetTriads(Graph, -1))

        # Modularity is evaluated with every node placed in a single group.
        Nodes = snap.TIntV()
        for node in Graph.Nodes():
            Nodes.Add(node.GetId())
        f.write('Modularity: %f' % snap.GetModularity(Graph, Nodes))
def community_detection(G):
    """Run Girvan-Newman community detection on ``G`` and save the result.

    See the snap docs for details on ``CommunityGirvanNewman``.

    Two files are written:

    * ``data/CGN_knn_20.csv`` - one line per community holding its
      comma-separated node ids;
    * ``data/CGN_dict_knn_20.json`` - the ``{community_id: [node ids]}``
      mapping.

    Args:
        G: snap graph to partition.
    """
    # NOTE: snap.CommunityCNM(G, CmtyV) is an alternative meant for large
    # networks only; it produced ~3000-node communities on a 9000-node graph.
    outfile = "data/CGN_knn_20.csv"
    dictfile = "data/CGN_dict_knn_20.json"

    CmtyV = snap.TCnComV()
    snap.CommunityGirvanNewman(G, CmtyV)

    # Communities are numbered in the order snap returns them.
    comm_dict = {
        community_id: [node_id for node_id in Cmty]
        for community_id, Cmty in enumerate(CmtyV)
    }

    with open(outfile, "w+") as f:
        for _, members in tqdm(comm_dict.items()):
            print(len(members))
            # Node ids are ints and must be stringified before joining.
            f.write(", ".join(str(node_id) for node_id in members))
            f.write("\n")

    with open(dictfile, "w+") as f:
        json.dump(comm_dict, f)
def gen_G(D, Pi_minus, Pi_exo, V_exo, theta2, N):
    """Build the pairwise-stable network on N nodes.

    Args:
        D, Pi_minus, Pi_exo: outputs of gen_D().
        V_exo: 'exogenous' part of joint surplus (output of gen_V_exo).
        theta2: transitivity parameter (theta[2]).
        N: number of nodes of the returned graph.

    Returns:
        Undirected snap graph holding the edges produced by
        gen_G_subgraph() for every multi-node component of D, plus every
        edge of Pi_exo.
    """
    # Start from N nodes and zero edges.
    network = snap.GenRndGnm(snap.PUNGraph, N, 0)

    d_components = snap.TCnComV()
    snap.GetWccs(D, d_components)

    member_ids = snap.TIntV()  # one vector, cleared and refilled per component
    for component in d_components:
        if component.Len() <= 1:
            continue  # single-node components are not passed to gen_G_subgraph
        member_ids.Clr()
        for node_id in component:
            member_ids.Add(node_id)
        sub_network = gen_G_subgraph(member_ids, D, Pi_minus, Pi_exo, V_exo,
                                     theta2)
        for link in sub_network.Edges():
            network.AddEdge(link.GetSrcNId(), link.GetDstNId())

    # Add robust links.
    for link in Pi_exo.Edges():
        network.AddEdge(link.GetSrcNId(), link.GetDstNId())

    return network
def get_thread_text(comments):
    """Group comments into threads and concatenate the text of each thread.

    A graph is built with one node per object/parent id and one edge per
    comment; each component returned by ``snap.GetSccs`` is treated as a
    thread.

    Args:
        comments: DataFrame with ``object_id``, ``parent_id``, ``points`` and
            ``comment_text`` columns. The first three are cast in place.

    Returns:
        One string: the comment texts of a thread joined by spaces, and the
        threads themselves joined by spaces.
    """
    comments.object_id = comments.object_id.astype(int)
    comments.parent_id = comments.parent_id.astype(int)
    comments.points = comments.points.astype(float).astype(int)

    graph = snap.TUNGraph.New()
    for node_id in set(comments.object_id).union(set(comments.parent_id)):
        graph.AddNode(node_id)
    for child_id, parent_id in comments[['object_id',
                                         'parent_id']].values.tolist():
        graph.AddEdge(child_id, parent_id)

    threads = snap.TCnComV()
    snap.GetSccs(graph, threads)

    thread_texts = []
    for thread in threads:
        in_thread = comments['object_id'].isin(thread)
        # Cast to str: the column may hold floats.
        raw_texts = comments[in_thread].comment_text.astype(str)
        pieces = []
        for raw in raw_texts:
            # Reduce to plain ASCII, then resolve HTML entities.
            ascii_text = raw.decode('ascii', errors='replace').encode(
                'ascii', 'ignore')
            pieces.append(htmlParser.unescape(ascii_text))
        thread_texts.append(" ".join(pieces))

    return " ".join(thread_texts)
def compute(self):
    """Print the Girvan-Newman communities of every ego network.

    For each file name in ``./test_egonets`` the part before ``.egonet`` is
    used as the ego id. A graph is built from the ego, the entries of
    ``self.adj_list[<ego id>]`` and the space-separated edge list in
    ``./edges/<ego id>.egonet.edges``. Each detected community is printed as
    one line of space-separated node ids.
    """
    for filename in os.listdir('./test_egonets'):
        index = filename.split('.egonet')[0]
        edge_path = './edges/' + index + '.egonet.edges'

        G = snap.TUNGraph.New()
        G.AddNode(int(index))
        for node in self.adj_list[index]:
            G.AddNode(int(node))

        # Context manager so the edge file is closed even if parsing fails.
        with open(edge_path) as edge_file:
            for line in edge_file:
                line = line.strip('\n')
                if not line:
                    continue  # tolerate blank lines
                ends = [int(field) for field in line.split(' ')]
                if not G.IsEdge(ends[1], ends[0]):
                    G.AddEdge(ends[0], ends[1])

        print('Computing for ' + index)
        CmtyV = snap.TCnComV()
        snap.CommunityGirvanNewman(G, CmtyV)
        for Cmty in CmtyV:
            print(' '.join(str(NI) for NI in Cmty))
        G.Clr()
def detect_community(G, id_to_title, max_communities=100, max_members=10):
    """Write a preview of the Girvan-Newman communities of ``G`` to a file.

    The report is written to
    ``./community_detection/assignment2_Nhom1_TuToanChien.txt``. For each
    community a ``Community <n>:`` header is followed by one
    ``<node id>\\t \\t<title>`` line per listed member.

    Args:
        G: snap graph to partition.
        id_to_title: mapping from node id to the title written next to it.
        max_communities: number of communities to list (default 100).
        max_members: number of members listed per community (default 10).
    """
    print('dectect community ....')
    CmtyV = snap.TCnComV()
    snap.CommunityGirvanNewman(G, CmtyV)

    # The context manager closes the file even when a title lookup fails.
    with open('./community_detection/assignment2_Nhom1_TuToanChien.txt',
              'w') as f:
        for i, Cmty in enumerate(CmtyV):
            if i >= max_communities:
                break
            f.write('Community ' + str(i) + ': \n')
            for j, NI in enumerate(Cmty):
                if j >= max_members:
                    break
                f.write(str(NI) + '\t \t' + str(id_to_title[NI]) + '\n')
def run(self, data, seed=None):
    """Cluster ``data`` with snap's Girvan-Newman algorithm and store the result.

    Args:
        data: dataset object; must be undirected. It is converted with
            ``convert.to_snap`` and its ``name`` is recorded in the result.
        seed: accepted for interface compatibility; it is ignored, and a log
            line says so when it is not None.

    Returns:
        ``self``; the result dict is also saved with ``save_result`` and kept
        in ``self.result``.

    Raises:
        UnsupportedException: if ``data`` is directed.
    """
    if data.is_directed():
        raise UnsupportedException("only undirected graph is supported")
    if seed is not None:
        self.logger.info("seed ignored")

    snap_graph = convert.to_snap(data)
    communities = snap.TCnComV()

    def detect():
        return snap.CommunityGirvanNewman(snap_graph, communities)

    timecost, modularity = utils.timeit(detect)

    # Cluster ids follow the order in which snap returns the communities.
    clusters = {
        cluster_id: [node_id for node_id in members]
        for cluster_id, members in enumerate(communities)
    }

    self.logger.info(
        "Made %d clusters in %f seconds. modularity of the graph is %f" %
        (len(clusters), timecost, modularity))

    result = {
        'timecost': timecost,
        'runname': self.name,
        'dataname': data.name,
        'meta': self.get_meta(),
        'modularity': modularity,
        'clusters': clusters,
    }
    save_result(result)
    self.result = result
    return self
def pageRank_components(g):
    """Write the top PageRank value of each weakly connected component of ``g``.

    ``component_pr.txt`` receives a ``Total components:<n>`` header and then,
    for every component that does not have exactly two nodes, one line with
    the component's node count and the PageRank value of the first entry of
    its sorted rank table.

    Args:
        g: directed snap graph (the ``_PNGraph`` snap functions are used).
    """
    print('executing pagerank components ---- getting components for page rank')
    Components = snap.TCnComV()
    snap.GetWccs(g, Components)

    # The context manager closes the report even if a snap call raises.
    with open('component_pr.txt', 'w') as f:
        cgraphs = []
        for com in Components:
            v = snap.TIntV()
            for ni in com:
                v.Add(ni)
            cgraphs.append(snap.GetSubGraph_PNGraph(g, v))
        print('components retrived for pagerank')

        f.write('Total components:' + str(len(cgraphs)) + '\n')
        for graph in cgraphs:
            if graph.GetNodes() == 2:
                continue  # two-node components are left out of the report
            sprank = snap.TIntFltH()
            snap.GetPageRank_PNGraph(graph, sprank)
            # NOTE(review): False presumably requests descending order, so the
            # first key carries the highest rank - confirm against snap docs.
            sprank.SortByDat(False)
            f.write(
                str(graph.GetNodes()) + ' ' +
                str(sprank[sprank.BegI().GetKey()]) + '\n')

    print('finished writing pagerank components values')
def analizzaGirvanNewman(pfPaj, pfAINN, pfMod):
    """Run Girvan-Newman on a Pajek graph and write each author's community.

    Takes a graph in Pajek format and outputs the communities as
    ``ID<TAB>Name<TAB>Community`` lines terminated by CRLF.

    Args:
        pfPaj: path of the Pajek file to load.
        pfAINN: path of a file with three tab-separated fields per line; the
            second field is parsed as the integer node number, the first and
            third are copied to the output (presumably author id and author
            name - confirm against the file producer).
        pfMod: path of the output file.

    Returns:
        Number of communities found.
    """
    g = snap.LoadPajek(snap.PUNGraph, pfPaj)
    comunita = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(g, comunita)

    dMod = {}  # {node number: community index}
    classe = 0
    for com in comunita:
        for nodo in com:
            dMod[nodo] = classe
        classe += 1
    print('Numero di comunita analizzaGirvanNewman: {} modularity: {}'.format(
        classe, modularity))

    # Both files are opened in binary mode, so all separators must be bytes
    # literals; str separators raise TypeError on Python 3. Staying in bytes
    # also copies id and name through untouched, whatever their encoding.
    dNum = {}  # {node number: [id, name]}
    with open(pfAINN, 'rb') as fAINN:
        for line in fAINN:
            autID, autNum, autNome = line.rstrip().split(b'\t')
            dNum[int(autNum)] = [autID, autNome]

    with open(pfMod, 'wb') as fMod:
        for autNum in dNum:
            autID, autNome = dNum[autNum]
            community = str(dMod[autNum]).encode('ascii')
            fMod.write(b'\t'.join([autID, autNome, community]) + b'\r\n')

    return classe  # number of communities found
def GirvanNewmanMethodBySnap(graph):
    """Print the Girvan-Newman communities of ``graph`` and its modularity.

    Args:
        graph: snap graph to partition.
    """
    CmtyV = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(graph, CmtyV)
    for Cmty in CmtyV:
        # Print this community's node ids, not the whole CmtyV container.
        print("Community: ", list(Cmty))
    print("The modularity of the network is %f" % modularity)
def quick_properties(graph, name, dic_path):
    """Print a quick overview of the graph ``name``.

    Reports node/edge counts, self/directed/undirected/reciprocated edge
    counts, zero-degree node counts, maximum in-/out-degree, weakly connected
    component statistics and the top three players by PageRank, hub score and
    authority score.

    Args:
        graph: snap graph to describe.
        name: label used in the report header.
        dic_path: path of the pickled dict {players: id}.

    Raises:
        ValueError: if a top-ranked node id has no player in the dict.
    """
    n_edges = graph.GetEdges()
    n_nodes = graph.GetNodes()
    print("##########")
    print("Quick overview of {} Network".format(name))
    print("##########")
    # .format() must be applied to the string, not to the result of print().
    print("{} Nodes, {} Edges".format(n_nodes, n_edges))
    print("{} Self-edges ".format(snap.CntSelfEdges(graph)))
    print("{} Directed edges, {} Undirected edges".format(
        snap.CntUniqDirEdges(graph), snap.CntUniqUndirEdges(graph)))
    print("{} Reciprocated edges".format(snap.CntUniqBiDirEdges(graph)))
    print("{} 0-out-degree nodes, {} 0-in-degree nodes".format(
        snap.CntOutDegNodes(graph, 0), snap.CntInDegNodes(graph, 0)))

    node_in = graph.GetNI(snap.GetMxInDegNId(graph))
    node_out = graph.GetNI(snap.GetMxOutDegNId(graph))
    # GetDeg() is the total degree; report the direction-specific degrees
    # that the message promises.
    print("Maximum node in-degree: {}, maximum node out-degree: {}".format(
        node_in.GetInDeg(), node_out.GetOutDeg()))
    print("###")

    components = snap.TCnComV()
    snap.GetWccs(graph, components)
    max_wcc = snap.GetMxWcc(graph)
    print("{} Weakly connected components".format(components.Len()))
    print("Largest Wcc: {} Nodes, {} Edges".format(max_wcc.GetNodes(),
                                                   max_wcc.GetEdges()))

    prankH = snap.TIntFltH()
    snap.GetPageRank(graph, prankH)
    sorted_prankH = sorted(prankH, key=lambda key: prankH[key], reverse=True)

    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(graph, NIdHubH, NIdAuthH)
    sorted_NIdHubH = sorted(NIdHubH, key=lambda key: NIdHubH[key],
                            reverse=True)
    sorted_NIdAuthH = sorted(NIdAuthH, key=lambda key: NIdAuthH[key],
                             reverse=True)

    with open(dic_path, 'rb') as dic_id:
        # NOTE: pickle.load can execute arbitrary code; only pass trusted
        # files as dic_path.
        mydict = pickle.load(dic_id)

    # Invert {player: id} once instead of scanning the dict for every lookup.
    # setdefault keeps the first player for a duplicated id.
    id_to_player = {}
    for player, node_id in mydict.items():
        id_to_player.setdefault(node_id, player)

    def top_players(ranked_ids, count=3):
        """Return up to ``count`` player names for the best-ranked ids."""
        names = []
        for node_id in ranked_ids[:count]:
            if node_id not in id_to_player:
                raise ValueError(
                    "node id %r is not in the player dictionary" % (node_id,))
            names.append(str(id_to_player[node_id]))
        return ", ".join(names)

    print("3 most central players by PageRank scores: {}".format(
        top_players(sorted_prankH)))
    print("Top 3 hubs: {}".format(top_players(sorted_NIdHubH)))
    print("Top 3 authorities: {}".format(top_players(sorted_NIdAuthH)))
def is_uniquely_connected(graph):
    """Tell whether ``graph`` has exactly one multi-node connected component.

    Strongly connected components are checked first. If they do not contain
    exactly one component with more than one node, the same test is applied
    to the weakly connected components.

    Args:
        graph: snap graph to inspect.

    Returns:
        True if exactly one component (strong, else weak) has more than one
        node, False otherwise.
    """

    def is_unique(components):
        return sum(1 for comp in components if comp.Len() > 1) == 1

    s_components = snap.TCnComV()
    snap.GetSccs(graph, s_components)
    unique = is_unique(s_components)

    # Test the computed result, not the helper function object (which is
    # always truthy and would make this fallback unreachable).
    if not unique:
        w_components = snap.TCnComV()
        snap.GetWccs(graph, w_components)
        unique = is_unique(w_components)

    return unique
def out_modularity_gn(g):
    """Print the Girvan-Newman communities of ``g`` and the modularity.

    Each community is announced with a ``Community: `` line followed by one
    node id per line.
    """
    communities = snap.TCnComV()
    score = snap.CommunityGirvanNewman(g, communities)
    for members in communities:
        print("Community: ")
        for node_id in members:
            print(node_id)
    print("The modularity of the network is %f" % score)
def get_component_distribution(ei_graph):
    """Count strongly connected components by size.

    Args:
        ei_graph: wrapper whose ``base()`` returns the snap graph to analyse.

    Returns:
        Counter mapping (size of component -> number of such components).

    See https://snap.stanford.edu/snappy/doc/reference/GetSccs.html
    """
    found = snap.TCnComV()
    snap.GetSccs(ei_graph.base(), found)

    size_counts = Counter()
    for component in found:
        size_counts[component.Len()] += 1
    return size_counts
def community_partition(G):
    """Map every node of ``G`` to the index of its CNM community.

    Returns:
        ``snap.TIntIntH`` keyed by node id; values are community indices
        numbered from 0 in the order snap returns the communities.
    """
    communities = snap.TCnComV()
    snap.CommunityCNM(G, communities)

    node_to_partition = snap.TIntIntH()
    for label, members in enumerate(communities):
        for node_id in members:
            node_to_partition[node_id] = label
    return node_to_partition
def comDetect(algorithm, clusterCommands, Graph, conn, cur):
    """Detect communities in ``Graph`` and store them with ``createTable``.

    Args:
        algorithm: ``"gn"`` for Girvan-Newman or ``"cnm"`` for
            Clauset-Newman-Moore.
        clusterCommands, conn, cur: passed through to ``createTable``
            together with the detected communities.
        Graph: snap graph to partition.

    Raises:
        ValueError: if ``algorithm`` is neither ``"gn"`` nor ``"cnm"``.
    """
    # Reject unknown names up front; otherwise createTable would run with an
    # empty community vector before the unbound modularity is noticed.
    if algorithm not in ("gn", "cnm"):
        raise ValueError(
            "unknown algorithm %r; expected 'gn' or 'cnm'" % (algorithm,))

    CmtyV = snap.TCnComV()
    before_time = time.time()
    if algorithm == "gn":
        modularity = snap.CommunityGirvanNewman(Graph, CmtyV)
    else:
        modularity = snap.CommunityCNM(Graph, CmtyV)
    print("Total handling time is:  %s" % (time.time() - before_time,))

    createTable(clusterCommands, CmtyV, conn, cur)
    print("The modularity of the network is %f" % modularity)
def community_gn(G):
    """Return the Girvan-Newman communities of ``G`` as lists of node ids."""
    detected = snap.TCnComV()
    snap.CommunityGirvanNewman(G, detected)
    return [[node_id for node_id in members] for members in detected]
def getCnn():
    """Print the CNM communities of the generated word graph.

    The graph is the first value returned by
    ``generate_word_graph(True, False, False)``. Its node count is printed
    first, then every community member on its own line, then the modularity.
    """
    word_graph = generate_word_graph(True, False, False)[0]
    print(word_graph.GetNodes())

    communities = snap.TCnComV()
    score = snap.CommunityCNM(word_graph, communities)
    for members in communities:
        print("Community: ")
        for node_id in members:
            print(node_id)
    print("The modularity of the network is %f" % score)
def getStronglyConnectedComponents(Graph, node_to_g):
    """Map each node's label to the number of its strongly connected component.

    Args:
        Graph: snap graph to analyse.
        node_to_g: mapping from node id to the key used in the result.

    Returns:
        Dict from ``node_to_g[node]`` to the component number, counted from 1
        in the order snap returns the components.
    """
    sccs = snap.TCnComV()
    snap.GetSccs(Graph, sccs)

    membership = {}
    # start=1 gives 1-indexed component membership.
    for component_no, component in enumerate(sccs, start=1):
        for node_id in component:
            membership[node_to_g[node_id]] = component_no
    return membership
def findCommunity():
    """Predict circles for every user of the sample submission and save them.

    For each ``UserId`` in ``sample_submission.csv`` the ego network is read,
    plotted, and split with Girvan-Newman. Communities with at least
    ``tooLittleFriendsInCircleThreshold`` members are kept and their node ids
    are stored in the ``Predicted`` column. Users whose graph has more than
    ``tooManyNodesThreshold`` nodes are skipped. The result is written to
    ``<submissionFolderName><submissionNumber>.csv``.
    """
    # make a submission
    submission = pd.read_csv(submissionFolderName + 'sample_submission.csv')

    for userId in list(submission['UserId']):
        # read graph
        filename = str(userId) + '.egonet'
        G = snap.TUNGraph.New()
        read_nodeadjlist(egonetFolderName + filename, G)

        # do not calculate for large graphs (it takes too long)
        if G.GetNodes() > tooManyNodesThreshold:
            print('skipping user ' + str(userId))
            continue
        print('predicting for user ' + str(userId))

        # visualization
        plot = plotting(G, snap.gvlNeato)
        plot.run('gviz_plot_{}'.format(userId),
                 title='UserID = {}'.format(userId))

        # find communities with Girvan-Newman, keeping only the relatively
        # large ones
        CmtyV = snap.TCnComV()
        modularity = snap.CommunityGirvanNewman(G, CmtyV)
        listOfCircles = [
            list(Cmty) for Cmty in CmtyV
            if len(Cmty) >= tooLittleFriendsInCircleThreshold
        ]
        print('The modularity of the network is %f' % modularity)

        # populate prediction string
        # NOTE(review): all circles are flattened into one space-separated
        # list, as in the original code; confirm whether the submission
        # format expects a separator between circles.
        predictionString = ' '.join(
            str(NI) for Cmty in listOfCircles for NI in Cmty)

        # if no prediction was created, keep the sample submission's value
        # ('all friends in one circle')
        if listOfCircles:
            # .loc replaces the removed DataFrame.ix indexer.
            submission.loc[submission['UserId'] == userId,
                           'Predicted'] = predictionString

    submission.to_csv(submissionFolderName + str(submissionNumber) + '.csv',
                      index=False)
def run(self):
    """Detect CNM communities on ``self.graph`` and store them.

    Self-loops are deleted from ``self.graph`` first. The communities are
    stored in ``self.community_list`` as a list of lists of node ids.
    """
    snap.DelSelfEdges(self.graph)
    detected = snap.TCnComV()
    snap.CommunityCNM(self.graph, detected)
    self.community_list = [[node for node in members] for members in detected]
def computeWeaklyConnectedComponents(graph, outFile):
    """Write the weakly connected components of ``graph`` to ``outFile``.

    Each node id is written on its own line and every component is followed
    by a blank line.

    Args:
        graph: snap graph to analyse.
        outFile: path of the text file to create.
    """
    logger.info("Computing Weakly Connected Components")
    Components = snap.TCnComV()
    snap.GetWccs(graph, Components)

    # The context manager closes (and therefore flushes) the output file.
    with open(outFile, 'w') as fw_cc:
        for CnCom in Components:
            for item in CnCom:
                fw_cc.write(str(item) + "\n")
            fw_cc.write("\n")

    logger.info("Weakly Connected Components Computed!")
    logger.info("Weakly Connected Components Exported to " + outFile)
def community_cnm(G):
    """Return the CNM communities of ``G`` as a list of node-id lists."""
    found = snap.TCnComV()
    snap.CommunityCNM(G, found)

    as_lists = []
    for community in found:
        as_lists.append([node_id for node_id in community])
    return as_lists
def calculate_communities(G):
    """Label every node of ``G`` with its Girvan-Newman community.

    ``G`` is converted with ``networkx_to_snappy`` before detection.

    Returns:
        Dict ``{node: [community label]}``; labels start at 2 and follow the
        order in which snap returns the communities.
    """
    snap_graph = networkx_to_snappy(G)
    detected = snap.TCnComV()
    snap.CommunityGirvanNewman(snap_graph, detected)

    nodes_communities = {}
    for label, members in enumerate(detected, start=2):
        for node_id in members:
            nodes_communities.setdefault(node_id, []).append(label)
    return nodes_communities
def split_communities(C_Net):
    """Split ``C_Net`` into CNM communities.

    Prints the number of communities found.

    Args:
        C_Net: snap graph to partition.

    Returns:
        List with one ``snap.TIntV`` of node ids per community.
    """
    CmtyV = snap.TCnComV()
    snap.CommunityCNM(C_Net, CmtyV)
    print(len(CmtyV))  # number of communities

    Cs = []
    for Cmty in CmtyV:
        NIdV = snap.TIntV()
        for NI in Cmty:
            # The snap vector method is Add (capital A), as used by the other
            # functions in this file; lower-case add does not exist.
            NIdV.Add(NI)
        Cs.append(NIdV)
    return Cs
def runCNM(nodelist, weightedGraph):
    """Cluster a graph with CNM and return the communities as lists.

    Args:
        nodelist: sequence of nodes; only its length is used, nodes are
            numbered ``0 .. len(nodelist) - 1``.
        weightedGraph: iterable of edges; items 0 and 1 of each edge are the
            endpoint node numbers.

    Returns:
        List of communities, each a list of node numbers.
    """
    print("Building snap graph")
    node_count = len(nodelist)
    graph = snap.TUNGraph_New(node_count, len(weightedGraph))
    for node_id in range(0, node_count):
        graph.AddNode(node_id)
    for edge in weightedGraph:
        graph.AddEdge(edge[0], edge[1])

    print("Clustering weighted graph")
    communities = snap.TCnComV()
    modularity = snap.CommunityCNM(graph, communities)
    print(f'Mod: {modularity}')

    clusters = []
    for community in communities:
        clusters.append([node for node in community])
    return clusters
def getComms(graph):
    """Detect CNM communities and index them by node.

    Prints the modularity.

    Returns:
        Tuple ``(comms, commDict)``: the ``snap.TCnComV`` of communities and a
        dict from node id to the index of the first community containing it.
    """
    comms = snap.TCnComV()
    modularity = snap.CommunityCNM(graph, comms)
    print('Modularity %s' % (modularity,))

    commDict = {}
    for index, community in enumerate(comms):
        for node_id in community:
            # A node already seen keeps its first community index.
            commDict.setdefault(node_id, index)
    return comms, commDict
def get_community_CNM(file_path, output_path):
    """Write a CNM community summary of the graph in ``file_path``.

    The graph is loaded with ``load_graph`` and passed through
    ``convert_to_undirected``. The report holds the modularity, the number of
    communities and one line per community with its size.

    Args:
        file_path: input passed to ``load_graph``.
        output_path: path of the text report to write.
    """
    loaded, _ = load_graph(file_path)
    undirected = convert_to_undirected(loaded)

    communities = snap.TCnComV()
    modularity = snap.CommunityCNM(undirected, communities)

    parts = [
        'Modularity: ', str(modularity),
        '\nNum of communities: ', str(len(communities)),
        '\nCommunities:\n',
    ]
    for community in communities:
        parts.append(str(len(community)) + '\n')

    with open(output_path, 'w') as report:
        report.write(''.join(parts))
def get_communities(G_Undir, chords_dict):
    """Print the CNM communities of ``G_Undir`` using chord names.

    Self-loops are deleted from ``G_Undir`` first. Each community is printed
    as a size header followed by ``chords_dict[node]`` for every member, then
    two blank lines; the modularity is printed last.

    Args:
        G_Undir: undirected snap graph; modified in place (self-loops removed).
        chords_dict: mapping from node id to the value printed for it.
    """
    print("************")
    print("Communities")
    snap.DelSelfEdges(G_Undir)

    communities = snap.TCnComV()
    score = snap.CommunityCNM(G_Undir, communities)
    for members in communities:
        print("Community: size %s" % (members.Len(),))
        for node_id in members:
            print(chords_dict[node_id])
        print("")
        print("")
    print("The modularity of the network is %f" % score)
def detectCommunities(self,
                      algo="CNM",
                      snapgraph=None,
                      H=None,
                      nxGraph=None,
                      write=True,
                      printout=True):
    """Detect communities and export the labelled graph.

    Uses the Clauset-Newman-Moore modularity-based greedy algorithm when
    ``algo`` is ``"CNM"`` and the Girvan-Newman betweenness-centrality based
    algorithm for any other value. Community labels (numbered from 1) are
    attached to the nodes of ``H.Graph`` under ``CNM_Label`` and the graph is
    exported through ``H.exportGraph("JSON")``.

    Args:
        algo: ``"CNM"`` or anything else for Girvan-Newman.
        snapgraph: snap graph to partition; built with
            ``self.buildSnapGraph`` from ``nxGraph`` when None.
        H: companion object of ``snapgraph`` exposing ``Graph`` and
            ``exportGraph``; required whenever ``snapgraph`` is given.
        nxGraph: networkx graph used only when ``snapgraph`` is None.
        write: when true, save the communities with ``self.writeTxt``.
        printout: when true, print every community, the modularity and the
            elapsed time.

    Returns:
        None.

    Raises:
        ValueError: if ``snapgraph`` is given without ``H``.
    """
    # Creates a SNAP Graph Object if none is provided
    if snapgraph is None:
        snapgraph, H = self.buildSnapGraph(networkxGraph=nxGraph)
    elif H is None:
        raise ValueError("H must be provided together with snapgraph")

    # Detect community and calculate modularity
    networkxGraph = H.Graph
    start = time.time()
    CmtyV = snap.TCnComV()
    if algo == "CNM":
        modularity = snap.CommunityCNM(snapgraph, CmtyV)
    else:
        modularity = snap.CommunityGirvanNewman(snapgraph, CmtyV)

    if printout:
        for i, Cmty in enumerate(CmtyV, start=1):
            print(f"Community {i}: ")
            print(list(Cmty))
            print("\n")
        print("The modularity of the network is %f" % modularity)
        print(f"Time : {time.time()-start} seconds")

    # Save to text file at savepath if write is true
    if write:
        self.writeTxt(CmtyV, modularity)

    # Create a dataframe of community assignments
    communities = [(c, i)
                   for i, cmty in enumerate(CmtyV, start=1)
                   for c in cmty]
    community_df = pd.DataFrame(communities,
                                columns=['hashtag_id', 'CNM_Label'])

    # Export labelled graph as a JSON file; announce it only once it is done.
    nx.set_node_attributes(
        networkxGraph,
        community_df.set_index('hashtag_id').to_dict('index'))
    H.exportGraph("JSON")
    print("Community labelled graph exported as ", self.name_arg, ".json")