def run(self):
    """Detect communities in ``self.graph`` with Girvan-Newman.

    Self-edges are removed first (community algorithms in SNAP require a
    simple graph).  The result is stored on the instance as
    ``self.community_list``: a list of communities, each a plain list of
    node ids.
    """
    snap.DelSelfEdges(self.graph)
    detected = snap.TCnComV()
    snap.CommunityGirvanNewman(self.graph, detected)
    # Convert SNAP's community vector into plain Python lists.
    self.community_list = [[member for member in cmty] for cmty in detected]
def get_communities(G_Undir, chords_dict): print("************") print("Communities") snap.DelSelfEdges(G_Undir) CmtyV = snap.TCnComV() modularity = snap.CommunityCNM(G_Undir, CmtyV) for Cmty in CmtyV: print "Community: size", Cmty.Len() for NI in Cmty: print chords_dict[NI] print "" print "" print "The modularity of the network is %f" % modularity
def generate_word_graph(hyp, poly, holo, type):
    """Build a SNAP graph over WordNet noun lemma names.

    Parameters:
        hyp  -- if truthy, add hyponym/instance-hyponym edges between lemmas
        poly -- if truthy, add polysemy edges (lemmas sharing a synset)
        holo -- if truthy, add member/part/substance holonym edges
        type -- 0: undirected graph; 1: directed, edges lemma -> hyponym lemma;
                other: directed, edges reversed (hyponym lemma -> lemma)

    Returns (G1, idToLemma, lemmaToId, hypedges, polyedges, holoedges)
    where the edge sets record (src_id, dst_id) pairs per relation type.

    BUG FIX: the holonym branch previously added its edges to ``hypedges``,
    leaving the returned ``holoedges`` set always empty; it now populates
    ``holoedges`` as the symmetric hyp/poly branches do.
    """
    if type == 0:
        G1 = snap.TUNGraph.New()
    else:
        G1 = snap.TNGraph.New()
    hypedges = set()
    holoedges = set()
    polyedges = set()
    idToLemma = dict()
    lemmaToId = dict()
    count = 0
    # One node per noun lemma name; keep both directions of the id mapping.
    for lemma_name in list(wn.all_lemma_names('n')):
        G1.AddNode(count)
        idToLemma[count] = lemma_name
        lemmaToId[lemma_name] = count
        count += 1
    for lemma_name in list(wn.all_lemma_names('n')):
        if hyp:
            for synset in wn.synsets(lemma_name, "n"):
                for synset2 in synset.hyponyms() + synset.instance_hyponyms():
                    for lemma_name2 in synset2.lemma_names():
                        lemma_name2 = lemma_name2.lower()
                        if type in [0, 1]:
                            G1.AddEdge(lemmaToId[lemma_name], lemmaToId[lemma_name2])
                            hypedges.add((lemmaToId[lemma_name], lemmaToId[lemma_name2]))
                        else:
                            # Reverse edge orientation for this graph type.
                            G1.AddEdge(lemmaToId[lemma_name2], lemmaToId[lemma_name])
                            hypedges.add((lemmaToId[lemma_name2], lemmaToId[lemma_name]))
        if poly:
            for synset in wn.synsets(lemma_name, "n"):
                for lemma_name2 in synset.lemma_names():
                    lemma_name2 = lemma_name2.lower()
                    G1.AddEdge(lemmaToId[lemma_name], lemmaToId[lemma_name2])
                    polyedges.add((lemmaToId[lemma_name], lemmaToId[lemma_name2]))
        if holo:
            for synset in wn.synsets(lemma_name, "n"):
                for synset2 in synset.member_holonyms() + synset.part_holonyms() \
                        + synset.substance_holonyms():
                    for lemma_name2 in synset2.lemma_names():
                        lemma_name2 = lemma_name2.lower()
                        G1.AddEdge(lemmaToId[lemma_name], lemmaToId[lemma_name2])
                        # was: hypedges.add(...) -- holonym edges belong here
                        holoedges.add((lemmaToId[lemma_name], lemmaToId[lemma_name2]))
    snap.DelSelfEdges(G1)
    return G1, idToLemma, lemmaToId, hypedges, polyedges, holoedges
def GetNetworkDegree(filePath):
    """Load an undirected edge list, clean it, and dump node degrees.

    Loads the network at ``filePath``, removes self-edges and zero-degree
    nodes, writes "<node_id>\t<degree>\r\n" lines to ``filePath + ".deg"``,
    and returns the {str(node_id): degree} dict.
    """
    All_set = snap.LoadEdgeList(snap.PUNGraph, filePath, 0, 1)  # load the training network
    snap.DelSelfEdges(All_set)       # remove self-loop edges
    snap.DelZeroDegNodes(All_set)    # drop isolated (degree-0) nodes
    degs = dict()
    for NI in All_set.Nodes():
        degs[str(NI.GetId())] = NI.GetDeg()
    # Use a with-statement so the file is closed even if a write fails
    # (the original open/close pair leaked the handle on error).
    with open(filePath + ".deg", "w") as f:
        for (k, v) in degs.items():
            f.write(str(k) + "\t" + str(v) + "\r\n")
    return degs
def loadCollabNet(path):
    """
    :param - path: path to edge list file

    return type: snap.PUNGraph
    return: Graph loaded from edge list at `path and self edges removed

    Do not forget to remove the self edges!
    """
    ############################################################################
    # BUG FIX: the graph was loaded from the hardcoded file 'CA-GrQc.txt',
    # silently ignoring the `path` argument; load from `path` as documented.
    Graph = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1)
    snap.DelSelfEdges(Graph)
    ############################################################################
    return Graph
def loadCollabNet(path):
    """
    :param - path: path to edge list file

    return type: snap.PUNGraph
    return: Graph loaded from edge list at `path and self edges removed

    Do not forget to remove the self edges!
    """
    ############################################################################
    # Load the undirected collaboration network, then strip self-loops.
    collab_graph = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1)
    snap.DelSelfEdges(collab_graph)
    # snap.DrawGViz(collab_graph, snap.gvlDot, 'collab.png', 'real world collaboration')
    ############################################################################
    return collab_graph
def loadCollabNet(path):
    """
    :param - path: path to edge list file

    return type: snap.PUNGraph
    return: Graph loaded from edge list at `path and self edges removed

    Do not forget to remove the self edges!
    """
    ############################################################################
    # Duplicate edges are automatically ignored for an (un)directed graph,
    # so only self-loops need explicit removal after loading.
    network = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1)
    snap.DelSelfEdges(network)
    ############################################################################
    return network
def main():
    """
    See usage message in module header block
    """
    # Command-line handling: a single "-d" flag plus one positional data dir.
    get_subgraph = False  # if True discard nodes without attribute data
    try:
        opts, args = getopt.getopt(sys.argv[1:], "d")
    except:
        usage(sys.argv[0])
    for opt, arg in opts:
        if opt == "-d":
            get_subgraph = True
        else:
            usage(sys.argv[0])
    if len(args) != 1:
        usage(sys.argv[0])
    data_dir = args[0]
    outputdir = '.'
    # NOTE(review): get_subgraph is parsed but never used below in this
    # script -- confirm whether the "-d" behavior was meant to be implemented.
    sys.stdout.write('loading data from ' + data_dir + '...')
    start = time.time()
    datazipfile = data_dir + os.path.sep + 'physician-shared-patient-patterns-2014-days30.zip'
    G = load_physician_referral_data(datazipfile)
    print time.time() - start, 's'
    snap.PrintInfo(G)
    # Remove loops (self-edges).
    # G is a PNGraph so multiple edges not allowed in this type anyway.
    snap.DelSelfEdges(G)
    snap.PrintInfo(G)
    # specify ordered nodelist to map sequential ids to original ids consistent
    nodelist = [node.GetId() for node in G.Nodes()]
    # Write the arc list and the node-id mapping next to each other so the
    # sequential ids used in the graph file can be mapped back to originals.
    graph_filename = outputdir + os.path.sep + "physician_referall_arclist" + os.path.extsep + "txt"
    nodeid_filename = outputdir + os.path.sep + "nodeid" + os.path.extsep + "txt"
    write_graph_file(graph_filename, G, nodelist)
    write_subgraph_nodeids(nodeid_filename, nodelist)
def main(genre): G_Multi, G_Directed, G_Undirected, dict = load_genre_graphs(genre) snap.DelSelfEdges(G_Undirected) print(G_Undirected.GetNodes()) node_id_to_pos = {} pos_to_node_id = {} i = 0 for NI in G_Undirected.Nodes(): node_id_to_pos[NI.GetId()] = i pos_to_node_id[i] = NI.GetId() i += 1 S, T, A, D = normalized_cut_minimization(G_Undirected, node_id_to_pos) S_chords = [dict[pos_to_node_id[pos]] for pos in S] T_chords = [dict[pos_to_node_id[pos]] for pos in T] print S_chords print '' print T_chords
def NetworkModel(filePath, TRY_TIMES, MAX_WAIK_LENGTH, MAX_TEST_TIMES, WALK_belta, WINDOW, V_SIZE):
    """Train node-embedding models TRY_TIMES times over the network at filePath.

    The network is loaded, self-edges and zero-degree nodes removed, then one
    word2vec-format vector file is written per run; runs whose output file
    already exists are skipped so the function is resumable.
    """
    All_set = snap.LoadEdgeList(snap.PUNGraph, filePath, 0, 1)  # load the training network
    snap.DelSelfEdges(All_set)       # remove self-loop edges
    snap.DelZeroDegNodes(All_set)    # drop zero-degree nodes
    for X in range(TRY_TIMES):
        # Output path encodes every hyper-parameter plus the run index.
        out_path = (filePath + "_m" + str(MAX_TEST_TIMES) + "_s" + str(V_SIZE)
                    + "_w" + str(WINDOW) + "_t" + str(X) + ".vec")
        if (os.path.exists(out_path)):
            continue
        # Train the distributed node representations for this run.
        mymodel = train_net2vec_total(All_set, MAX_WAIK_LENGTH, WALK_belta,
                                      WINDOW, V_SIZE, MAX_TEST_TIMES)
        mymodel.wv.save_word2vec_format(out_path, binary=False)
    return
def partly_undir_rewire(G, spokes):
    """Rewire G preserving per-node stub counts given in `spokes`.

    `spokes` is assumed to be an (n x 3) array of per-node stub counts:
    column 0 = out-stubs, column 1 = in-stubs, column 2 = undirected stubs
    -- TODO confirm against the caller.  Undirected stubs are realized as a
    reciprocal edge pair; directed stubs as single edges.  Returns a new
    PNGraph with self-loops removed.
    """
    # Work on a copy so the caller's stub counts are not consumed.
    spokes_copy = copy.deepcopy(spokes)
    # Empty directed graph with the same node count (0 edges).
    rewired = snap.GenRndGnm(snap.PNGraph, G.GetNodes(), 0)
    # Add undirected edges: repeatedly draw two distinct nodes with
    # probability proportional to their remaining undirected stubs.
    total_undirected = np.sum(spokes_copy[:, 2])
    while total_undirected > 1:  # stop when 0 or 1 stub remains (unmatchable)
        undir_edges = spokes_copy[:, 2]
        nonzero_stubs = np.where(undir_edges != 0)[0]
        probs = undir_edges[nonzero_stubs] / total_undirected
        random_stubs = np.random.choice(nonzero_stubs, size=2, p=probs)
        if random_stubs[0] == random_stubs[1]:
            continue  # reject self-loops; resample
        # Undirected stub pair becomes a reciprocal directed edge pair.
        rewired.AddEdge(random_stubs[0], random_stubs[1])
        rewired.AddEdge(random_stubs[1], random_stubs[0])
        spokes_copy[random_stubs[0], 2] -= 1
        spokes_copy[random_stubs[1], 2] -= 1
        total_undirected = np.sum(spokes_copy[:, 2])
    # Add in/out edges: draw a source by out-stub weight and a target by
    # in-stub weight independently, rejecting self-loops.
    total_directed = np.sum(spokes_copy[:, 0:2])
    while total_directed > 1:
        out_edges = spokes_copy[:, 0]
        in_edges = spokes_copy[:, 1]
        nonzero_out_stubs = np.where(out_edges != 0)[0]
        out_probs = out_edges[nonzero_out_stubs] / np.sum(out_edges)
        nonzero_in_stubs = np.where(in_edges != 0)[0]
        in_probs = in_edges[nonzero_in_stubs] / np.sum(in_edges)
        random_out = np.random.choice(nonzero_out_stubs, p=out_probs)
        random_in = np.random.choice(nonzero_in_stubs, p=in_probs)
        if random_out == random_in:
            continue  # reject self-loops; resample
        rewired.AddEdge(random_out, random_in)
        spokes_copy[random_out, 0] -= 1
        spokes_copy[random_in, 1] -= 1
        total_directed = np.sum(spokes_copy[:, 0:2])
    # Defensive: AddEdge above never adds self-loops, but clean up anyway.
    snap.DelSelfEdges(rewired)
    return rewired
def graph_cleaning(file_path):
    """Rewrite the edge file at file_path keeping only nodes in the max WCC.

    Loads the graph (H maps string node names to ids), restricts to the
    maximum weakly connected component, removes self-edges, then rewrites
    the original file in place, dropping lines whose first node fell outside
    the component and filtering remaining node tokens the same way.
    """
    Graph, H = load_graph(file_path)
    Graph = snap.GetMxWcc(Graph)
    snap.DelSelfEdges(Graph)
    # Collect the surviving node ids for O(1) membership tests below.
    nodes_set = set()
    for NI in Graph.Nodes():
        nodes_set.add(NI.GetId())
    with open(file_path, 'r') as f:
        raw_list = f.read().split('\n')
    edges_list = [edge_str.split() for edge_str in raw_list]
    # Opening in 'w' mode truncates the file; the print is just a marker.
    with open(file_path, 'w') as f:
        print '-----clear'
    with open(file_path, 'a') as f:
        for edge in edges_list:
            if len(edge) == 0:
                continue
            # Skip the whole line if its first node was pruned.
            if H.GetKeyId(edge[0]) not in nodes_set:
                continue
            # Keep only the tokens whose node survived the WCC filter.
            edge_cleaned = list()
            for node in edge:
                if H.GetKeyId(node) in nodes_set:
                    edge_cleaned.append(node)
            f.write(' '.join(edge_cleaned) + '\n')
def generate_meaning_graph(hyp, poly, holo):
    """Build an undirected SNAP graph over WordNet noun synsets.

    One node per synset; edges per relation flag:
      hyp  -- hyponym / instance-hyponym links
      poly -- synsets sharing a lemma name (polysemy)
      holo -- member / part / substance holonym links
    Returns (G1, idToSynset, synsetToId, hypedges, polyedges, holoedges).
    Side effect: sets the module-global numImp to the node id of
    'benthos.n.01' (debug prints included).
    """
    global numImp
    G1 = snap.TUNGraph.New()
    print wn.synsets('festoon')  # debug output
    hypedges = set()
    holoedges = set()
    polyedges = set()
    idToSynset = dict()
    synsetToId = dict()
    count = 0
    numEl = 0  # NOTE(review): assigned but never used -- confirm intent
    for synset in list(wn.all_synsets('n')):
        # Record the id of this particular synset for later use (debug aid).
        if synset == wn.synset('benthos.n.01'):
            print synset
            numImp = count
            print count
        G1.AddNode(count)
        idToSynset[count] = synset
        synsetToId[synset] = count
        count += 1
    for synset in list(wn.all_synsets('n')):
        if hyp:
            for synset2 in synset.hyponyms() + synset.instance_hyponyms():
                G1.AddEdge(synsetToId[synset], synsetToId[synset2])
                hypedges.add((synsetToId[synset], synsetToId[synset2]))
        if poly:
            # Two synsets are linked if they share any lemma name.
            for lemma_name in synset.lemma_names():
                for synset2 in wn.synsets(lemma_name, "n"):
                    G1.AddEdge(synsetToId[synset], synsetToId[synset2])
                    polyedges.add((synsetToId[synset], synsetToId[synset2]))
        if holo:
            for synset2 in synset.member_holonyms() + synset.part_holonyms(
            ) + synset.substance_holonyms():
                G1.AddEdge(synsetToId[synset], synsetToId[synset2])
                holoedges.add((synsetToId[synset], synsetToId[synset2]))
    snap.DelSelfEdges(G1)
    return G1, idToSynset, synsetToId, hypedges, polyedges, holoedges
def main():
    """
    See usage message in module header block
    """
    # Command-line handling: optional "-d" flag plus one positional data dir.
    get_subgraph = False  # if True discard nodes without attribute data
    try:
        opts, args = getopt.getopt(sys.argv[1:], "d")
    except:
        usage(sys.argv[0])
    for opt, arg in opts:
        if opt == "-d":
            get_subgraph = True
        else:
            usage(sys.argv[0])
    if len(args) != 1:
        usage(sys.argv[0])
    data_dir = args[0]
    outputdir = '.'
    sys.stdout.write('loading data from ' + data_dir + '...')
    start = time.time()
    (G, patdata, colnames) = load_nber_patent_data(data_dir)
    print time.time() - start, 's'
    snap.PrintInfo(G)
    # Remove loops (self-edges).
    # There is actually for some reason one loop (patent id 5489070).
    # G is a PNGraph so multiple edges not allowed in this type anyway.
    snap.DelSelfEdges(G)
    snap.PrintInfo(G)
    # We do not add attributes to nodes as SNAP node attribute as
    # these seem to get lost by varoius operations including subgraph
    # that we need to use, so instead maintain them just in the
    # dictionary mapping the original node ids to the attributes -
    # fortunately the original node ids are maintained by
    # GetSubGraph() so we can used these to index the patdata
    # dictoinary in the subgraphs
    # Cannot do this:
    #patdata[:][colnames['COUNTRY']] = convert_to_int_cat(patdata[:][colnames['COUNTRY']]) # like factor in R
    # as get "TypeError: unhashable type" so have to do this instead:
    # Convert COUNTRY to integer category codes (like factor() in R).
    id_countries = [(k, p[colnames['COUNTRY']])
                    for (k, p) in patdata.iteritems()]
    id_countries_int = convert_to_int_cat([x[1] for x in id_countries])
    for i in xrange(len(id_countries)):
        patdata[id_countries[i][0]][colnames['COUNTRY']] = id_countries_int[i]
    for attr in ['COUNTRY']:
        sys.stdout.write('There are %d NA for %s\n' %
                         ([p[colnames[attr]]
                           for p in patdata.itervalues()].count('NA'), attr))
    # Convert POSTATE to integer category codes the same way.
    id_states = [(k, p[colnames['POSTATE']])
                 for (k, p) in patdata.iteritems()]
    id_states_int = convert_to_int_cat([x[1] for x in id_states])
    for i in xrange(len(id_states)):
        patdata[id_states[i][0]][colnames['POSTATE']] = id_states_int[i]
    for attr in ['POSTATE']:
        sys.stdout.write('There are %d NA for %s\n' %
                         ([p[colnames[attr]]
                           for p in patdata.itervalues()].count('NA'), attr))
    # There are 3774768 unique patent identifiers in the citation data but
    # only 2923922 unique patent identifiers in the patent data (patdata).
    # The size of the set intersection of these patent ids is 2755865
    # i.e. there is patent data for 73% of the patents in the citation network.
    # Presumably this is because the patdata (pat63_99.txt) contains all
    # utilit patents in the period 1963 to 1999 but the citation data
    # cit75_99.txt contains all US patent citations for utility patents
    # granted in the period 1975 to 1999, so there are patent ids in here
    # from earlier periods that are cited by patents in that period,
    # for which therefore we don't have the patent data (prior to 1963).
    # So we have to set the data for all patents in network that we have it
    # for, and the rest (27%) to NA.
    nodelist = list(
    )  # keep the iteration below in list so we always use same order in future
    if get_subgraph:
        # get subgraph induced by nodes that have patent data in the
        # pat63_99.txt file
        nodeVec = snap.TIntV()  # nodelist in TIntV format for use in SNAP
        for node in G.Nodes():
            patid = node.GetId()
            if patdata.has_key(patid):
                nodelist.append(patid)
                nodeVec.Add(patid)
        G = snap.GetSubGraph(G, nodeVec)
        print 'Subgraph with only nodes with patent attribute data:'
        snap.PrintInfo(G)
    else:
        # keep all the graph and just put NA for all data attributes
        citepatent_count = 0
        patentdata_count = 0
        for node in G.Nodes():
            citepatent_count += 1
            patid = node.GetId()
            nodelist.append(patid)
            #print citepatent_count, patentdata_count, patid #XXX
            if not patdata.has_key(patid):
                #print 'NA for ', patid #XXX
                patdata[patid] = len(colnames) * ["NA"]
                patdata[patid][
                    colnames['HASDATA']] = 0  # no data on this patent
            else:
                patentdata_count += 1
        sys.stdout.write(
            "There are %d unique cited/citing patents of which %d (%f%%) have patent data\n"
            % (citepatent_count, patentdata_count, 100 *
               float(patentdata_count) / citepatent_count))
    # Write the graph plus one attribute file per attribute kind.
    graph_filename = outputdir + os.path.sep + "patent_citations" + os.path.extsep + "txt"
    write_graph_file(graph_filename, G, nodelist)
    attributes_binary_filename = outputdir + os.path.sep + "patent_binattr" + os.path.extsep + "txt"
    attributes_categorical_filename = outputdir + os.path.sep + "patent_catattr" + os.path.extsep + "txt"
    attributes_continuous_filename = outputdir + os.path.sep + "patent_contattr" + os.path.extsep + "txt"
    write_attributes_file_binary(attributes_binary_filename, G, nodelist,
                                 patdata, colnames)
    write_attributes_file_categorical(attributes_categorical_filename, G,
                                      nodelist, patdata, colnames)
    write_attributes_file_continuous(attributes_continuous_filename, G,
                                     nodelist, patdata, colnames)
    nodeid_filename = outputdir + os.path.sep + "nodeid" + os.path.extsep + "txt"
    write_subgraph_nodeids(nodeid_filename, nodelist)
def main():
    """
    See usage message in module header block
    """
    # Command-line handling: optional "-d" flag plus one positional data dir.
    get_subgraph = False  # if True discard nodes without attribute data
    try:
        opts,args = getopt.getopt(sys.argv[1:], "d")
    except:
        usage(sys.argv[0])
    for opt,arg in opts:
        if opt == "-d":
            get_subgraph = True
        else:
            usage(sys.argv[0])
    if len(args) != 1:
        usage(sys.argv[0])
    data_dir = args[0]
    outputdir = '.'
    sys.stdout.write('loading data from ' + data_dir + '...')
    start = time.time()
    (G, patdata, colnames) = load_epo_patent_data(data_dir)
    print time.time() - start, 's'
    snap.PrintInfo(G)
    # Remove loops (self-edges).
    # There is actually for some reason 92 nodes with self-loops
    # e.g. EP0021443
    # G is a PNGraph so multiple edges not allowed in this type anyway.
    snap.DelSelfEdges(G)
    snap.PrintInfo(G)
    # We do not add attributes to nodes as SNAP node attribute as
    # these seem to get lost by varoius operations including subgraph
    # that we need to use, so instead maintain them just in the
    # dictionary mapping the original node ids to the attributes -
    # fortunately the original node ids are maintained by
    # GetSubGraph() so we can used these to index the patdata
    # dictoinary in the subgraphs
    # convert categorical attribute values to integers like factor in R
    for cat_colname in ['Language','Country']:
        catvalues = [(k, p[colnames[cat_colname]]) for (k,p) in patdata.iteritems()]
        catvalues_int = convert_to_int_cat([x[1] for x in catvalues])
        for i in xrange(len(catvalues)):
            patdata[catvalues[i][0]][colnames[cat_colname]] = catvalues_int[i]
        sys.stdout.write('There are %d NA for %s\n' % ([p[colnames[cat_colname]] for p in patdata.itervalues()].count('NA'), cat_colname))
    # convert categorical set attribute values to integers like factor in R
    for set_colname in ['Classes','Sections']:
        setvalues = [(k, p[colnames[set_colname]]) for (k,p) in patdata.iteritems()]
        setvalues_int = convert_to_int_set([x[1].split(',') for x in setvalues])
        for i in xrange(len(setvalues)):
            patdata[setvalues[i][0]][colnames[set_colname]] = setvalues_int[i]
        sys.stdout.write('There are %d NA for %s\n' % ([p[colnames[set_colname]] for p in patdata.itervalues()].count('NA'), set_colname))
    nodelist = list() # keep the iteration below in list so we always use same order in future
    if get_subgraph:
        # get subgraph induced by nodes that have patent data in the
        # pat63_99.txt file
        nodeVec = snap.TIntV() # nodelist in TIntV format for use in SNAP
        for node in G.Nodes():
            patid = node.GetId()
            if patdata.has_key(patid):
                nodelist.append(patid)
                nodeVec.Add(patid)
        G = snap.GetSubGraph(G, nodeVec)
        print 'Subgraph with only nodes with patent attribute data:'
        snap.PrintInfo(G)
    else:
        # keep all the graph and just put NA for all data attributes
        citepatent_count = 0
        patentdata_count = 0
        for node in G.Nodes():
            citepatent_count += 1
            patid = node.GetId()
            nodelist.append(patid)
            #print citepatent_count, patentdata_count, patid #XXX
            if not patdata.has_key(patid):
                #print 'NA for ', patid #XXX
                patdata[patid] = len(colnames)*["NA"]
            else:
                patentdata_count += 1
        sys.stdout.write("There are %d unique cited/citing patents of which %d (%f%%) have patent data\n" % (citepatent_count, patentdata_count, 100*float(patentdata_count)/citepatent_count))
    # Write the graph plus one attribute file per attribute kind.
    graph_filename = outputdir + os.path.sep + "patent_citations" + os.path.extsep + "txt"
    write_graph_file(graph_filename, G, nodelist)
    attributes_binary_filename = outputdir + os.path.sep + "patent_binattr" + os.path.extsep + "txt"
    attributes_categorical_filename = outputdir + os.path.sep + "patent_catattr" + os.path.extsep + "txt"
    attributes_continuous_filename = outputdir + os.path.sep + "patent_contattr" + os.path.extsep + "txt"
    attributes_set_filename = outputdir + os.path.sep + "patent_setattr" + os.path.extsep + "txt"
    write_attributes_file_binary(attributes_binary_filename, G, nodelist, patdata, colnames)
    write_attributes_file_categorical(attributes_categorical_filename, G, nodelist, patdata, colnames)
    write_attributes_file_continuous(attributes_continuous_filename, G, nodelist, patdata, colnames)
    write_attributes_file_set(attributes_set_filename, G, nodelist, patdata, colnames)
    nodeid_filename = outputdir + os.path.sep + "nodeid" + os.path.extsep + "txt"
    write_subgraph_nodeids(nodeid_filename, nodelist)
    # write patent sections as original letters before converting to int
    # This cannot be used by EstimNetDirected but is useful to read in R
    # and factor there so that the original names are preserved
    sections_filename = outputdir + os.path.sep + "patent_string_categories" + os.path.extsep + "txt"
    attrnames = ['CPCsections','LanguageCode','CountryCode']
    with open(sections_filename, 'w') as f:
        f.write(' '.join(attrnames) + '\n')
        for i in nodelist:
            for attrname in attrnames:
                val = patdata[i][colnames[attrname]]
                # 'XX' is treated as missing, same as 'NA'.
                val = 'NA' if (val == 'NA' or val == 'XX') else val
                f.write(val)
                if attrname == attrnames[-1]:
                    f.write('\n')
                else:
                    f.write(' ' )
def main():
    """Run CNM community detection on the YouTube graph and plot category
    proportions per cluster.  (Chunk ends in an unterminated ''' -- the file
    appears truncated here.)"""
    # Load data
    nodes = pd.read_csv("../data/nodes.csv", sep='\t', index_col=0)
    # Data in nice form
    headers = list(nodes.columns)
    nodes = np.asarray(nodes)
    # Load social network accordingly: prefer the cached binary graph.
    if path.exists("../data/youtube.graph"):
        FIn = snap.TFIn("../data/youtube.graph")
        social_network = snap.TNGraph.Load(FIn)
    else:
        edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
        edges = np.asarray(edges).astype(int)
        social_network = data2dag(edges, nodes.shape[0])
    # Check for self edges (report only; removed below on the undirected copy)
    for e in social_network.Edges():
        if e.GetSrcNId() == e.GetDstNId():
            print("Self Loop Found:", e.GetSrcNId())
    # CNM Algorithm from snap.py (requires an undirected simple graph)
    print("Computing CNM")
    start = timeit.default_timer()
    CmtyV = snap.TCnComV()
    undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
    snap.DelSelfEdges(undirected)
    the_modularity = snap.CommunityCNM(undirected, CmtyV)
    stop = timeit.default_timer()
    # node_to_cmty[i] = community index of node i; cmty_sizes[c] = |community c|
    node_to_cmty = np.zeros(nodes.shape[0])
    cmty_sizes = np.zeros(len(CmtyV))
    for i in range(len(CmtyV)):
        for node in CmtyV[i]:
            node_to_cmty[node] = i
        cmty_sizes[i] = len(CmtyV[i])
    cmtys = [[node for node in cmty] for cmty in CmtyV]
    '''
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)
    G = nx.Graph()
    G.add_nodes_from(range(nodes.shape[0]))
    G.add_edges_from(list(map(tuple, edges)))
    '''
    #assert(is_partition(G, cmtys))
    #print("Calculating Modularity")
    #modul = modularity(G, cmtys)
    print("Results from Clauset-Newman-Moore:")
    #print("Modularity:",modul)
    print("Number of clusters:", len(CmtyV))
    print("Time elapsed:", stop - start)
    # Fun category stuff to do
    upload_col = headers.index('category')
    categories = set()
    for i in range(nodes.shape[0]):
        categories.add(nodes[i][upload_col])
    idx_to_categories = list(categories)
    print("Number of categories:", len(idx_to_categories))
    categories_to_idx = dict()
    for i in range(len(idx_to_categories)):
        categories_to_idx[idx_to_categories[i]] = i
    # Communities and categories: count per-community category frequencies,
    # then normalize rows to proportions.
    cmty_category_count = np.zeros((len(CmtyV), len(idx_to_categories)))
    for i in range(nodes.shape[0]):
        cmty_category_count[int(node_to_cmty[i]),
                            categories_to_idx[nodes[i][upload_col]]] += 1
    cmty_category_count = cmty_category_count / cmty_sizes[:, np.newaxis]
    # Create graphs per category
    plt.figure()
    plt.plot(sorted(np.max(cmty_category_count, axis=1), reverse=True),
             label="Top proportion")
    plt.plot(0.5 * np.ones(cmty_category_count.shape[0]),
             label="Majority Threshold", linestyle='dashed')
    plt.title("Category Proportions in Clusters")
    plt.xlabel("Cluster")
    plt.ylabel("Proportion")
    plt.legend()
    plt.savefig("../figures/category_top_clusters.png")
    '''
    for i in range(cmty_category_count.shape[0]):
        top_category = np.argmax(cmty_category_count[i])
        print("Community "+str(i)+": "+str(idx_to_categories[top_category])+",",cmty_category_count[i][top_category])
    '''
    '''
def deleteSelfEdges(self):
    """Remove all self-loop edges from the wrapped raw graph, in place."""
    snap.DelSelfEdges(self.rawGraph)
useredges.to_csv('temp/mergededges.csv', index=None) # Build graph from temp files using SNAP library context = snap.TTableContext() e_schema = snap.Schema() e_schema.Add(snap.TStrTAttrPr("source", snap.atStr)) e_schema.Add(snap.TStrTAttrPr("target", snap.atStr)) n_schema = snap.Schema() n_schema.Add(snap.TStrTAttrPr("username", snap.atStr)) edgetable = snap.TTable.LoadSS(e_schema, 'temp/mergededges.csv', context, ",", snap.TBool(True)) nodetable = snap.TTable.LoadSS(n_schema, 'temp/mergednodes.csv', context, ",", snap.TBool(True)) edgeattrv = snap.TStrV() nodeattrv = snap.TStrV() nodeattrv.Add("username") net = snap.ToNetwork(snap.PNEANet, edgetable, "source", "target", edgeattrv, nodetable, "username", nodeattrv, snap.aaFirst) # Need to remove self-edges to compute rich club coefficient snap.DelSelfEdges(net) # Store the results name = str(pid) + '_usergraph' snap.SaveEdgeListNet(net, outpath + name + '.csv', 'Network of issues, PR and commits') generateTables(outpath, name, net)
def main(): Component = snap.TIntPrV() #loading the real world graph realWorld = snap.LoadEdgeList(snap.PUNGraph, "CA-HepTh.txt", 0, 1) #deleting the self-edges from the graph snap.DelSelfEdges(realWorld) #calling the function wikiVotingNetwork() #Taking number of nodes in a graph from real world network n = realWorld.GetNodes() #Generating an Undirected Graph G = snap.TUNGraph.New() #Taking number of edges in a graph from user e = int(raw_input('Enter the number of Random Edges : ')) p = float( raw_input('Enter the Probability of Edges between Nodes from 0-1 : ')) #Generating Number of Nodes for i in range(n): #Adding Nodes into the graph G.AddNode(i) #calling the function erdosRenyi(G, p) #Printing the Clustering print 'Erdos Renyi Clustering Co-efficient: ', clustCoefficient(G) diam = snap.GetBfsFullDiam(G, 9877, False) #printing the diameter print 'Erdos Renyi Diameter: ', diam #plotting the graph snap.PlotOutDegDistr(G, "Erdos-Renyi", "Un-Directed graph - Out-Degree Distribution") snap.GetSccSzCnt(G, Component) for comp in Component: #printing number of strongly connected components with size print "Size: %d - Number of Connected Component in Erdos-Renyi: %d" % ( comp.GetVal1(), comp.GetVal2()) #printing fraction of nodes and edges print "Fraction of Nodes and Edges in Erdos Renyi: ", snap.GetMxSccSz(G) #Drawing a Erdos Renyi Graph snap.DrawGViz(G, snap.gvlDot, "erdosRenyi1.png", "Erdos Renyi") #calling the function smallWorldRandomNetwork(G, e) #printing the clustering coefficient print 'Small World Random Network Clustering Co-efficient: ', clustCoefficient( G) diam = snap.GetBfsFullDiam(G, 9877, False) #printing the diameter print 'Small World Random Network Diameter: ', diam snap.GetSccSzCnt(G, Component) for comp in Component: #printing number of strongly connected components with size print "Size: %d - Number of Connected Component in Small World: %d" % ( comp.GetVal1(), comp.GetVal2()) #fraction of nodes and edges in small world print "Fraction of Nodes 
and Edges in Small World: ", snap.GetMxSccSz(G) #plotting the graph snap.PlotOutDegDistr(G, "Small-World", "Un-Directed graph - Out-Degree Distribution") #drawinf the graph snap.DrawGViz(G, snap.gvlDot, "smallWorld1.png", "Small World Random Network") #calculating the clustering co-efficient print 'Real World Random Network Clustering Co-efficient: ', clustCoefficient( realWorld) diam = snap.GetBfsFullDiam(G, 9877, False) print 'Real World Random Network Diameter: ', diam snap.GetSccSzCnt(realWorld, Component) for comp in Component: #printing number of strongly connected components with size print "Size: %d - Number of Weekly Connected Component in Real World: %d" % ( comp.GetVal1(), comp.GetVal2()) #printing fraction of nodes and edges print "Fraction of Nodes and Edges in Small World: ", snap.GetMxSccSz( realWorld) #plotting the real world network graph snap.PlotOutDegDistr(realWorld, "real-World", "Un-Directed graph - Out-Degree Distribution") #Drawing Real WOrld Graph snap.DrawGViz(realWorld, snap.gvlDot, "realWorld.png", "Real World Random Network")
# If an edge exists to or from a node in CnCom, connect that edge to the new representative node. for NI in graph.Nodes(): if NI.GetId() in nodes: for Id_out in NI.GetOutEdges(): graph.AddEdge(num_nodes, Id_out) for Id_in in NI.GetInEdges(): graph.AddEdge(Id_in, num_nodes) # Delete all nodes in CnCom for NI in nodes: node_map_SCC[NI] = num_nodes graph.DelNode(NI) # Delete all self loops and save graph as the SCC graph snap.DelSelfEdges(graph) graph.Defrag() snap.SaveEdgeList(graph, file_name + "SCC.txt", "Save as tab-separated list of edges") # Section of code responsible for computing sets of nodes that have the same descendants # Create a bfs tree from every node and map each node to a set of all its descendants for NI in graph.Nodes(): BfsTree = snap.GetBfsTree(graph, NI.GetId(), True, False) nodes = set() for EI in BfsTree.Edges(): nodes.add(EI.GetDstNId()) all_descendants[NI.GetId()] = nodes # Iterate over the list of all descendants to pair the nodes that have the same descendants for k1, v1 in all_descendants.items():
sw_eed = 0 for i in range(len(sw_qk)): sw_eed += (sw_keys[i] - 1) * sw_qk[i] / sw_sumq print 'SW Expected Excess Degree:', sw_eed sw_ed = 0 for i in range(len(sw_values_p)): sw_ed += sw_keys[i] * sw_values_p[i] print 'SW Expected Degree:', sw_ed ax.plot(sw_keys, sw_qk, marker='*', linestyle='-.', label='SW') # Real-World Collaboration Network colab_net = snap.LoadEdgeList(snap.PUNGraph, "ca-GrQc.txt", 0, 1, '\t') snap.DelSelfEdges(colab_net) ca_deg_dist = {} ca_keys = [] ca_values = [] for n in colab_net.Nodes(): if n.GetOutDeg() in ca_deg_dist: ca_deg_dist[n.GetDeg()] += 1 else: ca_deg_dist[n.GetDeg()] = 1 for key in sorted(ca_deg_dist.iterkeys()): ca_keys.append(key) ca_values.append(ca_deg_dist[key])
def main():
    """Build and compare an Erdos-Renyi graph, a small-world graph, and the
    CA-HepTh collaboration network (clustering coefficient, effective
    diameter, max-SCC statistics, degree-distribution plots).

    BUG FIX: the Erdos-Renyi section called snap.GetMxSccSz(Small_world)
    before Small_world was defined (NameError); it now measures ERM.
    """
    # Number of nodes
    n = int(raw_input("Please enter the number of nodes"))
    # Probability of an edge between nodes
    p = float(
        raw_input(
            "Please enter the value of probability of an edge between nodes"))
    # Random Input of x pairs of nodes
    x = int(raw_input("Please enter the number of random, x pairs of nodes:"))
    # Empty graph and add nodes
    ERM = Empty_graph(n)
    # Add edges to the graph using personal Erdos Renyi Model
    Erdos_Renyi(ERM, p)
    # Erdos Renyi Clustering Coeffecient
    print("Clustering Coeffecient: ", clustering_coffecient(ERM))
    # Diameter
    diameter_ERM = snap.GetBfsEffDiamAll(ERM, 10, False)
    print(diameter_ERM[2])
    # Largest Strongly Connected Component
    # FIX: was snap.GetMxSccSz(Small_world) -- Small_world did not exist yet.
    print("Largest Strongly Connected Component - Maximum size:",
          snap.GetMxSccSz(ERM))
    # Largest Size of Graph
    ERM_size = snap.GetMxScc(ERM).GetEdges()
    print(ERM_size)
    # Plot of Degree Distribution
    snap.PlotOutDegDistr(ERM, "ERMGraph", "ERM Degree Distribution")
    # Add Small World Network
    Small_world = Empty_graph(n)
    first_edges(Small_world)
    second_edges(Small_world)
    random_edges(Small_world, x)
    # Small World Clustering Coeffecient
    print("Clustering Coeffecient: ", clustering_coffecient(Small_world))
    # Diameter
    diameter_Small_world = snap.GetBfsEffDiamAll(Small_world, 10, False)
    print(diameter_Small_world[2])
    # Largest Strongly Connected Component
    print("Largest Strongly Connected Component - Maximum size:",
          snap.GetMxSccSz(Small_world))
    # Largest Size of Graph
    Small_world_size = snap.GetMxScc(Small_world).GetEdges()
    print(Small_world_size)
    # Plot of Degree Distribution
    snap.PlotOutDegDistr(Small_world, "SmallWorldGraph",
                         "Small World Degree Distribution")
    # Add Collaboration Network
    Collaboration_Network = snap.LoadEdgeList(snap.PUNGraph, "CA-HepTh.txt",
                                              0, 1)
    snap.DelSelfEdges(Collaboration_Network)
    snap.PrintInfo(Collaboration_Network, "Graph Statistics", "info.txt",
                   False)
    # Collaboration Network Clustering Coeffecient
    print("Clustering Coeffecient: ",
          clustering_coffecient(Collaboration_Network))
    # Diameter
    diameter_Collaboration_Network = snap.GetBfsEffDiamAll(
        Collaboration_Network, 10, False)
    print(diameter_Collaboration_Network[2])
    # Largest Strongly Connected Component
    print("Largest Strongly Connected Component - Maximum size:",
          snap.GetMxSccSz(Collaboration_Network))
    # Largest Size of Graph
    Collaboration_Network_size = snap.GetMxScc(
        Collaboration_Network).GetEdges()
    print(Collaboration_Network_size)
    # Plot of Degree Distribution
    snap.PlotOutDegDistr(Collaboration_Network, "CollaborationNetworkGraph",
                         "Collaboration Network Degree Distribution")
    # NOTE(review): this dangling `return` closes a function whose body was
    # truncated above this chunk.
    return

def have_common_friends(G, a, b, node_is_B):
    """Return True iff a and b share a neighbor that is neither a nor b and
    is not flagged in node_is_B (indexable by node id)."""
    friends_a = []
    for Id in G.GetNI(a).GetOutEdges():
        if Id != a and Id != b and not node_is_B[Id]:
            friends_a.append(Id)
    for Id in G.GetNI(b).GetOutEdges():
        if Id in friends_a:
            return True
    return False

# Script body: load the Slashdot network, rank nodes by PageRank, and pick
# initial adopters (the chunk is truncated mid-loop below).
LoadedGraph = snap.LoadEdgeList(snap.PUNGraph, "Slashdot0902.txt", 0, 1, '\t')
snap.DelSelfEdges(LoadedGraph)
random.seed(datetime.now())
PRankH = snap.TIntFltH()
snap.GetPageRank(LoadedGraph, PRankH)
# Sort (node id, pagerank) pairs by descending pagerank.
PRankH_arr = []
for item in PRankH:
    PRankH_arr.append((item, PRankH[item]))
PRankH_arr.sort(key=itemgetter(1), reverse=True)
try:
    f = open("p4_result.txt", "w+")
except:
    print("Some error occurs about open file")
for num_init_adopters in num_init_adopters_arr:
    key_nodes_Id = []
    for i in range(num_init_adopters):
def loadCollabNet(path):
    """Load the undirected collaboration network from the tab-separated edge
    list at `path`, strip self-loops, and return the snap.PUNGraph."""
    graph = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1, '\t')
    snap.DelSelfEdges(graph)
    return graph
def main():
    """Run CNM community detection on the YouTube graph, write the community
    assignment back into the node table, and verify modularity with networkx.
    (Chunk ends in an unterminated ''' -- the file appears truncated here.)"""
    # Load data: prefer the node table that already has community columns.
    if path.exists("../data/cmty_nodes.csv"):
        node_upload = "../data/cmty_nodes.csv"
    elif path.exists("../data/nodes.csv"):
        node_upload = "../data/nodes.csv"
    else:
        print("NO NODES TO UPLOAD!")
        assert(False)
    pd_nodes = pd.read_csv(node_upload, sep='\t', index_col=0)
    # Data in nice form
    headers = list(pd_nodes.columns)
    nodes = np.asarray(pd_nodes)
    # Load social network accordingly: prefer the cached binary graph.
    if path.exists("../data/youtube.graph"):
        FIn = snap.TFIn("../data/youtube.graph")
        social_network = snap.TNGraph.Load(FIn)
    else:
        edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
        edges = np.asarray(edges).astype(int)
        social_network = data2dag(edges, nodes.shape[0])
    # Check for self edges (report only; removed on the undirected copy below)
    for e in social_network.Edges():
        if e.GetSrcNId() == e.GetDstNId():
            print("Self Loop Found:",e.GetSrcNId())
    # CNM Algorithm from snap.py (requires an undirected simple graph)
    print("Computing CNM")
    start = timeit.default_timer()
    CmtyV = snap.TCnComV()
    undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
    snap.DelSelfEdges(undirected)
    the_modularity = snap.CommunityCNM(undirected, CmtyV)
    stop = timeit.default_timer()
    # node_to_cmty[i] = community index of node i; cmty_sizes[c] = |community c|
    node_to_cmty = np.zeros(nodes.shape[0]).astype(int)
    cmty_sizes = np.zeros(len(CmtyV))
    for i in range(len(CmtyV)):
        for node in CmtyV[i]:
            node_to_cmty[node] = i
        cmty_sizes[i] = len(CmtyV[i])
    cmtys = [[node for node in cmty] for cmty in CmtyV]
    '''
    m = 0
    for i in range(len(CmtyV)):
        Nodes = snap.TIntV()
        for elem in CmtyV[i]:
            Nodes.Add(int(elem))
        m += snap.GetModularity(social_network, Nodes, social_network.GetEdges())
    '''
    # Rebuild an undirected networkx copy to validate the partition.
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)
    G = nx.Graph()
    G.add_nodes_from(range(nodes.shape[0]))
    G.add_edges_from(list(map(tuple, edges)))
    # Add communities to nodes and persist the augmented node table.
    col_name = "cnm_cmty"
    pd_nodes[col_name] = node_to_cmty
    pd_nodes.to_csv("../data/cmty_nodes.csv", sep='\t')
    assert(is_partition(G, cmtys))
    print("Calculating Modularity")
    modul = modularity(G, cmtys)
    print("Results from Clauset-Newman-Moore:")
    print("Modularity:",modul)
    print("Number of clusters:",len(CmtyV))
    print("Time elapsed:",stop - start)
    # Fun category stuff to do
    '''
    upload_col = headers.index('category')
    categories = set()
    for i in range(nodes.shape[0]):
        categories.add(nodes[i][upload_col])
    idx_to_categories = list(categories)
    print("Number of categories:",len(idx_to_categories))
    categories_to_idx = dict()
    for i in range(len(idx_to_categories)):
        categories_to_idx[idx_to_categories[i]] = i
    # Communities and categories
    cmty_category_count = np.zeros((len(CmtyV),len(idx_to_categories)))
    for i in range(nodes.shape[0]):
        cmty_category_count[int(node_to_cmty[i]),categories_to_idx[nodes[i][upload_col]]] += 1
    cmty_category_count = cmty_category_count/cmty_sizes[:,np.newaxis]
    '''
    # Create graphs per category
    '''
    plt.figure()
    for i in range(len(idx_to_categories)):
        if (str(idx_to_categories[i]) != "nan") and (idx_to_categories[i] != " UNA "):
            plt.plot(sorted(cmty_category_count[:,i], reverse=True), label=idx_to_categories[i])
    plt.title("Category Proportions in Clusters")
    plt.xlabel("Cluster")
    plt.ylabel("Proportion")
    plt.legend(bbox_to_anchor=(1.04,1), loc="upper left")
    plt.savefig("../figures/category_proportions_clusters.png", bbox_inches="tight")
    '''
    '''
    for i in range(cmty_category_count.shape[0]):
        top_category = np.argmax(cmty_category_count[i])
        print("Community "+str(i)+": "+str(idx_to_categories[top_category])+",",cmty_category_count[i][top_category])
    '''
    '''