def estimate4SubgraphFrequencies(Network, connected=True):
    """Estimate 4-node subgraph frequencies by random sampling.

    The graph is converted to an undirected ``snap.PUNGraph`` and 4-node
    induced subgraphs are drawn with ``snap.GetRndSubGraph``.  The number of
    draws is read from the module-level name ``num_samples``.

    :param Network: graph accepted by ``snap.ConvertGraph``.
    :param connected: when True, samples that are not connected are skipped
        and do not contribute to the normalisation.
    :return: list of 10 relative frequencies, indexed as
        0 -> 0 edges, 1 -> 1 edge, 2 -> 2 adjacent edges,
        3 -> 2 non-adjacent edges, 4 -> 3-star, 5 -> 3-path,
        6 -> tailed triangle, 7 -> 4-cycle, 8 -> chordal 4-cycle,
        9 -> 4-clique.  All zeros when no sample was counted.
    """

    def _max_degree(sub):
        # Degree of a maximum-degree node of the sampled subgraph.
        return sub.GetNI(snap.GetMxDegNId(sub)).GetDeg()

    subgraph_counts = np.zeros(10)
    G = snap.ConvertGraph(snap.PUNGraph, Network)

    for _ in range(num_samples):
        sG = snap.GetRndSubGraph(G, 4)
        num_edges = sG.GetEdges()

        # Fewer than 3 edges can never connect 4 nodes.
        if connected and num_edges < 3:
            continue

        if num_edges == 0:
            subgraph_counts[0] += 1
        elif num_edges == 1:
            subgraph_counts[1] += 1
        elif num_edges == 2:
            if _max_degree(sG) == 2:
                subgraph_counts[2] += 1
            else:
                subgraph_counts[3] += 1
        elif num_edges == 3:
            if _max_degree(sG) == 3:
                subgraph_counts[4] += 1
            elif connected and any(NI.GetDeg() == 0 for NI in sG.Nodes()):
                # Triangle plus an isolated node: 3 edges, max degree 2, but
                # NOT connected, so it must not be counted as a 3-path.
                continue
            else:
                # NOTE(review): with connected=False a triangle plus isolated
                # node still lands here because the 10-slot result has no
                # bucket for it; kept for backward compatibility.
                subgraph_counts[5] += 1
        elif num_edges == 4:
            if _max_degree(sG) == 3:
                subgraph_counts[6] += 1
            else:
                subgraph_counts[7] += 1
        elif num_edges == 5:
            subgraph_counts[8] += 1
        else:
            subgraph_counts[9] += 1

    total = subgraph_counts.sum()
    if total == 0:
        # Every sample was skipped; avoid 0/0 producing NaNs.
        return list(subgraph_counts)
    return list(subgraph_counts / total)
def benchmark_ngraph(Graph):
    '''
    Perform benchmark tests for Directed Graphs

    Returns a dict with the node/edge counts, for each degree 0..10 the
    number and fraction of nodes reported by ``snap.NodesGTEDegree_PNGraph``,
    the maximum degree and how many nodes reach it, and the relative size of
    the largest weakly / strongly connected component.
    '''
    results = {}
    num_nodes = Graph.GetNodes()
    results['num_nodes'] = num_nodes
    results['num_edges'] = Graph.GetEdges()

    for degree in range(0, 11):
        num = snap.NodesGTEDegree_PNGraph(Graph, degree)
        results['deg_gte_%d' % degree] = num
        results['deg_gte_%d_percent' % degree] = float(num) / num_nodes

    # Check for over-weighted nodes.
    # GetMxDegNId returns a node *id*; look the node up to obtain its degree
    # so that 'max_degree' is really a degree and is a valid threshold below.
    max_degree_nid = snap.GetMxDegNId(Graph)
    results['max_degree'] = Graph.GetNI(max_degree_nid).GetDeg()
    num = snap.NodesGTEDegree_PNGraph(Graph, results['max_degree'])
    results['max_degree_num'] = num

    # float() on the divisor keeps these true ratios under Python 2 as well.
    results['max_wcc_percent'] = snap.MxWccSz_PNGraph(Graph) \
        / float(num_nodes)
    results['max_scc_percent'] = snap.MxSccSz_PNGraph(Graph).GetNodes() \
        / float(num_nodes)

    return results
def Q1_3(Graph1, Graph2):
    """Run SIR simulations seeded at a maximum-degree node and print statistics.

    For each of the module-level ``iterations`` rounds, one simulation is run
    on ``Graph1`` and one on ``Graph2`` (in that order) through
    ``SIR(graph, infected_set, 0.05, 0.5)``.  A run counts as an epidemic when
    the returned proportion is at least 0.5.

    :param Graph1: graph reported as the Erdos Renyi graph.
    :param Graph2: graph reported as the preferential attachment graph.
    """

    def _conditional_mean(total, count):
        # Mean over the epidemic runs only; NaN when there were none.
        return total / (1.0 * count) if count else float('nan')

    e1 = 0
    e2 = 0
    avg1_cond, avg1 = 0, 0
    avg2, avg2_cond = 0, 0

    for _ in range(iterations):
        # Seed the infection with a single maximum-degree node of Graph1.
        I = set()
        I.add(snap.GetMxDegNId(Graph1))
        ipercent1 = SIR(Graph1, I, 0.05, 0.5)
        avg1 += ipercent1
        if ipercent1 >= 0.5:
            avg1_cond += ipercent1
            e1 += 1

        # Same for Graph2, with a fresh infected set.
        I = set()
        I.add(snap.GetMxDegNId(Graph2))
        ipercent2 = SIR(Graph2, I, 0.05, 0.5)
        avg2 += ipercent2
        if ipercent2 >= 0.5:
            avg2_cond += ipercent2
            e2 += 1

    print("#################### Q1.3 #############################")
    print(
        "portion of simulation with infected with highest degree atleast > 50%"
    )
    print("Event Epidemic Erdos Renyi Graph: ", e1 / (1.0 * iterations))
    print(
        "Mean proportion infected for Erdos Renyi Graph (without condition) ",
        avg1 / (1.0 * iterations))
    print("Mean proportion infected for Erdos Renyi Graph (with condition) ",
          _conditional_mean(avg1_cond, e1))
    print("Event Epidemic Preferrential Attachment Graph: ",
          e2 / (1.0 * iterations))
    print(
        "Mean proportion infected for Preferrential Attachment Graph (without condition) ",
        avg2 / (1.0 * iterations))
    print(
        "Mean proportion infected for Preferrential Attachment Graph (with condition) ",
        _conditional_mean(avg2_cond, e2))
def PredictKey(Graph, key, limit): NId1 = snap.GetMxDegNId(Graph) Node3 = Graph.GetNI(NId1) print NId1, len(Set(Node3.GetOutEdges())) TrainGraph = copy_graph(Graph) TestGraph = copy_graph(Graph) print Graph.GetNodes(), Graph.GetEdges() for EI in Graph.Edges(): src = EI.GetSrcNId() dest = EI.GetDstNId() p = np.random.uniform() if src != NId1 and dest != NId1: continue if p < 0.2: TrainGraph.DelEdge(src, dest) else: TestGraph.DelEdge(src, dest) print Graph.GetNodes(), Graph.GetEdges() print TrainGraph.GetNodes(), TrainGraph.GetEdges() print TestGraph.GetNodes(), TestGraph.GetEdges() pairs = [] n = 0 lowest = 0 scores = [] for Node1 in TrainGraph.Nodes(): n1 = Node1.GetId() if n1 != NId1: continue for Node2 in TrainGraph.Nodes(): n2 = Node2.GetId() if n1 >= n2: continue if TrainGraph.IsEdge(n1, n2): continue #print n1,n2 score = getScore(TrainGraph, Node1, Node2, key) if n < limit: pairs.append([score, (n1, n2)]) scores.append(score) lowest = min(lowest, score) n += 1 else: if score > lowest: lowestloc = np.argmin(scores) pairs[lowestloc] = [score, (n1, n2)] scores[lowestloc] = score lowest = min(scores) pairs.sort(key=lambda x: -x[0]) nTrue = 0 for i in range(limit): #print pairs[i] if TestGraph.IsEdge(pairs[i][1][0], pairs[i][1][1]): nTrue += 1 return nTrue
def findHighestDegrees(self, search=1):
    """Return the ids of ``search`` nodes picked greedily by maximum degree.

    Works on ``self.deepCopy()``: each round asks snap for a maximum-degree
    node of the working copy, records its id and deletes it from the copy, so
    later picks are made on the reduced graph.
    """
    working = self.deepCopy()
    picked = []
    for _ in range(0, search):
        top_nid = snap.GetMxDegNId(working)
        picked.append(top_nid)
        working.DelNode(top_nid)
    return picked
def degree_distribution():
    """Print a maximum-degree node id and the degree histogram of global ``G``."""
    # Node with the largest degree
    top_nid = snap.GetMxDegNId(G)
    print("max degree node", top_nid)

    # (degree, node count) pairs filled in by snap
    histogram = snap.TIntPrV()
    snap.GetDegCnt(G, histogram)
    for pair in histogram:
        count = pair.GetVal2()
        degree = pair.GetVal1()
        print("%d nodes with degree %d" % (count, degree))
def print_info(graph):
    """Print per-node degrees followed by summary statistics of ``graph``."""
    for node in graph.Nodes():
        row = (node.GetId(), node.GetOutDeg(), node.GetInDeg())
        print("node: %d, out-degree %d, in-degree %d" % row)

    print("Number of nodes: ", graph.GetNodes())
    print("Number of edges: ", graph.GetEdges())

    # Look up a maximum-degree node to report its degree.
    hub = graph.GetNI(snap.GetMxDegNId(graph))
    print("Maximum degree: ", hub.GetDeg())

    # Second argument of GetBfsFullDiam is 10 (number of BFS start nodes
    # per the snap API).
    print("Diameter (approximate): ", snap.GetBfsFullDiam(graph, 10))
    print("Triangles: ", snap.GetTriads(graph))
    print("Clustering coefficient: ", snap.GetClustCf(graph))
def runRobustnessTestMax(graph, rounds=30):
    """Targeted-attack robustness test: repeatedly delete maximum-degree nodes.

    Works on ``cloneGraph(graph)``.  In each of ``rounds`` rounds the current
    fraction of removed nodes and ``snap.GetMxSccSz`` of the remaining graph
    are recorded, then ``originalNodesCnt // rounds`` maximum-degree nodes
    are deleted.

    :param graph: snap graph to test; the clone is what gets modified.
    :param rounds: number of measurement rounds.
    :return: list of ``(fraction_removed, GetMxSccSz value)`` tuples, one per
        round; empty for an empty graph.
    """
    graph = cloneGraph(graph)
    result = []
    originalNodesCnt = graph.GetNodes()
    if originalNodesCnt == 0:
        # Nothing to remove; also avoids dividing by zero below.
        return result

    # Floor division: `/` yields a float under Python 3 and range() rejects it.
    nodesPerRound = originalNodesCnt // rounds if rounds > 0 else 0

    for _ in range(rounds):
        fractionRemoved = 1.0 - float(graph.GetNodes()) / float(originalNodesCnt)
        result.append((fractionRemoved, snap.GetMxSccSz(graph)))
        for _ in range(nodesPerRound):
            graph.DelNode(snap.GetMxDegNId(graph))

    return result
def getDegCentr(graph):
    """Return the sum of (max-degree node centrality - node centrality) / (n - 2).

    The reference centrality is ``snap.GetDegreeCentr`` of a maximum-degree
    node; ``n`` is the number of nodes in ``graph``.
    """
    hub = snap.GetMxDegNId(graph)
    peak = snap.GetDegreeCentr(graph, hub)
    node_total = graph.GetNodes()

    # Accumulate how far each node falls short of the peak centrality.
    shortfall = sum(
        (peak - snap.GetDegreeCentr(graph, node.GetId())
         for node in graph.Nodes()),
        0.)
    return shortfall / (node_total - 2)
def Q3_2():
    '''
    Sets the global results32 variable.

    Runs the simulations on the loaded networks with the initial set made of
    a single maximum-degree node, prints the result statistics, and then
    prints the relative change of the mean against the global results31.
    '''
    global results32
    networks = loadNetworks()

    def seed_with_hub(G):
        # Initial set: one maximum-degree node.
        return set([snap.GetMxDegNId(G)])

    results32 = runSimulations(networks, seed_with_hub)
    printResultStatistics(results32, runSignificanceTest=False)

    print("\nRelative Increases:")
    for name in results32:
        before = np.mean(results31[name])
        after = np.mean(results32[name])
        growth = (after - before) / before
        report = (100 * growth, 100 * before, 100 * after, name)
        print("The average proportion infected has increased by %s%% "
              "from %s%% to %s%% for the %s Network.\n" % report)
def wikiVotingNetwork(): Component = snap.TIntPrV() #Loding the graph Wiki = snap.LoadEdgeList(snap.PNGraph, "Wiki-Vote.txt", 0, 1) #Printing Number of Nodes in the Graph print "Number of Nodes: ", Wiki.GetNodes() #Printing Number of Edges in the Graph print "Number of Edges: ", Wiki.GetEdges() #Printing Number of Directed Edges in the Graph print "Number of Directed Edges: ", snap.CntUniqDirEdges(Wiki) #Printing Number of Un-Directed Edges in the Graph print "Number of Undirected Edges: ", snap.CntUniqUndirEdges(Wiki) #Printing Number of Directed Edges in the Graph print "Number of Self-Edges: ", snap.CntSelfEdges(Wiki) #Printing Number of Zero InDeg Nodes in the Graph print "Number of Zero InDeg Nodes: ", snap.CntInDegNodes(Wiki, 0) #Printing Number of Zero OutDeg Nodes in the Graph print "Number of Zero OutDeg Nodes: ", snap.CntOutDegNodes(Wiki, 0) #Printing Node ID with maximum degree in the Graph print "Node ID with maximum degree: ", snap.GetMxDegNId(Wiki) snap.GetSccSzCnt(Wiki, Component) for comp in Component: #printing number of strongly connected components with size print "Size: %d - Number of Strongly Connected Components: %d" % ( comp.GetVal1(), comp.GetVal2()) #printing size of largest connected components print "Size of largest connected component: ", snap.GetMxSccSz(Wiki) snap.GetWccSzCnt(Wiki, Component) for comp in Component: #printing number of weekly connected components with size print "Size: %d - Number of Weekly Connected Component Wikipedia: %d" % ( comp.GetVal1(), comp.GetVal2()) #printing size of weekly connected components print "Size of Weakly connected component: ", snap.GetMxWccSz(Wiki) #plotting out-degree distribution snap.PlotOutDegDistr(Wiki, "wiki-analysis", "Directed graph - Out-Degree Distribution")
def print_statistics(self, outfile_name): print 'Writing to file:', outfile_name snap.PrintInfo(self.Graph, 'Python type TUNGraph', outfile_name, False) with open(outfile_name, 'a') as f: f.write('\n####More information') max_degree_node = snap.GetMxDegNId(self.Graph) for artist_id in self.ids: if self.ids[artist_id] == max_degree_node: print artist_id # These may throw gnuplot errors; if so, edit the generated .plt files to correct the errors and run # gnuplot from terminal. (May need to set terminal to svg instead of png depending on your gnuplot # installation.) snap.PlotOutDegDistr(self.Graph, 'out_degree_distr', 'Out-degree distribution') snap.PlotInDegDistr(self.Graph, 'in_degree_distr', 'In-degree distribution')
plt.ylabel('Proportion of Nodes with a Given Degree (log)') plt.title( 'Degree Distribution of Erdos Renyi, Small World, and Collaboration Networks' ) plt.legend() plt.show() # Execute code for Q1.1 Q1_1() # Problem 1.2 # Find max degree of all 3 graphs for plotting (add 2 for padding) maxdeg = max([ erdosRenyi.GetNI((snap.GetMxDegNId(erdosRenyi))).GetDeg(), smallWorld.GetNI((snap.GetMxDegNId(smallWorld))).GetDeg(), collabNet.GetNI((snap.GetMxDegNId(collabNet))).GetDeg() ]) + 2 # Erdos Renyi def calcQk(Graph, maxDeg=maxdeg): """ :param Graph - snap.PUNGraph object representing an undirected graph :param maxDeg - maximum degree(+1) for which q_k needs to be calculated return type: np.array return: array q_k of dimension maxDeg representing the excess degree distribution """
def generate_graph(n_nodes=50, out_degree=None, seed=1): """ This method generates a Graph based on the Barabasi Algorithm and computes several metrics: 1) It finds the Node with the maximum Degree. 2) It finds the Node with the maximum PageRank Score. 3) Calculates communities within the graph by using two different algorithms: a) Girvan - Newman community Detection b) Clauset-Newman-Moore community Detection. :param n_nodes: int. Specifies the number of nodes for the graph to be created. :param out_degree: int. Specifies the outer degree for each node. If None, then a random integer is generated between 5 and 20. :param seed: Int. An integer that is used to generate the same 'random' integer for the out degree. :return: Boolean. Whether the execution time of the specific community detection algorithms is over 10 minutes. """ if out_degree is None: random.seed(seed) out_degree = random.randint(5, 20) print print "Generating Graph with %s Nodes of Out Degree: %s " % (n_nodes, out_degree) # Generating a random graph based on the Barabasi Algorithm. barabasi_graph = snap.GenPrefAttach(n_nodes, out_degree) # Finding the node ID with the maximoun Degree. maximum_degree_node = snap.GetMxDegNId(barabasi_graph) # Iterating in the graph nodes in order to find the Maximum degree for this particular node. for NI in barabasi_graph.Nodes(): if NI.GetId() == maximum_degree_node: print "Node: %d, Maximum Degree %d" % (NI.GetId(), NI.GetDeg()) # Computing the PageRank score of every node in Graph # Setting the ID and the PageRank score to -1. (minimum of both of these is 0) page_rank_id, page_rank_score = -1, -1 # Creating the iterator for the PageRank algorithm. PRankH = snap.TIntFltH() # Calculating the PageRank for every Node. snap.GetPageRank(barabasi_graph, PRankH) # By iterating on each node we find the Node with the maximum PageRank Score. 
for node in PRankH: if PRankH[node] > page_rank_score: page_rank_score = PRankH[node] page_rank_id = node print print "Node with the Highest PageRank value: " print "Node: %s, PageRank value %s " % (page_rank_id, page_rank_score) print try: start_Girvan_Newman = time.time( ) # setting the timer for the first community detection algorithm. # Calculating Girvan - Newman community Detection Algorithm CmtyV = snap.TCnComV() snap.CommunityGirvanNewman(barabasi_graph, CmtyV) print 'Girvan-Newman community Detection Algorithm: Execution Time: ', time.time( ) - start_Girvan_Newman # Calculating Girvan-Newman community Detection Algorithm start_Clauset_Newman_Moore = time.time( ) # setting the timer for the second community detection algorithm. CmtyV = snap.TCnComV() snap.CommunityCNM(barabasi_graph, CmtyV) print 'Clauset-Newman-Moore community Detection Algorithm: Execution Time: ', time.time( ) - start_Clauset_Newman_Moore print '-' * 100 print '-' * 100 if time.time( ) - start_Girvan_Newman > 10 * 60: # if the total execution time for both algorithms is over 10 # minutes then return False in order to quit the loop that this method will be used in. return False return True except MemoryError: # if we get a memory error during the Community Detection algorithms we set to False in order # to avoid adding more Nodes when running this method in a while loop. return False
def algorithm(self, G, threshold, clique_size):
    """Collect cliques of ``clique_size`` nodes from ``G`` and return their count.

    The method works in three phases:

    1. Pruning: nodes whose degree is at most ``threshold`` (or below
       ``clique_size - 1``) are examined and deleted from ``G`` until no such
       node is left.
    2. Hierarchical clustering: a copy ``H`` of the remaining graph is split
       into groups, each made of a maximum-degree root and the node sets
       found at hop distance 1, 2, ... from it.
    3. Counting: candidate cliques are built around each root and between
       neighbouring hop levels.

    Every candidate goes through ``self.ensureNoOverlap``; lists of exactly
    ``clique_size`` nodes are printed, appended to ``self.allCliques`` and
    counted.

    NOTE(review): ``G`` is modified in place (nodes are deleted).
    NOTE(review): nesting was reconstructed from whitespace-collapsed source;
    verify the block structure against the original file.

    :param G: undirected snap graph to search.
    :param threshold: degree bound used by the pruning step.
    :param clique_size: number of nodes a recorded clique must have.
    :return: number of cliques appended to ``self.allCliques`` by this call.
    """
    #Pruning Step
    P = 1  # set to 1 whenever a pass deleted a node, forcing another pass
    T = 0  # number of cliques recorded so far
    all_cliques = []
    while P == 1:
        P = 0
        for NI in G.Nodes():
            NID = NI.GetId()
            d = NI.GetDeg()
            # edgeList[a] lists neighbours b of NID that are adjacent to a
            edgeList = {}
            if d <= threshold and d >= clique_size - 1:
                neighbours = []
                # every pair of neighbours of NID that is itself an edge
                for i in range(d - 1):
                    for j in range(i + 1, d):
                        a = NI.GetNbrNId(i)
                        b = NI.GetNbrNId(j)
                        if G.IsEdge(a, b):
                            if a not in edgeList:
                                edgeList[a] = [b]
                            else:
                                edgeList[a].append(b)
                for node in edgeList:
                    neighbours = edgeList[node]
                    if len(neighbours) == 1:
                        # a single common neighbour: candidate triangle
                        cliqueList = [NID, node, neighbours[0]]
                        if len(cliqueList) >= clique_size:
                            all_cliques = self.ensureNoOverlap(
                                all_cliques, cliqueList)
                    elif len(neighbours) > 1:
                        for i in range(len(neighbours) - 1):
                            # grow the candidate with every later neighbour
                            # that is adjacent to neighbours[i]
                            cliqueList = [NID, node, neighbours[i]]
                            for j in range(i + 1, len(neighbours)):
                                if G.IsEdge(neighbours[i], neighbours[j]):
                                    cliqueList.append(neighbours[j])
                            if len(cliqueList) >= clique_size:
                                all_cliques = self.ensureNoOverlap(
                                    all_cliques, cliqueList)
            if d <= threshold or d < clique_size - 1:
                # node has been handled (or cannot be in a clique): drop it
                P = 1
                G.DelNode(NID)
    for q in all_cliques:
        if len(q) == clique_size:
            print q, " Pruning"
            self.allCliques.append(q)
            T = T + 1
    #Hierarchical Clustering Step
    if G.GetNodes() >= clique_size:
        H = snap.ConvertGraph(type(G), G)
        # S[i][0] is the root node id of group i; S[i][k] (k >= 1) is the
        # vector of node ids found at hop distance k from that root in H
        S = []
        i = 0
        while H.GetNodes() > 0:
            S.append([])
            # randomly chosen a node with maximum degree
            S[i].append(snap.GetMxDegNId(H))
            j = 1
            TTT = True
            while TTT:
                # create an empty vector of integers
                s = snap.TIntV()
                # (Graph, StartNId, Hop: distance, NIdV: store nodes, IsDir: directed?)
                snap.GetNodesAtHop(H, S[i][0], j, s, True)
                if len(s) != 0:
                    S[i].append(s)
                    j = j + 1
                else:
                    # no nodes at this distance: the group is complete
                    TTT = False
            # remove the whole group from H before picking the next root
            H.DelNode(S[i][0])
            for j in range(1, len(S[i])):
                for nodeID in S[i][j]:
                    H.DelNode(nodeID)
            i = i + 1
        subgraphs = [[] for x in range(len(S))]
        #Counting Step
        # subgraphs[i][k] is the subgraph of G built from S[i][k + 1]
        for i in range(len(S)):
            for j in range(1, len(S[i])):
                G01 = snap.ConvertSubGraph(snap.PUNGraph, G, S[i][j])
                subgraphs[i].append(G01)
        for i in range(len(S)):
            # C01: subgraph of G on the root plus its hop-1 nodes
            C1 = snap.TIntV()
            C1.Add(S[i][0])
            for x in S[i][1]:
                C1.Add(x)
            C01 = snap.ConvertSubGraph(snap.PUNGraph, G, C1)
            all_cliques = []
            for NI in C01.Nodes():
                NID = NI.GetId()
                d = NI.GetDeg()
                edgeList = {}
                for d1 in range(d - 1):
                    for d2 in range(d1 + 1, d):
                        a = NI.GetNbrNId(d1)
                        b = NI.GetNbrNId(d2)
                        if C01.IsEdge(a, b):
                            if a not in edgeList:
                                edgeList[a] = [b]
                            else:
                                edgeList[a].append(b)
                for node in edgeList:
                    neighbours = edgeList[node]
                    if len(neighbours) == 1:
                        cliqueList = [NID, node, neighbours[0]]
                        if len(cliqueList) >= clique_size:
                            all_cliques = self.ensureNoOverlap(
                                all_cliques, cliqueList)
                    elif len(neighbours) > 1:
                        for d1 in range(len(neighbours) - 1):
                            cliqueList = [NID, node, neighbours[d1]]
                            for d2 in range(d1 + 1, len(neighbours)):
                                # NOTE(review): adjacency is tested on G here,
                                # not on C01 as in the pair search above
                                if G.IsEdge(neighbours[d1], neighbours[d2]):
                                    cliqueList.append(neighbours[d2])
                            if len(cliqueList) >= clique_size:
                                all_cliques = self.ensureNoOverlap(
                                    all_cliques, cliqueList)
            for q in all_cliques:
                if len(q) == clique_size:
                    print q, " C1"
                    self.allCliques.append(q)
                    T = T + 1
            # the root has been fully processed
            G.DelNode(S[i][0])
        for i in range(len(S)):
            for j in range(1, len(S[i])):
                for upnodeID in S[i][j]:
                    # U: neighbours of upnodeID found in subgraphs[i][j]
                    #    (built from hop level j + 1)
                    # L: neighbours of upnodeID found in subgraphs[i][j - 2]
                    #    (built from hop level j - 1)
                    U = []
                    L = []
                    for t in range(G.GetNI(upnodeID).GetDeg()):
                        a = G.GetNI(upnodeID).GetNbrNId(t)
                        #check lower level
                        if j < len(S[i]) - 1:
                            if subgraphs[i][j].IsNode(a):
                                U.append(a)
                        #check upper level
                        if j > 1:
                            if subgraphs[i][j - 2].IsNode(a):
                                L.append(a)
                    # adjacent pairs within U and within L
                    edgeList = {}
                    for s in range(len(U)):
                        for t in range(s + 1, len(U)):
                            if subgraphs[i][j].IsEdge(U[s], U[t]):
                                if U[s] not in edgeList:
                                    edgeList[U[s]] = [U[t]]
                                else:
                                    edgeList[U[s]].append(U[t])
                    for s in range(len(L)):
                        for t in range(s + 1, len(L)):
                            if subgraphs[i][j - 2].IsEdge(L[s], L[t]):
                                if L[s] not in edgeList:
                                    edgeList[L[s]] = [L[t]]
                                else:
                                    edgeList[L[s]].append(L[t])
                    all_cliques = []
                    for node in edgeList:
                        neighbours = edgeList[node]
                        if len(neighbours) == 1:
                            cliqueList = [upnodeID, node, neighbours[0]]
                            if len(cliqueList) >= clique_size:
                                all_cliques = self.ensureNoOverlap(
                                    all_cliques, cliqueList)
                        elif len(neighbours) > 1:
                            for d1 in range(len(neighbours) - 1):
                                cliqueList = [
                                    upnodeID, node, neighbours[d1]
                                ]
                                for d2 in range(d1 + 1, len(neighbours)):
                                    if subgraphs[i][j].IsEdge(
                                            neighbours[d1], neighbours[d2]):
                                        cliqueList.append(neighbours[d2])
                                    elif subgraphs[i][j - 2].IsEdge(
                                            neighbours[d1], neighbours[d2]):
                                        cliqueList.append(neighbours[d2])
                                if len(cliqueList) >= clique_size:
                                    all_cliques = self.ensureNoOverlap(
                                        all_cliques, cliqueList)
                    for q in all_cliques:
                        if len(q) == clique_size:
                            print q, " C2"
                            self.allCliques.append(q)
                            T = T + 1
    return T
def main():
    """Print structural statistics of one subgraph and generate its plots.

    The edge list named by ``sys.argv[1]`` is loaded from ``./subgraphs`` as
    an undirected graph.  Size, degree, diameter, component and clustering
    statistics are printed, then the degree, shortest-path, component-size
    and clustering-coefficient plots are written to ``./plots``.  Everything
    in ``./plots`` that does not end in ".png" is deleted and the generated
    images are renamed to ``<prefix><subgraph name>.png``.
    """
    sampleSizes = (10, 100, 1000)
    parentDir = os.getcwd()

    os.chdir(os.path.join(parentDir, "subgraphs"))
    try:
        sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    finally:
        # Always return to the start directory, even if loading fails.
        os.chdir(parentDir)
    subGraphName = sys.argv[1].split(".")[0]

    #### 1 ########
    printWithOutNewLine("Number of nodes:", sub_graph.GetNodes())
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))

    # GetMxDegNId names only one maximum-degree node; use its degree to list
    # every node that shares it.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = 0
    for pair in nodeDegPairs:
        if pair.GetVal1() == rndMaxDegNId:
            maxDegVal = pair.GetVal2()
            break
    maxDegNodes = [
        pair.GetVal1() for pair in nodeDegPairs
        if pair.GetVal2() == maxDegVal
    ]
    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sampledFullDiam = [
        snap.GetBfsFullDiam(sub_graph, size, False) for size in sampleSizes
    ]
    sampledFullDiamStats = [
        round(statistics.mean(sampledFullDiam), 4),
        round(statistics.variance(sampledFullDiam), 4),
    ]
    for size, diam in zip(sampleSizes, sampledFullDiam):
        printWithOutNewLine(
            "Approximate full diameter by sampling %d nodes:" % size, diam)
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    sampledEffDiam = [
        round(snap.GetBfsEffDiam(sub_graph, size, False), 4)
        for size in sampleSizes
    ]
    sampledEffDiamStats = [
        round(statistics.mean(sampledEffDiam), 4),
        round(statistics.variance(sampledEffDiam), 4),
    ]
    for size, diam in zip(sampleSizes, sampledEffDiam):
        printWithOutNewLine(
            "Approximate effective diameter by sampling %d nodes:" % size,
            diam)
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))
    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))
    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)
    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))
    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))
    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    plotsDir = os.path.join(parentDir, "plots")
    if not os.path.isdir(plotsDir):
        os.makedirs(plotsDir)

    # Prefix of each snap output file -> prefix of the final image name.
    prefixByPlot = {
        "ccf": "clustering_coeff_",
        "outDeg": "deg_dist_",
        "diam": "shortest_path_",
        "scc": "connected_comp_",
    }

    os.chdir(plotsDir)
    try:
        snap.PlotOutDegDistr(sub_graph, subGraphName,
                             subGraphName + " Subgraph Degree Distribution")
        snap.PlotShortPathDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Shortest Path Lengths Distribution")
        snap.PlotSccDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Connected Components Size Distribution")
        snap.PlotClustCf(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Clustering Coefficient Distribution")

        # Keep only the rendered images.
        for file in os.listdir(plotsDir):
            if not file.endswith(".png"):
                os.remove(os.path.join(plotsDir, file))

        for file in os.listdir(plotsDir):
            nameSplit = file.split(".")
            # "<name>.png" has already been renamed (or is not ours).
            if len(nameSplit) == 2:
                continue
            # Look the prefix up per file so an unknown plot type falls back
            # to "filename" instead of reusing the previous file's prefix.
            filePrefix = prefixByPlot.get(nameSplit[0], "filename")
            os.rename(file, filePrefix + nameSplit[1] + "." + nameSplit[2])
    finally:
        os.chdir(parentDir)
def PredictKey1(Graph, key, limit, ktrain, ktest):
    """Link prediction restricted to "core" nodes; return the number of hits.

    Every edge of ``Graph`` is assigned at random: with probability 0.5 it is
    removed from the training copy, otherwise from the test copy, so each
    edge survives in exactly one of the two graphs.  Core nodes have at least
    ``ktrain`` distinct out-edges in the training graph and at least
    ``ktest`` in the test graph.  Core pairs ``n1 < n2`` that are not
    training edges are scored with ``getScore`` and the ``limit`` best are
    checked against the test graph.

    :param Graph: snap graph to split.
    :param key: passed through to ``getScore``.
    :param limit: number of top-scored pairs to evaluate.
    :param ktrain: minimum training-graph degree of a core node.
    :param ktest: minimum test-graph degree of a core node.
    :return: how many of the evaluated pairs are edges of the test graph.
    """
    import heapq

    TrainGraph = copy_graph(Graph)
    TestGraph = copy_graph(Graph)
    for EI in Graph.Edges():
        src = EI.GetSrcNId()
        dest = EI.GetDstNId()
        if np.random.uniform() < 0.5:
            TrainGraph.DelEdge(src, dest)
        else:
            TestGraph.DelEdge(src, dest)

    # Build the core in one pass.  The previous version removed items from
    # the list it was iterating over, which skips the element that follows
    # every removal.  A set also makes the membership tests below O(1).
    core = set()
    for Node1 in TrainGraph.Nodes():
        if len(Set(Node1.GetOutEdges())) < ktrain:
            continue
        nid = Node1.GetId()
        if len(Set(TestGraph.GetNI(nid).GetOutEdges())) >= ktest:
            core.add(nid)

    if limit <= 0:
        return 0

    # Min-heap holding the `limit` best (score, pair) entries seen so far.
    # The previous hand-rolled version initialised its running minimum to 0,
    # so a weak candidate could evict a stronger one.
    best = []
    for Node1 in TrainGraph.Nodes():
        n1 = Node1.GetId()
        if n1 not in core:
            continue
        for Node2 in TrainGraph.Nodes():
            n2 = Node2.GetId()
            if n2 not in core or n1 >= n2:
                continue
            if TrainGraph.IsEdge(n1, n2):
                continue
            entry = (getScore(TrainGraph, Node1, Node2, key), (n1, n2))
            if len(best) < limit:
                heapq.heappush(best, entry)
            elif entry[0] > best[0][0]:
                heapq.heapreplace(best, entry)

    # There may be fewer than `limit` candidates; count over what we have.
    nTrue = 0
    for _, (a, b) in best:
        if TestGraph.IsEdge(a, b):
            nTrue += 1
    return nTrue
def algorithm(G, D):
    """Count triangles in ``G`` by pruning plus hop-level decomposition.

    1. Pruning: nodes with degree at most ``D`` contribute the number of
       adjacent neighbour pairs and are deleted; a node adjacent to all other
       nodes contributes the number of edges not incident to it and is
       deleted.  This repeats until a pass deletes nothing.
    2. Hierarchical clustering: a copy of the remaining graph is split into
       groups made of a maximum-degree root and the node sets at hop distance
       1, 2, ... from it.
    3. Counting: edges among a root's hop-1 nodes, adjacent neighbour pairs
       in the next/previous hop level of every node, and (recursively) the
       triangles inside each hop-level subgraph are added up.

    NOTE(review): ``G`` is modified in place (nodes are deleted).
    NOTE(review): nesting was reconstructed from whitespace-collapsed source;
    verify the block structure against the original file.

    :param G: undirected snap graph.
    :param D: degree threshold of the pruning step.
    :return: the accumulated count ``T``.
    """
    #Pruning Step
    P = 1  # set to 1 whenever a pass deleted a node, forcing another pass
    T = 0
    while P == 1:
        P = 0
        for NI in G.Nodes():
            NID = NI.GetId()
            d = NI.GetDeg()
            if d <= D or d > G.GetNodes() - 2:
                if d <= D and d > 1:
                    # low-degree node: test every pair of its neighbours
                    for i in range(d - 1):
                        for j in range(i + 1, d):
                            a = NI.GetNbrNId(i)
                            b = NI.GetNbrNId(j)
                            if G.IsEdge(a, b):
                                T = T + 1
                if d > D and d > G.GetNodes() - 2:
                    # node adjacent to everything: every other edge closes a
                    # triangle with it
                    T = T + G.GetEdges() - NI.GetDeg()
                P = 1
                G.DelNode(NID)
    #Hierarchical Clustering Step
    if G.GetNodes() > 5:
        H = snap.ConvertGraph(type(G), G)
        # S[i][0] is the root node id of group i; S[i][k] (k >= 1) is the
        # vector of node ids at hop distance k from that root in H
        S = []
        i = 0
        while H.GetNodes() > 0:
            S.append([])
            S[i].append(snap.GetMxDegNId(H))
            j = 1
            TTT = True
            while TTT:
                s = snap.TIntV()
                snap.GetNodesAtHop(H, S[i][0], j, s, True)
                if len(s) != 0:
                    S[i].append(s)
                    j = j + 1
                else:
                    TTT = False
            # remove the whole group from H before picking the next root
            H.DelNode(S[i][0])
            for j in range(1, len(S[i])):
                for nodeID in S[i][j]:
                    H.DelNode(nodeID)
            i = i + 1
        subgraphs = [[] for x in range(len(S))]
        #Counting Step
        # subgraphs[i][k] is the subgraph of G built from S[i][k + 1]
        for i in range(len(S)):
            for j in range(1, len(S[i])):
                G01 = snap.ConvertSubGraph(snap.PUNGraph, G, S[i][j])
                subgraphs[i].append(G01)
            # every edge among the root's hop-1 nodes forms a triangle with
            # the root
            T = T + subgraphs[i][0].GetEdges()
            G.DelNode(S[i][0])
        for i in range(len(S)):
            for j in range(1, len(S[i])):
                for upnodeID in S[i][j]:
                    # upper: neighbours found in subgraphs[i][j] (hop j + 1)
                    # lower: neighbours found in subgraphs[i][j - 2] (hop j - 1)
                    # `lower` used to be called D, which overwrote the
                    # threshold parameter and leaked a list into the
                    # recursive calls below.
                    upper = []
                    lower = []
                    for t in range(G.GetNI(upnodeID).GetDeg()):
                        a = G.GetNI(upnodeID).GetNbrNId(t)
                        if j < len(S[i]) - 1:
                            if subgraphs[i][j].IsNode(a):
                                upper.append(a)
                        if j > 1:
                            if subgraphs[i][j - 2].IsNode(a):
                                lower.append(a)
                    for s in range(len(upper)):
                        for t in range(s + 1, len(upper)):
                            if subgraphs[i][j].IsEdge(upper[s], upper[t]):
                                T = T + 1
                    for s in range(len(lower)):
                        for t in range(s + 1, len(lower)):
                            if subgraphs[i][j - 2].IsEdge(lower[s], lower[t]):
                                T = T + 1
        # triangles that lie entirely inside one hop level
        for i in range(len(S)):
            for j in range(len(S[i]) - 1):
                T = T + algorithm(subgraphs[i][j], D)
    return T
def PredictKey(Graph, limit, ktrain, ktest, l): NId1 = snap.GetMxDegNId(Graph) Node3 = Graph.GetNI(NId1) TrainGraph = copy_graph(Graph) TestGraph = copy_graph(Graph) for EI in Graph.Edges(): src = EI.GetSrcNId() dest = EI.GetDstNId() p = np.random.uniform() #if src!=NId1 and dest!=NId1: # continue if p < 0.2: TrainGraph.DelEdge(src, dest) else: TestGraph.DelEdge(src, dest) core = [] for Node1 in TrainGraph.Nodes(): neigh1 = len(Set(Node1.GetOutEdges())) if neigh1 >= ktrain: core.append(Node1.GetId()) for node in core: Node1 = TestGraph.GetNI(node) neigh1 = len(Set(Node1.GetOutEdges())) if neigh1 < ktest: core.remove(node) limit = 0 for Node1 in TrainGraph.Nodes(): n1 = Node1.GetId() if n1 not in core: continue for Node2 in TrainGraph.Nodes(): n2 = Node2.GetId() if n2 not in core: continue if n1 >= n2: continue if TestGraph.IsEdge(n1, n2): limit += 1 #print "Number of true core edges: ", limit #print "Original Graph: ", Graph.GetNodes(), Graph.GetEdges() #print "Train Graph: ", TrainGraph.GetNodes(), TrainGraph.GetEdges() #print "Test Graph: ", TestGraph.GetNodes(), TestGraph.GetEdges() #print "Core Nodes: ", len(core) global shortestDist shortestDist = {} pairs = [] n = 0 lowest = 0 scores = [] propflows = {} for Node1 in TrainGraph.Nodes(): n1 = Node1.GetId() if n1 not in core: continue for Node2 in TrainGraph.Nodes(): n2 = Node2.GetId() if n2 not in core: continue if n1 >= n2: continue if TrainGraph.IsEdge(n1, n2): continue #print n1,n2 if n1 in propflows: propscores = propflows[n1] else: propscores = propflow(TrainGraph, n1, l) #print propscores propflows[n1] = propscores score = 0 if n2 in propscores: score = propscores[n2] if n < limit: pairs.append([score, (n1, n2)]) scores.append(score) lowest = min(lowest, score) n += 1 else: if score > lowest: lowestloc = np.argmin(scores) pairs[lowestloc] = [score, (n1, n2)] scores[lowestloc] = score lowest = min(scores) pairs.sort(key=lambda x: -x[0]) print pairs nTrue = 0 for i in range(limit): #print pairs[i] if 
TestGraph.IsEdge(pairs[i][1][0], pairs[i][1][1]): nTrue += 1 return [nTrue * 100.0 / limit, limit]
## Nodes
print("Number of nodes: {}".format(graph.GetNodes()))
## Edges
print("Number of edges: {}".format(graph.GetEdges()))

# Question 2
## Degree 7
print("Number of nodes with degree={}: {}".format(7, sn.CntDegNodes(graph, 7)))

## The Maximum Degree
# GetMxDegNId names only one maximum-degree node; use its degree to list
# every node that shares it.
MxDeg = graph.GetNI(sn.GetMxDegNId(graph)).GetDeg()
# node.GetId must be *called*: the previous code formatted the bound method
# itself for every id after the first one.
max_degree_ids = [
    str(node.GetId()) for node in graph.Nodes() if node.GetDeg() == MxDeg
]
print("Node id(s) with highest degree: " + ", ".join(max_degree_ids))

## Plot of degrees
sn.PlotOutDegDistr(graph, name, "Degree Distribution")
plotRemove("outDeg", "deg_dist", name)
p = open("customer_graph.pkl", "rb") customer_weights = pickle.load(p) Cs = pickle.load(p) p.close() #plotDegreeDist(C_Net, 'Customer Network Degree Distribution', 'g', 10000, 0.0001) print(C_Net.GetNodes()) # 9057 print(C_Net.GetEdges()) # 8,995,947 ''' print('BEGIN TESTS:') print('---------------------') ###### EDGES LISTS FOR INDUCED SUBGRAPHS ###### # CPG nd_MAX = snap.GetMxDegNId(C_P_graph) NIdV = snap.TIntV() snap.GetNodesAtHop(C_P_graph, nd_MAX, 1, NIdV, False) # 1-hop NIdV.Add(nd_MAX) NIdV2 = snap.TIntV() snap.GetNodesAtHop(C_P_graph, nd_MAX, 2, NIdV2, False) #2-hop for i in NIdV2: NIdV.Add(i) SubGraph = snap.GetSubGraph(C_P_graph, NIdV) if nd_MAX in customers_int_to_string: print('IS CUSTOMER') print(nd_MAX) print(customers_int_to_string[nd_MAX]) elif nd_MAX in products_int_to_string:
import snap


def _report_components(count_sizes, size_counts, max_label, max_size):
    """Fill ``size_counts`` via ``count_sizes`` and print it with the maximum."""
    count_sizes(G, size_counts)
    for entry in size_counts:
        row = (entry.GetVal1(), entry.GetVal2())
        print("Size: %d - Number of Components: %d" % row)
    print(max_label, max_size(G))


# Wikipedia voting network as a directed graph, plus snap's summary file
G = snap.LoadEdgeList(snap.PNGraph, "Wiki-Vote.txt", 0, 1)
snap.PrintInfo(G, "votes Stats", "votes-info.txt", False)

# Node ID with maximum degree
NId1 = snap.GetMxDegNId(G)
print("Node ID with Maximum-Degree: %d" % NId1)

# Strongly connected components: size distribution, then the largest one
ComponentDist = snap.TIntPrV()
_report_components(snap.GetSccSzCnt, ComponentDist,
                   "Strongly Connected Component - Maximum size:",
                   snap.GetMxSccSz)

# Weakly connected components: size distribution, then the largest one
CompDist = snap.TIntPrV()
_report_components(snap.GetWccSzCnt, CompDist,
                   "Weakly Connected Component - Maximum size:",
                   snap.GetMxWccSz)

# Plot of Outdegree Distribution
snap.PlotOutDegDistr(G, "Wiki Votes", "Wiki-Votes Out Degree")
# print(edge_dist['sigma']) NodeStat.sample_probability(sina_network, 0, edge_dist['mu'], edge_dist['sigma']) res_probs = NodeStat.get_prob_dict(sina_network, 0) # Components = snap.TCnComV() # snap.GetSccs(g_snap, Components) # selected_nodes = set() # for CnCom in Components: # print CnCom.Len() # if CnCom.Len() == 19492: # selected_nodes = [x for x in CnCom] # selected_nodes = set(random.sample(selected_nodes, 600)) max_nid = snap.GetMxDegNId(g_snap) BfsTree = snap.GetBfsTree(g_snap, max_nid, True, False) selected_nodes = set() selected_edges = collections.defaultdict(list) selected_edges_res = collections.defaultdict(list) counter = collections.Counter() for EI in BfsTree.Edges(): src, dest = EI.GetSrcNId(), EI.GetDstNId() counter.update([src, dest]) if counter[src] > 10 or counter[dest] > 10: continue selected_nodes.add(src) selected_nodes.add(dest) if len(selected_nodes) >= 100: break
def basic_analysis(): FIn = snap.TFIn("../graphs/ph_simple.graph") G = snap.TUNGraph.Load(FIn) numNodes = G.GetNodes() print "num nodes: ", numNodes numEdges = G.GetEdges() print "num edges: ", numEdges # clustering coefficient print "\nclustering coefficient" print "Clustering G: ", snap.GetClustCf(G) ER = snap.GenRndGnm(snap.PUNGraph, numNodes, numEdges) print "Clustering ER: ", snap.GetClustCf(ER) # degree distribution histogram print "\ndegree distribution histogram" x_erdosRenyi, y_erdosRenyi = getDataPointsToPlot(ER) plt.loglog(x_erdosRenyi, y_erdosRenyi, color = 'g', label = 'Erdos Renyi Network') x_smallWorld, y_smallWorld = getDataPointsToPlot(G) plt.loglog(x_smallWorld, y_smallWorld, linestyle = 'dashed', color = 'b', label = 'PH Agency Network') plt.xlabel('Node Degree (log)') plt.ylabel('Proportion of Nodes with a Given Degree (log)') plt.title('Degree Distribution of Erdos Renyi and PH Agency Network') plt.legend() plt.show() # degree print "\ndegree distribution" deg_sum = 0.0 CntV = snap.TIntPrV() snap.GetOutDegCnt(G, CntV) for p in CntV: deg_sum += p.GetVal1() * p.GetVal2() max_node = G.GetNI(snap.GetMxDegNId(G)) deg_sum /= float(numNodes) print "average degree: ", deg_sum # same for G and ER print "max degree: ", max_node.GetOutDeg(), ", id: ", max_node.GetId() deg_sum = 0.0 max_node = ER.GetNI(snap.GetMxDegNId(ER)) print "max degree: ", max_node.GetOutDeg(), ", id: ", max_node.GetId() # diameter print "\ndiameter" diam = snap.GetBfsFullDiam(G, 10) print "Diameter: ", diam print "ER Diameter: ", snap.GetBfsFullDiam(ER, 10) # triads print "\ntriads" print "Triads: ", snap.GetTriads(G) print "ER Triads: ", snap.GetTriads(ER) # centrality print "\ncentrality" max_dc = 0.0 maxId = -1 all_centr = [] for NI in G.Nodes(): DegCentr = snap.GetDegreeCentr(G, NI.GetId()) all_centr.append(DegCentr) if DegCentr > max_dc: max_dc = DegCentr maxId = NI.GetId() print "max" print "node: %d centrality: %f" % (maxId, max_dc) print "average centrality: ", 
np.mean(all_centr) print "ER" max_dc = 0.0 maxId = -1 all_centr = [] for NI in ER.Nodes(): DegCentr = snap.GetDegreeCentr(ER, NI.GetId()) all_centr.append(DegCentr) if DegCentr > max_dc: max_dc = DegCentr maxId = NI.GetId() print "max" print "node: %d centrality: %f" % (maxId, max_dc) print "average centrality: ", np.mean(all_centr)
def _run_with_timeout(target, args, timeout):
    """Run target(*args) in a thread and wait at most `timeout` for it.

    Returns (elapsed_seconds, memory_error) where memory_error is True when
    a MemoryError surfaced while starting or joining the thread.
    """
    worker = threading.Thread(target=target, args=args)
    memory_error = False
    started = time.time()
    try:
        worker.start()
        worker.join(timeout)
    except MemoryError:
        # NOTE(review): only a MemoryError raised by start()/join() in this
        # thread lands here; one raised inside the worker does not.
        memory_error = True
    finally:
        elapsed = time.time() - started
    return elapsed, memory_error


def manage_graphs(out_degree, nodes, max_minutes):
    """Generate a small-world graph and time two community detectors on it.

    Args:
        out_degree: node out-degree passed to snap.GenSmallWorld.
        nodes: number of nodes of the generated graph.
        max_minutes: total time budget shared by both detectors.
            NOTE(review): the value is handed to Thread.join() and compared
            with time.time() differences, both of which are in seconds --
            confirm the unit with the callers.

    Returns:
        (graph, cnm_time, gn_time, exceed): the graph reloaded from the copy
        saved before the detectors ran, the wall-clock time spent waiting on
        CNM and on Girvan-Newman (0.0 when GN was skipped), and whether the
        budget was exhausted or a MemoryError was caught.
    """
    rnd = snap.TRnd(1, 0)
    graph = snap.GenSmallWorld(nodes, out_degree, 0.7, rnd)
    print(40 * "#")
    print(f"Starting Graph for #{nodes} Nodes.")

    # Save the graph in order to reload it after manipulation
    output_filename = f"temporary_graphs/{nodes}_ws_graph.graph"
    f_out = snap.TFOut(output_filename)
    graph.Save(f_out)
    f_out.Flush()

    # Highest rank Node
    max_degree_node = graph.GetNI(snap.GetMxDegNId(graph))
    print(f"Highest Degree Node ID#{max_degree_node.GetId()}"
          f" with Degree={max_degree_node.GetDeg()}")

    # Gets Hubs and Authorities of all the nodes
    hubs_per_node = snap.TIntFltH()
    auths_per_node = snap.TIntFltH()
    snap.GetHits(graph, hubs_per_node, auths_per_node)
    max_hub_node = graph.GetNI(
        max(hubs_per_node, key=lambda h: hubs_per_node[h]))
    print(f"Highest Hub Score Node ID#{max_hub_node.GetId()}"
          f" with Score={hubs_per_node[max_hub_node.GetId()]}")
    max_authority_node = graph.GetNI(
        max(auths_per_node, key=lambda a: auths_per_node[a]))
    # Report the authority score from the authority table (the hub table was
    # used here before, printing the wrong number).
    print(f"Highest Authority Score Node ID#{max_authority_node.GetId()}"
          f" with Score={auths_per_node[max_authority_node.GetId()]}")

    # CNM Community Detector
    cnm_community = snap.TCnComV()
    cnm_community_exec_time, exceed = _run_with_timeout(
        snap.CommunityCNM, (graph, cnm_community), max_minutes)
    exceed |= max_minutes <= cnm_community_exec_time

    # GN Community Detector, run only with whatever budget CNM left over
    if max_minutes > cnm_community_exec_time and not exceed:
        gn_community = snap.TCnComV()
        remaining = max_minutes - cnm_community_exec_time
        gn_community_exec_time, gn_failed = _run_with_timeout(
            snap.CommunityGirvanNewman, (graph, gn_community), remaining)
        exceed |= gn_failed
        exceed |= gn_community_exec_time >= remaining
    else:
        gn_community_exec_time = 0.00

    if not exceed:
        print(
            f"Execution Time for CNM Communities Detector is {round(cnm_community_exec_time, 4):.4f}"
        )
        print(
            f"Execution Time for GN Communities Detector is {round(gn_community_exec_time, 4):.4f}"
        )
    else:
        # `nodes` is the parameter; the previously used `nodes_num` is not
        # defined in this function.
        print(
            f"Graph with Nodes#{nodes} exceeded the valid calculation limits."
        )
    print(40 * "#")

    # load graph in its initial state
    f_in = snap.TFIn(output_filename)
    graph = snap.TUNGraph.Load(f_in)
    return graph, cnm_community_exec_time, gn_community_exec_time, exceed
vid = x.videoid for v in list(x.related) + [vid]: if v not in self.nodeid: self.nodeid[v] = self.size self.videoid[self.size] = v self.size += 1 #filenames = [ "0301/{}.txt".format(i) for i in range(0, 4) ] #data = Data(filenames) #graph = make_graph(data) #save_graph_data(data, graph, "try") data, graph = load_graph_data("try") Graph = snap.ConvertGraph(snap.PUNGraph, graph) NId1 = snap.GetMxDegNId(Graph) NIdToDistH = snap.TIntH() shortestPath = snap.GetShortPath(Graph, NId1, NIdToDistH) shortestDist = {} for item in NIdToDistH: shortestDist[item] = NIdToDistH[item] PRankH = snap.TIntFltH() snap.GetPageRank(Graph, PRankH) simRanks = {} def simRank(Graph, nIters, gamma): for Node1 in Graph.Nodes():
import snap import sys print "Version", snap.Version try: G = snap.TUNGraph.New() print snap.GetMxDegNId(G) except RuntimeError: e = sys.exc_info() print "1-except1", e print "1-except2", e[0] print "1-except3", e[1] print "after GetMxDegNId" try: G = snap.TUNGraph.New() G.AddNode(1) G.AddNode(1) except RuntimeError: e = sys.exc_info() print "2-except1", e print "2-except2", e[0] print "2-except3", e[1] print "after AddNode"
def _count_adjacent_pairs(graph, node_ids):
    """Return how many unordered pairs from node_ids are edges of graph."""
    found = 0
    for s in range(len(node_ids)):
        for t in range(s + 1, len(node_ids)):
            if graph.IsEdge(node_ids[s], node_ids[t]):
                found += 1
    return found


def algorithm(G, M):
    """Count adjacent-neighbour pairs (triangles) in G by pruning and layering.

    The graph is consumed: nodes are deleted from G as they are processed.

    Steps:
      1. Pruning -- repeatedly delete every node whose degree is <= M (after
         counting the edges among its neighbours) or that is adjacent to all
         other nodes (after adding the number of edges not incident to it).
      2. Hierarchical clustering -- only when more than 5 nodes remain: on a
         copy of G, repeatedly take a maximum-degree node as root and peel
         off the node sets found at hop 1, 2, ... from it.
      3. Counting -- add the edges inside each root's hop-1 set, the
         adjacent neighbour pairs a node has in the next and in the previous
         hop set, and recurse into every hop set's induced subgraph.

    Args:
        G: snap undirected graph; modified in place.
        M: degree threshold used by the pruning step (also in recursion).

    Returns:
        The accumulated count.
    """
    Count = 0

    # Pruning Step
    # NOTE(review): nodes are deleted while G.Nodes() is being iterated, as
    # in the original; the count relies on each deletion taking effect
    # immediately so an already counted pair is not seen again.
    pruned = True
    while pruned:
        pruned = False
        for node in G.Nodes():
            nodeID = node.GetId()
            degree = node.GetDeg()
            is_low = degree <= M
            # Degree of at least n - 1, i.e. adjacent to every other node.
            is_universal = degree > G.GetNodes() - 2
            if not (is_low or is_universal):
                continue
            if is_low and degree > 1:
                for i in range(degree - 1):
                    for j in range(i + 1, degree):
                        a = node.GetNbrNId(i)
                        b = node.GetNbrNId(j)
                        if G.IsEdge(a, b):
                            Count = Count + 1
            if degree > M and is_universal:
                # Every edge not touching this node closes a pair with it.
                Count = Count + G.GetEdges() - degree
            pruned = True
            G.DelNode(nodeID)

    # Hierarchical Clustering Step
    if G.GetNodes() > 5:
        H = snap.ConvertGraph(type(G), G)
        # S[i][0] is the root id of cluster i, S[i][j] the nodes at hop j.
        S = []
        while H.GetNodes() > 0:
            # a node with maximum degree (snap picks one among the ties)
            root = snap.GetMxDegNId(H)
            cluster = [root]
            hop = 1
            while True:
                # (Graph, StartNId, Hop: distance, NIdV: store nodes,
                #  IsDir: directed?)
                ring = snap.TIntV()
                snap.GetNodesAtHop(H, root, hop, ring, True)
                if len(ring) == 0:
                    break
                cluster.append(ring)
                hop = hop + 1
            H.DelNode(root)
            for ring in cluster[1:]:
                for nodeID in ring:
                    H.DelNode(nodeID)
            S.append(cluster)

        # Counting Step
        # subgraphs[i][k] is the subgraph of G induced by S[i][k + 1].
        subgraphs = []
        for cluster in S:
            rings = [
                snap.ConvertSubGraph(snap.PUNGraph, G, ring)
                for ring in cluster[1:]
            ]
            subgraphs.append(rings)
            # Edges among the root's neighbours; guarded so a root without
            # any neighbour ring no longer raises IndexError.
            if rings:
                Count = Count + rings[0].GetEdges()
            G.DelNode(cluster[0])

        for i, cluster in enumerate(S):
            last_level = len(cluster) - 1
            for j in range(1, len(cluster)):
                for nodeID in cluster[j]:
                    # These lists used to be called U and M; the latter
                    # clobbered the threshold parameter M, so the recursive
                    # calls below received a list instead of the threshold.
                    lower_nbrs = []
                    upper_nbrs = []
                    node = G.GetNI(nodeID)
                    for t in range(node.GetDeg()):
                        a = node.GetNbrNId(t)
                        # check lower level (hop j + 1)
                        if j < last_level and subgraphs[i][j].IsNode(a):
                            lower_nbrs.append(a)
                        # check upper level (hop j - 1)
                        if j > 1 and subgraphs[i][j - 2].IsNode(a):
                            upper_nbrs.append(a)
                    if lower_nbrs:
                        Count = Count + _count_adjacent_pairs(
                            subgraphs[i][j], lower_nbrs)
                    if upper_nbrs:
                        Count = Count + _count_adjacent_pairs(
                            subgraphs[i][j - 2], upper_nbrs)

        # Pairs lying entirely inside one hop set are counted recursively.
        for rings in subgraphs:
            for ring_graph in rings:
                Count = Count + algorithm(ring_graph, M)
    return Count
# Edge-list file to analyse, taken from the first command-line argument.
subgraph_name = sys.argv[1]
# Load it as an undirected graph.
# NOTE(review): 0 and 1 are presumably the source/destination column
# indices -- confirm against the snap.LoadEdgeList documentation.
fbsgel = snap.LoadEdgeList(snap.PUNGraph, subgraph_name, 0, 1)
# Collects the id of every node whose degree equals the maximum (Q2 b).
MaxDegVfb = []

# Q1 (a): number of nodes
fbnn = fbsgel.GetNodes()
print("Number of nodes:", fbnn)

# Q1 (b): number of edges
fben = fbsgel.GetEdges()
print("Number of edges:", fben)

# Q2 (a): nodes with degree exactly 7
print("Number of nodes with degree=7:", snap.CntDegNodes(fbsgel, 7))

# Q2 (b): GetMxDegNId yields a single node id, so its degree is used as the
# target and every node is scanned to list all nodes sharing that degree.
max_deg_fb_id = snap.GetMxDegNId(fbsgel)
NI = fbsgel.GetNI(max_deg_fb_id)
max_deg_fb = NI.GetDeg()
# The loop reuses the name NI, so after it NI refers to the last node
# visited, not to the maximum-degree node fetched above.
for NI in fbsgel.Nodes():
    if (NI.GetDeg() == max_deg_fb):
        MaxDegVfb.append(NI.GetId())
MaxDegNodeString = ','.join(map(str, MaxDegVfb))
print("Node id(s) with highest degree:", MaxDegNodeString)

# Q2 (c): degree-distribution plot, named after the input file
snap.PlotOutDegDistr(fbsgel, "deg_dist_" + str(subgraph_name),
                     "deg_dist_" + str(subgraph_name))

# Q3 (a): initial values for the computation that follows this fragment
i = 10
average = 0.0
variance = 0.0
edges = snap.CntUniqUndirEdges(Graph) nodes7 = snap.CntDegNodes(Graph, 7) DegToCntV = snap.TIntPrV() snap.GetDegCnt(Graph, DegToCntV) vector_length = len(DegToCntV) maxdegreencount = DegToCntV[vector_length - 1].GetVal2() #for item in DegToCntV: # print "%d nodes with degree %d" % (item.GetVal2(), item.GetVal1()) print "" print "Number of nodes in " + input_file + ": ", final_nodes print "Number of edges in " + input_file + ": ", edges print "Number of nodes with degree=7 in " + input_file + ": ", nodes7 print "Node id(s) with highest degree in " + input_file + ": ", for i in range(maxdegreencount): NodeId = snap.GetMxDegNId(Graph) V1 = snap.TIntV() V1.Add(NodeId) snap.DelNodes(Graph, V1) print str(NodeId) + ",", print "" Graph1 = snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1) snap.PlotInDegDistr(Graph1, "degree_plot_" + input_file, "Undirected graph - Degree Distribution") print "Degree distribution of " + input_file + " is in: inDeg.degree_plot_" + input_file + ".png" print "" diameter = [0, 0, 0] index = 0 for i in [10, 100, 1000]: