def getTransitivity(graph):
    """Return the transitivity (global clustering coefficient) of ``graph``.

    Transitivity is ``3 * triangles / connected_triples``. Summing
    ``snap.GetNodeTriads`` over all nodes counts every triangle three times
    (once per corner), so that sum already equals ``3 * triangles``. The
    number of connected triples is ``sum(deg * (deg - 1)) / 2``; moving that
    ``/ 2`` into the numerator gives the coefficient 2 used below.

    Args:
        graph: A snap graph exposing ``Nodes()``; each node must provide
            ``GetId()`` and ``GetDeg()``.

    Returns:
        float: The transitivity, or 0.0 when no node has degree >= 2 (there
        are no connected triples, so the ratio is undefined).
    """
    # 2, not 3: the factor of 3 is already contained in the per-node sum.
    triangleCoefficient = 2
    triadSum = 0.0
    degreeSum = 0.0
    for node in graph.Nodes():
        triadSum += snap.GetNodeTriads(graph, node.GetId())
        nodeDeg = node.GetDeg()
        degreeSum += nodeDeg * (nodeDeg - 1)
    if degreeSum == 0:
        # Empty graph, or only isolated nodes / degree-1 nodes.
        return 0.0
    return triangleCoefficient * triadSum / degreeSum
def print_connectivity_clustering(G):
    """Print clustering and triad statistics for graph ``G``.

    Prints, in order:
      * the average clustering coefficient (rounded to 4 places),
      * the number of triads in the graph,
      * the clustering coefficient of one random node (rounded to 4 places),
      * the number of triads a second, independently drawn random node
        participates in,
      * the number of edges that participate in at least one triad.

    Args:
        G: A snap graph.
    """
    GraphClustCoeff = snap.GetClustCf(G)
    print("Average clustering coefficient:", round(GraphClustCoeff, 4))
    print("Number of triads:", snap.GetTriads(G))

    NId = G.GetRndNId()
    # Round to 4 places like the average above; a bare round() would collapse
    # the coefficient to an integer.
    print(f'Clustering coefficient of random node {NId}:',
          round(snap.GetNodeClustCf(G, NId), 4))

    # A fresh random node is drawn for the triad count.
    NId = G.GetRndNId()
    print(f'Number of triads random node {NId} participates:',
          snap.GetNodeTriads(G, NId))

    print('Number of edges that participate in at least one triad:',
          snap.GetTriadEdges(G))
# (b) Edge bridges: snap fills the pair vector in place.
EdgeBridgeV = snap.TIntPrV()
snap.GetEdgeBridges(fbsgel, EdgeBridgeV)
bridge_total = len(EdgeBridgeV)
print("Number of edge bridges:", bridge_total)

# (c) Articulation points: snap fills the id vector in place.
ArtNIdV = snap.TIntV()
snap.GetArtPoints(fbsgel, ArtNIdV)
articulation_total = len(ArtNIdV)
print("Number of articulation points:", articulation_total)

# (d) Plot; the same string is used for the file name and the description.
scc_plot_tag = "connected_comp_" + str(subgraph_name)
snap.PlotSccDistr(fbsgel, scc_plot_tag, scc_plot_tag)

# Q5
# (a) Average clustering coefficient.
avg_clust_cf = snap.GetClustCf(fbsgel, -1)
print("Average clustering coefficient:", round(avg_clust_cf, 4))

# (b) Triad count.
triad_total = snap.GetTriads(fbsgel, -1)
print("Number of triads:", triad_total)

# (c) Clustering coefficient of one random node.
RnId = fbsgel.GetRndNId(Rnd)
rnd_node_label = str(RnId)
print("Clustering coefficient of random node " + rnd_node_label + ":",
      round(snap.GetNodeClustCf(fbsgel, RnId), 4))

# (d) Triads of the same random node.
print("Number of triads random node " + rnd_node_label + " participates:",
      snap.GetNodeTriads(fbsgel, RnId))

# (e) Edges taking part in at least one triad.
triad_edge_total = snap.GetTriadEdges(fbsgel, -1)
print("Number of edges that participate in at least one triad:",
      triad_edge_total)

# (f) Plot; the same string is used for the file name and the description.
ccf_plot_tag = "clustering_coeff_" + str(subgraph_name)
snap.PlotClustCf(fbsgel, ccf_plot_tag, ccf_plot_tag)
# Articulation points: snap fills the vector in place.
Art_points = snap.TIntV()
snap.GetArtPoints(Graph1, Art_points)
art = Art_points.Len()
print("Number of articulation points: ", art)

# Component-size distribution plot.
str2 = "connected_comp_" + file_name
snap.PlotSccDistr(Graph1, str2,
                  "Distribution of sizes of connected components")

# 5. Connectivity and clustering in the network
avg_cc = snap.GetClustCf(Graph1, -1)
print("Average clustering coefficient: %0.4f" % avg_cc)

triads = snap.GetTriads(Graph1, -1)
print("Number of triads: ", triads)

# Clustering coefficient of a first random node.
random1 = Graph1.GetRndNId(Rnd)
node_cc = snap.GetNodeClustCf(Graph1, random1)
print("Clustering coefficient of random node %d: %0.4f"
      % (random1, node_cc))

# Triad participation of a second, independently drawn random node.
random2 = Graph1.GetRndNId(Rnd)
node_triads = snap.GetNodeTriads(Graph1, random2)
print("Number of triads random node %d participates: %d"
      % (random2, node_triads))

triad_edges = snap.GetTriadEdges(Graph1, -1)
print("Number of edges that participate in at least one triad: ",
      triad_edges)

# Clustering-coefficient distribution plot.
str3 = "clustering_coeff_" + file_name
snap.PlotClustCf(Graph1, str3, "The distribution of clustering coefficient")
# Shortest-path distribution: plot under the scratch name "temp", keep the
# PNG under plots/, then delete the remaining diam.* by-products.
snap.PlotShortPathDistr(graph, "temp", "Undirected graph - shortest path")
move_diam_cmd = "mv diam.temp.png plots/shortest_path_" + subgraph_name + ".png"
os.system(move_diam_cmd)
os.system("rm diam.*")

# Components.
largest_scc_fraction = snap.GetMxSccSz(graph)
print("Fraction of nodes in largest connected component:",
      round(largest_scc_fraction, 4))
print("Number of edge bridges:", get_bridges(graph).Len())
print("Number of articulation points:",
      get_articulation_points(graph).Len())

# Component-size distribution: same plot / move / clean-up sequence.
snap.PlotSccDistr(graph, "temp", "Undirected graph - scc distribution")
move_scc_cmd = "mv scc.temp.png plots/connected_comp_" + subgraph_name + ".png"
os.system(move_scc_cmd)
os.system("rm scc.*")

# Clustering.
average_clustering = snap.GetClustCf(graph)
print("Average clustering coefficient:", round(average_clustering, 4))
print("Number of triads:", snap.GetTriads(graph))

# Coefficient of one random node, looked up in the per-node result.
random_node = graph.GetRndNId()
print("Clustering coefficient of random node", random_node, ":",
      round(get_each_nodes_ClusteringCofficient(graph)[random_node], 4))

# Triad participation of a second, independently drawn random node.
random_node = graph.GetRndNId()
print("Number of triads random node", random_node, "participates:",
      snap.GetNodeTriads(graph, random_node))

print("Number of edges that participate in at least one triad:",
      snap.GetTriadEdges(graph))

# Clustering-coefficient distribution: same plot / move / clean-up sequence.
snap.PlotClustCf(graph, "temp", "Undirected graph - clustering coefficient")
move_ccf_cmd = "mv ccf.temp.png plots/clustering_coeff_" + subgraph_name + ".png"
os.system(move_ccf_cmd)
os.system("rm ccf.*")
def graphStructure(elistName, elistPath):
    """Compute the assignment's structural statistics for one edge list.

    Builds an undirected graph from the edge list, gathers size, degree,
    diameter, component and clustering figures, and writes the four
    distribution plots (each named ``<kind>_<elistName>``).

    Args:
        elistName (str): Name of the edge list; used in plot file names.
        elistPath (pathlib.Path): Edge list from which the graph is built.

    Returns:
        dict: Results keyed by statistic name.
    """
    subGraph = snap.LoadEdgeList(snap.PUNGraph, elistPath, 0, 1)
    RESULTS = {}

    # Part 1 (Size of the network)
    RESULTS['nodeCount'] = subGraph.GetNodes()
    RESULTS['edgeCount'] = subGraph.GetEdges()

    # Part 2 (Degree of nodes in the network)
    # Copy plain (id, degree) values out while iterating the node iterator.
    idDegreePairs = [(node.GetId(), node.GetDeg())
                     for node in subGraph.Nodes()]
    degree7Count = sum(1 for _, deg in idDegreePairs if deg == 7)
    maxDegree = max((deg for _, deg in idDegreePairs), default=0)
    maxDegreeNodes = [nodeId for nodeId, deg in idDegreePairs
                      if deg == maxDegree]

    # The graph is undirected, so the out-degree plot is simply the degree plot.
    snap.PlotOutDegDistr(subGraph, f"deg_dist_{elistName}")

    RESULTS['maxDegree'] = maxDegree
    RESULTS['maxDegreeNodes'] = ','.join(str(nodeId)
                                         for nodeId in maxDegreeNodes)
    RESULTS['degree7Count'] = degree7Count

    # Part 3 (Paths in the network)
    sampleSizes = (10, 100, 1000)

    # Full diameter for each sample size, then mean/variance of the three.
    fullDiameters = {size: snap.GetBfsFullDiam(subGraph, size, False)
                     for size in sampleSizes}
    fullStats = meanVariance(fullDiameters.values())
    fullMean, fullVariance = fullStats
    fullDiameters['mean'] = fullMean
    fullDiameters['variance'] = fullVariance
    RESULTS['fullDiameters'] = fullDiameters

    # Effective diameter, same procedure.
    effDiameters = {size: snap.GetBfsEffDiam(subGraph, size, False)
                    for size in sampleSizes}
    effStats = meanVariance(effDiameters.values())
    effMean, effVariance = effStats
    effDiameters['mean'] = effMean
    effDiameters['variance'] = effVariance
    RESULTS['effDiameters'] = effDiameters

    snap.PlotShortPathDistr(subGraph, f"shortest_path_{elistName}")

    # Part 4 (Components of the network)
    edgeBridges = snap.TIntPrV()
    articulationPoints = snap.TIntV()
    RESULTS['fractionLargestConnected'] = snap.GetMxSccSz(subGraph)
    # Both vectors are filled in place by snap.
    snap.GetEdgeBridges(subGraph, edgeBridges)
    snap.GetArtPoints(subGraph, articulationPoints)
    RESULTS['edgeBridges'] = len(edgeBridges)
    RESULTS['articulationPoints'] = len(articulationPoints)

    snap.PlotSccDistr(subGraph, f"connected_comp_{elistName}")

    # Part 5 (Connectivity and clustering in the network)
    RESULTS['avgClusterCoefficient'] = snap.GetClustCf(subGraph, -1)
    triadTotals = snap.GetTriadsAll(subGraph, -1)
    RESULTS['triadCount'] = triadTotals[0]

    # Draw both random nodes before querying either of them.
    nodeX = subGraph.GetRndNId(Rnd)
    nodeY = subGraph.GetRndNId(Rnd)
    nodeXCoefficient = snap.GetNodeClustCf(subGraph, nodeX)
    RESULTS['randomClusterCoefficient'] = (nodeX, nodeXCoefficient)
    nodeYTriads = snap.GetNodeTriads(subGraph, nodeY)
    RESULTS['randomNodeTriads'] = (nodeY, nodeYTriads)
    RESULTS['edgesTriads'] = snap.GetTriadEdges(subGraph)

    snap.PlotClustCf(subGraph, f"clustering_coeff_{elistName}")

    return RESULTS
def get_number_of_triads_with_node(G, n):
    """Return ``snap.GetNodeTriads(G, n)``: the triad count for node ``n`` of ``G``."""
    triad_count = snap.GetNodeTriads(G, n)
    return triad_count
def main():
    """Print structural statistics for one subgraph and save its plots.

    Loads the undirected edge list named by ``sys.argv[1]`` from the
    ``subgraphs`` directory under the current working directory, prints the
    size, degree, diameter, component and clustering figures, then writes
    the four distribution plots into ``plots`` and renames them to
    ``<kind>_<subgraph>.png``. The working directory is restored at the end,
    also when plotting or renaming fails.
    """
    parentDir = os.getcwd()
    os.chdir(parentDir + "/subgraphs")
    sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    subGraphName = sys.argv[1].split(".")[0]
    os.chdir(parentDir)

    #### 1 ########
    printWithOutNewLine("Number of nodes:", sub_graph.GetNodes())
    printWithOutNewLine("Number of edges:",
                        snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))

    # snap reports one max-degree node; look up its degree, then collect
    # every node that shares that degree.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = 0
    for pair in nodeDegPairs:
        if pair.GetVal1() == rndMaxDegNId:
            maxDegVal = pair.GetVal2()
            break
    maxDegNodes = [pair.GetVal1() for pair in nodeDegPairs
                   if pair.GetVal2() == maxDegVal]
    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sampleSizes = (10, 100, 1000)

    sampledFullDiam = [snap.GetBfsFullDiam(sub_graph, size, False)
                       for size in sampleSizes]
    sampledFullDiamStats = [
        round(statistics.mean(sampledFullDiam), 4),
        round(statistics.variance(sampledFullDiam), 4),
    ]
    printWithOutNewLine("Approximate full diameter by sampling 10 nodes:",
                        sampledFullDiam[0])
    printWithOutNewLine("Approximate full diameter by sampling 100 nodes:",
                        sampledFullDiam[1])
    printWithOutNewLine("Approximate full diameter by sampling 1000 nodes:",
                        sampledFullDiam[2])
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    # Effective diameters are rounded before the statistics are taken, so
    # the mean/variance describe the printed values.
    sampledEffDiam = [round(snap.GetBfsEffDiam(sub_graph, size, False), 4)
                      for size in sampleSizes]
    sampledEffDiamStats = [
        round(statistics.mean(sampledEffDiam), 4),
        round(statistics.variance(sampledEffDiam), 4),
    ]
    printWithOutNewLine(
        "Approximate effective diameter by sampling 10 nodes:",
        sampledEffDiam[0])
    printWithOutNewLine(
        "Approximate effective diameter by sampling 100 nodes:",
        sampledEffDiam[1])
    printWithOutNewLine(
        "Approximate effective diameter by sampling 1000 nodes:",
        sampledEffDiam[2])
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))
    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))
    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))

    # One random node is used for both per-node figures.
    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)
    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))
    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))
    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    os.makedirs('plots', exist_ok=True)
    os.chdir(parentDir + "/plots")
    try:
        plotsDir = os.getcwd()
        snap.PlotOutDegDistr(sub_graph, subGraphName,
                             subGraphName + " Subgraph Degree Distribution")
        snap.PlotShortPathDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Shortest Path Lengths Distribution")
        snap.PlotSccDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Connected Components Size Distribution")
        snap.PlotClustCf(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Clustering Coefficient Distribution")

        # Keep only the PNGs; everything else in the directory is removed.
        for leftover in os.listdir(plotsDir):
            if not leftover.endswith(".png"):
                os.remove(os.path.join(plotsDir, leftover))

        # Maps the first dot-part of a fresh plot name to the final prefix.
        prefixByKind = {
            "ccf": "clustering_coeff_",
            "outDeg": "deg_dist_",
            "diam": "shortest_path_",
            "scc": "connected_comp_",
        }
        for plotFile in os.listdir(plotsDir):
            nameSplit = plotFile.split(".")
            # Fresh plots are "<kind>.<name>.png"; two-part names were
            # renamed on an earlier run, and any other shape is left alone.
            if len(nameSplit) != 3:
                continue
            # Resolve the prefix per file so an unknown kind never inherits
            # the prefix of the previously processed file.
            filePrefix = prefixByKind.get(nameSplit[0], "filename")
            os.rename(plotFile,
                      filePrefix + nameSplit[1] + "." + nameSplit[2])
    finally:
        os.chdir(parentDir)
if (sub_graph_name == "p2p-Gnutella04-subgraph"):
    # Clustering coefficient of a random node
    # NOTE(review): Randomize() is called right after seeding with 42;
    # presumably this re-seeds the generator and makes the fixed seed
    # ineffective -- confirm against the SNAP TRnd documentation.
    Rand = snap.TRnd(42)
    Rand.Randomize()
    RandNode4 = p2p_gnutella04_subgraph.GetRndNId(Rand)
    print "Clustering coefficient of random node " + str(
        RandNode4) + " in p2p-Gnutella04-subgraph : " + str(
            round(snap.GetNodeClustCf(p2p_gnutella04_subgraph, RandNode4), 4))

# Task 1.2.5.4
# Each block below reports the triad count for the random node that was
# selected for that dataset. RandNode1..RandNode3 and their graphs are
# defined outside this excerpt; RandNode4 is only assigned in the
# Gnutella branch above.
if (sub_graph_name == "soc-Epinions1-subgraph"):
    # Number of node triads of a random node
    print "Number of triads of random node " + str(
        RandNode1) + " participates in soc-Epinions1-subgraph: " + str(
            snap.GetNodeTriads(soc_epinions1_subgraph, RandNode1))

if (sub_graph_name == "cit-HepPh-subgraph"):
    # Number of node triads of a random node
    print "Number of triads of random node " + str(
        RandNode2) + " participates in cit-HepPh-subgraph: " + str(
            snap.GetNodeTriads(cit_heph_subgraph, RandNode2))

if (sub_graph_name == "email-Enron-subgraph"):
    # Number of node triads of a random node
    print "Number of triads of random node " + str(
        RandNode3) + " participates in email-Enron-subgraph: " + str(
            snap.GetNodeTriads(email_enron_subgraph, RandNode3))

if (sub_graph_name == "p2p-Gnutella04-subgraph"):
    # Number of node triads of a random node (same node as the clustering
    # coefficient branch for this dataset).
    print "Number of triads of random node " + str(
        RandNode4) + " participates in p2p-Gnutella04-subgraph: " + str(
            snap.GetNodeTriads(p2p_gnutella04_subgraph, RandNode4))
plt.savefig(plot_filedir)

# [5] Connectivity and Clustering in the Network
cluster_coeff = snap.GetClustCf(G, -1)
rounded_cluster_coeff = round(cluster_coeff, 4)
print("Average clustering coefficient: {}".format(rounded_cluster_coeff))

num_triads = snap.GetTriads(G, -1)
print("Number of triads: {}".format(num_triads))

# Clustering coefficient of a first random node.
node_id = G.GetRndNId(Rnd)
node_cluster_coeff = snap.GetNodeClustCf(G, node_id)
rounded_node_coeff = round(node_cluster_coeff, 4)
print("Clustering coefficient of random node {}: {}".format(
    node_id, rounded_node_coeff))

# Triad participation of a second, independently drawn random node.
node_id = G.GetRndNId(Rnd)
node_num_triads = snap.GetNodeTriads(G, node_id)
print("Number of triads random node {} participates: {}".format(
    node_id, node_num_triads))

triad_edge = snap.GetTriadEdges(G)
print("Number of edges that participate in at least one triad: {}".format(
    triad_edge))

# snap fills cf_dist in place with pairs; copy them into a plain dict
# keyed by each pair's first value.
cf_dist = snap.TFltPrV()
coeff = snap.GetClustCf(G, cf_dist, -1)
degree_coeff = {}
for pair in cf_dist:
    pair_key = pair.GetVal1()
    degree_coeff[pair_key] = pair.GetVal2()

# Plot clustering coefficient distribution
plot_filename = 'clustering_coeff_' + graph_filename[:-6] + '.png'
#loading steam-sweden dataset Graph = snap.LoadEdgeList(snap.PUNGraph, "Steam-Sweden.txt", 0, 1) #calculating number of triads with random sampling NumTriads = snap.GetTriads(Graph, -1) print "Number of triads: " + str(NumTriads) #selecting random node rm_node = Graph.GetRndNId() #random node clustering coefficient rm_clus_coeff = snap.GetNodeClustCf(Graph, rm_node) print "Clustering coefficient of random node ", rm_node, " in Steam-Sweden: ", rm_clus_coeff #Number of triads a randomly selected node participates in num_triads = snap.GetNodeTriads(Graph, rm_node) print "Number of triads of node ", rm_node, " participates in ", num_triads, " triads" #avg and global clustering coefficient TriadV = snap.TIntTrV() snap.GetTriads(Graph, TriadV, -1) OpenTriads = 0 ClosedTriads = 0 for triple in TriadV: OpenTriads += triple.Val3() ClosedTriads += triple.Val2() ClosedTriads = ClosedTriads / 3 GlobalClcf = float(ClosedTriads) / (float(ClosedTriads) + float(OpenTriads)) GraphClustCoeff = snap.GetClustCf(Graph, -1)
snap.GetMxSccSz(G))
# (The line above closes a call that starts before this excerpt.)

# Edge bridges: snap fills EdgeV in place.
EdgeV = snap.TIntPrV()
snap.GetEdgeBridges(G, EdgeV)
print("Number of edge bridges:", len(EdgeV))

# Articulation points: snap fills ArtNIdV in place.
ArtNIdV = snap.TIntV()
snap.GetArtPoints(G, ArtNIdV)
print("Number of articulation points:", len(ArtNIdV))

# Clustering figures.
print("Average clustering coefficient: %.4f" % snap.GetClustCf(G, -1))
print("Number of triads:", snap.GetTriads(G, -1))
# A first random node for the clustering coefficient ...
Ran_n = G.GetRndNId(Rnd)
print("Clustering coefficient of random node %d: %.4f" %
      (Ran_n, snap.GetNodeClustCf(G, Ran_n)))
# ... and a second, independently drawn one for the triad count.
Ran_n = G.GetRndNId(Rnd)
print("Number of triads random node %d participates: %d" %
      (Ran_n, snap.GetNodeTriads(G, Ran_n)))
print("Number of edges that participate in at least one triad:",
      snap.GetTriadEdges(G))

# Each plot is written under a short tagged name and then moved into
# plots/ under its final name via MoveFile (defined outside this excerpt).
snap.PlotInDegDistr(G, "D_" + sys.argv[1], "Degree Distribution")
MoveFile(os.path.join(dirname, "inDeg.D_" + sys.argv[1] + ".png"),
         os.path.join(dirname, "plots", "deg_dist_" + sys.argv[1] + ".png"))
snap.PlotShortPathDistr(G, "S_" + sys.argv[1], "Shortest path Distribution")
MoveFile(
    os.path.join(dirname, "diam.S_" + sys.argv[1] + ".png"),
    os.path.join(dirname, "plots", "shortest_path_" + sys.argv[1] + ".png"))
snap.PlotSccDistr(G, "C_" + sys.argv[1], "Component Size Distribution")
# (This call continues past the end of this excerpt.)
MoveFile(
    os.path.join(dirname, "scc.C_" + sys.argv[1] + ".png"),
## Connected Components Distribution
sn.PlotSccDistr(graph, name, "Connected Component Distribution")
plotRemove("scc", "connected_comp", name)

# Question 5
## Clustering Coefficient
avg_clustering = sn.GetClustCf(graph)
print("Average clustering coefficient: {:0.4f}".format(avg_clustering))

## Triads
triad_total = sn.GetTriads(graph)
print("Number of triads: {}".format(triad_total))

## Random Clustering Coefficient
rndNode = graph.GetRndNId()
rnd_clustering = sn.GetNodeClustCf(graph, rndNode)
print("Clustering coefficient of random node {}: {:0.4f}".format(
    rndNode, rnd_clustering))

## Random node triads (a second, independently drawn node)
rndNode = graph.GetRndNId()
rnd_triads = sn.GetNodeTriads(graph, rndNode)
print("Number of triads random node {} participates: {}".format(
    rndNode, rnd_triads))

## Edges in Triads
triad_edge_total = sn.GetTriadEdges(graph)
print("Number of edges that participate in at least one triad: {}".format(
    triad_edge_total))

## Plot Clustering Coefficient
sn.PlotClustCf(graph, name, "Clustering Coefficient Distribution")
plotRemove("ccf", "clustering_coeff", name)
print "\n\tProblem 2: Connectivity and Clustering [only for Steam-Sweden dataset]\n"

# 2.1) Number of triads; -1 is passed as the sample-size argument.
NumTriads = snap.GetTriads(UGraph, -1)
print "2.1)Number of triads: %d\n" % NumTriads

# 2.2) The local clustering coefficient of a randomly selected node.
# The selected node id is reported as well.
NI = UGraph.GetRndNId()
NodeClustCf = snap.GetNodeClustCf(UGraph, NI)
print "2.2)Clustering coefficient of random node < %d > in < %s >: %f\n" % (
    NI, file, NodeClustCf)

# 2.3) Number of triads a randomly selected node participates in.
# A new random node is drawn; its id is reported as well.
NI = UGraph.GetRndNId()
NodeTriads = snap.GetNodeTriads(UGraph, NI)
print "2.3)Number of triads of node < %d > participates in < %d > triads\n" % (
    NI, NodeTriads)

'''
2.4) The two versions of the global clustering coefficient of the network
(the average over local clustering coefficients, as in Watts-Strogatz
definition, and the global clustering coefficient that depends on the
number of triangles).
'''
# Watts-Strogatz clustering coefficient (average of local coefficients).
GraphClustCoeff = snap.GetClustCf(UGraph, -1)

# Global clustering coefficient -- the original author marked this
# "Check back".
# NOTE(review): this is the same snap.GetClustCf call with an extra output
# vector; it presumably returns the average coefficient again rather than
# the triangle-based global one -- confirm against the SNAP documentation.
CfVec = snap.TFltPrV()
Cf = snap.GetClustCf(UGraph, CfVec, -1)
# (This statement continues past the end of this excerpt.)
print "2.4)Clustering coefficient of the network: < %f > (Watts-Strogatz); < %f > (global)\n" % (