Пример #1
0
def calculate_shortest_path_lengths_distribution(graph, hashtag):
    """Plot the shortest-path-length distribution of ``graph`` and time the call.

    Args:
        graph: graph object passed unchanged to ``snap.PlotShortPathDistr``.
        hashtag: string used to build the plot's file-name prefix,
            ``<hashtag>_shortestPathLengthsDist``.

    Prints a progress line before plotting and, afterwards, the elapsed
    wall-clock time truncated to whole seconds.
    """
    began_at = time.time()
    print("Calculating shortest path lengths distribution...")

    plot_prefix = hashtag + "_shortestPathLengthsDist"
    plot_title = "Shortest Path Lengths Distribution"
    snap.PlotShortPathDistr(graph, plot_prefix, plot_title)

    # int() drops the fractional seconds before the timedelta is formatted.
    elapsed_seconds = int(time.time() - began_at)
    print("Completed in: %s" % timedelta(seconds=elapsed_seconds))
Пример #2
0
def shortest_path_distribution_plot(G):
    """
    Saves the shortest path distribution plot of the subgraph G.

    The subgraph name is taken from the last command-line argument
    (``sys.argv[-1]``). snap is asked to plot with that name as file-name
    prefix; the resulting 'diam.<subgraph_name>.png' is then moved to
    './plots/shortest_path_<subgraph_name>.png' and the accompanying
    'diam.<subgraph_name>.plt' and 'diam.<subgraph_name>.tab' files are
    deleted.

    Raises:
        OSError: if the plots directory cannot be created or any of the
            expected snap output files is missing.
    """
    subgraph_name = sys.argv[-1]

    snap.PlotShortPathDistr(G, subgraph_name,
                            f"Shortest Path Distribution in {subgraph_name}")

    # exist_ok tolerates only "directory already exists"; any other failure
    # (e.g. missing permissions) is reported here instead of being swallowed
    # and resurfacing later as a confusing rename error.
    os.makedirs('./plots', exist_ok=True)

    # os.replace overwrites a plot left by a previous run on every platform,
    # whereas os.rename refuses to overwrite on Windows.
    os.replace(f'diam.{subgraph_name}.png',
               f'./plots/shortest_path_{subgraph_name}.png')
    os.remove(f'diam.{subgraph_name}.plt')
    os.remove(f'diam.{subgraph_name}.tab')
Пример #3
0
    def genGraphInfo(self):
        """Print basic statistics of ``self.G`` and generate its distribution plots.

        Reads ``self.graphName`` (used in every message and as the file-name
        prefix of each plot) and ``self.G`` (the graph handed to snap).

        Prints, in order: the node count, the edge count, the comma-separated
        id(s) of every node whose out-degree equals the maximum out-degree,
        the file names of the degree and shortest-path plots, the value of
        ``snap.GetMxSccSz`` and the file name of the component-size plot.
        """
        graphName = self.graphName
        graph = self.G

        # get the number of nodes and edges in the graph
        print("Number of nodes in %s: %d" % (graphName, graph.GetNodes()))
        print("Number of edges in %s: %d" % (graphName, graph.GetEdges()))

        # get the node id(s) with highest degree: snap returns one node id,
        # so look up that node's out-degree and then collect every node that
        # shares it. maxDegree stays -1 if the id is not found.
        nodeIdMaxDegree = snap.GetMxOutDegNId(graph)

        maxDegree = -1
        for node in graph.Nodes():
            if node.GetId() == nodeIdMaxDegree:
                maxDegree = node.GetOutDeg()
                break

        # join() puts commas only between ids; appending "<id>," per node
        # left a dangling comma at the end of the printed list.
        nodeIdsMaxDegreeT = ",".join(
            str(node.GetId()) for node in graph.Nodes()
            if node.GetOutDeg() == maxDegree)

        print("Node id(s) with highest degree in %s: %s" % (
            graphName, nodeIdsMaxDegreeT))

        # plot degree distribution
        snap.PlotOutDegDistr(graph, graphName, "Degree Distribution")
        degreeFileName = "outDeg." + graphName + ".png"
        print("Degree distribution of %s is in: %s" % (
            graphName, degreeFileName))

        # plot shortest path distribution
        snap.PlotShortPathDistr(graph, graphName,
                                "Shortest Path Distribution")
        shortestPathFileName = "diam." + graphName + ".png"
        print("Shortest path distribution of %s is in: %s" % (
            graphName, shortestPathFileName))

        # get the fraction of nodes in largest cc
        print("Fraction of nodes in largest connected component in %s: %f" % (
            graphName, snap.GetMxSccSz(graph)))

        # plot the component size distribution
        snap.PlotSccDistr(graph, graphName, "Component size distribution")
        sccFileName = "scc." + graphName + ".png"
        print("Component size distribution of %s is in: %s" % (
            graphName, sccFileName))
Пример #4
0
def solve_shortest_path_based_questions(G, GName):
    """Print sampled diameter estimates of ``G`` and plot its shortest-path distribution.

    Args:
        G: graph object passed to the snap diameter and plotting functions.
        GName: graph file name; its last 10 characters are dropped to form
            the label used in every message and as the plot file-name prefix.

    For sample sizes 10, 100 and 1000 the full diameter and then the
    effective diameter are estimated and printed, each series followed by
    its mean and (population, ``np.var``) variance. Finally the
    shortest-path distribution is plotted and the PNG file name is printed.
    """
    graphLabel = GName[:-10]
    sampleSizes = (10, 100, 1000)

    fullDiams = []
    for sampleSize in sampleSizes:
        fullDiam = snap.GetBfsFullDiam(G, sampleSize, False)
        fullDiams.append(fullDiam)
        print("Approximate full diameter in {0} with sampling {1} nodes: {2}".format(
            graphLabel, sampleSize, fullDiam))

    temp = np.array(fullDiams)

    # The summary lines previously used GName[:10] (the first 10 characters)
    # instead of the GName[:-10] label used everywhere else.
    print("Approximate full diameter in {0} with sampling nodes (mean and variance): {1}, {2}".format(
        graphLabel, np.mean(temp), np.var(temp)))

    effDiams = []
    for sampleSize in sampleSizes:
        effDiam = snap.GetBfsEffDiam(G, sampleSize, False)
        effDiams.append(effDiam)
        print("Approximate Effective diameter in {0} with sampling {1} nodes: {2}".format(
            graphLabel, sampleSize, effDiam))

    temp = np.array(effDiams)

    # This summary describes the effective diameters; it used to be labelled
    # "full diameter" because the line had been copied from the block above.
    print("Approximate Effective diameter in {0} with sampling nodes (mean and variance): {1}, {2}".format(
        graphLabel, np.mean(temp), np.var(temp)))

    snap.PlotShortPathDistr(G, graphLabel, graphLabel + " - shortest path")

    filename = "diam." + graphLabel + ".png"
    print("Shortest path distribution of {0} is in: {1}".format(
        graphLabel, filename))
Пример #5
0
def main():
    """Report structural statistics of one subgraph and save its plots.

    The edge-list file named by ``sys.argv[1]`` is loaded from the
    ``subgraphs`` directory under the current working directory. The part
    of the file name before the first "." is used as the subgraph name.

    Printed, in order: node and edge counts, the number of degree-7 nodes,
    the id(s) of the highest-degree node(s), sampled full and effective
    diameters (sample sizes 10, 100, 1000) with their mean and sample
    variance, the largest-component fraction, bridge and articulation-point
    counts, clustering and triad statistics.

    Four distribution plots are then generated inside ``plots``; every
    non-PNG file in that directory is deleted and the snap output files are
    renamed to ``<kind>_<subgraph name>.png``. The working directory is
    restored to its starting value at the end.
    """
    sampleSizes = (10, 100, 1000)
    # snap output prefix -> prefix used for the final plot file name
    plotPrefixes = {
        "ccf": "clustering_coeff_",
        "outDeg": "deg_dist_",
        "diam": "shortest_path_",
        "scc": "connected_comp_",
    }

    parentDir = os.getcwd()
    os.chdir(parentDir + "/subgraphs")
    sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    subGraphName = sys.argv[1].split(".")[0]
    os.chdir(parentDir)

    #### 1 ########
    # GetNodes() gives the count directly; no need to iterate over every node.
    printWithOutNewLine("Number of nodes:", sub_graph.GetNodes())
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))

    # snap returns a single node id of maximum degree; find that node's
    # degree, then list every node that shares it.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)

    maxDegVal = 0
    for pair in nodeDegPairs:
        if pair.GetVal1() == rndMaxDegNId:
            maxDegVal = pair.GetVal2()
            break

    maxDegNodes = [pair.GetVal1() for pair in nodeDegPairs
                   if pair.GetVal2() == maxDegVal]

    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sampledFullDiam = [snap.GetBfsFullDiam(sub_graph, size, False)
                       for size in sampleSizes]
    sampledFullDiamStats = [
        round(statistics.mean(sampledFullDiam), 4),
        round(statistics.variance(sampledFullDiam), 4),
    ]

    for size, diam in zip(sampleSizes, sampledFullDiam):
        printWithOutNewLine(
            "Approximate full diameter by sampling %d nodes:" % size, diam)
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    # Each estimate is rounded to 4 places before the statistics are taken.
    sampledEffDiam = [round(snap.GetBfsEffDiam(sub_graph, size, False), 4)
                      for size in sampleSizes]
    sampledEffDiamStats = [
        round(statistics.mean(sampledEffDiam), 4),
        round(statistics.variance(sampledEffDiam), 4),
    ]

    for size, diam in zip(sampleSizes, sampledEffDiam):
        printWithOutNewLine(
            "Approximate effective diameter by sampling %d nodes:" % size,
            diam)
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))

    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))

    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))

    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))

    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)

    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))

    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))

    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    os.makedirs('plots', exist_ok=True)

    os.chdir(parentDir + "/plots")
    plotsDir = os.getcwd()

    snap.PlotOutDegDistr(sub_graph, subGraphName,
                         subGraphName + " Subgraph Degree Distribution")
    snap.PlotShortPathDistr(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Shortest Path Lengths Distribution")
    snap.PlotSccDistr(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Connected Components Size Distribution")
    snap.PlotClustCf(
        sub_graph, subGraphName,
        subGraphName + " Subgraph Clustering Coefficient Distribution")

    # Only the PNG images are kept; everything else in the plots directory
    # (presumably snap's intermediate plot/data files) is deleted.
    for leftover in os.listdir(plotsDir):
        if not leftover.endswith(".png"):
            os.remove(os.path.join(plotsDir, leftover))

    # Rename "<snapPrefix>.<name>.png" to "<plotPrefix><name>.png". Files
    # that do not have exactly that shape or carry an unknown prefix are
    # left alone; previously they were renamed with whatever prefix the
    # preceding file had used (or the literal "filename").
    for plotFile in os.listdir(plotsDir):
        nameSplit = plotFile.split(".")
        if len(nameSplit) != 3:
            continue
        filePrefix = plotPrefixes.get(nameSplit[0])
        if filePrefix is None:
            continue
        os.rename(plotFile, filePrefix + nameSplit[1] + "." + nameSplit[2])

    os.chdir(parentDir)
Пример #6
0
# Estimate the effective diameter of Graph1 three times, passing 10, 100 and
# 1000 as the second argument of snap.GetBfsEffDiam (the printed message
# calls this the number of sampled nodes), and print each estimate rounded
# to 3 decimal places. Graph1 and input_file are defined outside this
# fragment.
diameter = [0, 0, 0]
index = 0  # slot of `diameter` filled by the current iteration
for i in [10, 100, 1000]:
    diameter[index] = snap.GetBfsEffDiam(Graph1, i, False)
    print "Approx. effective diameter in " + input_file + " with sampling ", i, " nodes: ", round(
        diameter[index], 3)
    index = index + 1

# Mean of the three estimates, and their variance with divisor 2 (n - 1 for
# three values, i.e. the sample variance).
mean = float(sum(diameter) / 3.0)
variance = float((pow((diameter[0] - mean), 2) + pow(
    (diameter[1] - mean), 2) + pow((diameter[2] - mean), 2)) / 2.0)

print "Approx. effective diameter in " + input_file + " (mean and variance): ", round(
    mean, 3), ", ", round(variance, 3)

# Plot the shortest-path distribution; the message below reports the PNG as
# "diam.<prefix>.png", where <prefix> is the second argument of the call.
snap.PlotShortPathDistr(Graph1, "shortest_path_plot_" + input_file,
                        "Undirected graph - shortest path", 1000)
print "Shortest path distribution of " + input_file + " is in: diam.shortest_path_plot_" + input_file + ".png"

# Collect the components of Graph1 and keep the size (Len) of the largest.
largest_component = snap.TCnComV()
snap.GetSccs(Graph1, largest_component)
largest = 0.0

for item in largest_component:
    if largest < item.Len():
        largest = item.Len()

print ""

# final_nodes is defined outside this fragment — presumably the total node
# count of Graph1; TODO confirm.
print "Fraction of nodes in largest connected component in " + input_file + ": ", float(
    largest) / float(final_nodes)
Пример #7
0
def ShortPath():
	"""Plot the shortest-path distribution of the module-level ``Graph``.

	Returns whatever ``snap.PlotShortPathDistr`` returns.
	"""
	file_prefix = "ScaleFreeShortestPath"
	plot_title = "Undirected graph - shortest path"
	return snap.PlotShortPathDistr(Graph, file_prefix, plot_title)
    value_new = [
        snap.GetBfsEffDiam(p2p_gnutella04_subgraph, 10, v4, True)[0],
        snap.GetBfsEffDiam(p2p_gnutella04_subgraph, 100, v4, True)[0],
        snap.GetBfsEffDiam(p2p_gnutella04_subgraph, 1000, v4, True)[0]
    ]

    print "Approximate Effective diameter in p2p-Gnutella04-subgraph with sampling nodes(mean and variance):" + str(
        round(statistics.mean(value_new), 3)) + "," + str(
            round(statistics.variance(value_new), 4))

# Task 1.2.3.3

# Plot the shortest-path-length distribution of whichever subgraph is named
# by sub_graph_name and print the name of the PNG that holds the plot.
# sub_graph_name and the *_subgraph graph objects are defined outside this
# fragment.
if (sub_graph_name == "soc-Epinions1-subgraph"):
    # Plot the distribution of shortest path lengths

    snap.PlotShortPathDistr(soc_epinions1_subgraph, "soc-Epinions1-subgraph",
                            "Undirected graph - shortest path")
    print "Shortest path distribution of soc-Epinions1-subgraph is in :" + "diam.soc-Epinions1-subgraph.png"
if (sub_graph_name == "cit-HepPh-subgraph"):
    # Plot the distribution of shortest path lengths

    snap.PlotShortPathDistr(cit_heph_subgraph, "cit-HepPh-subgraph",
                            "Undirected graph - shortest path")
    print "Shortest path distribution of cit-HepPh-subgraph is in :" + "diam.cit-HepPh-subgraph.png"
if (sub_graph_name == "email-Enron-subgraph"):
    # Plot the distribution of shortest path lengths

    snap.PlotShortPathDistr(email_enron_subgraph, "email-Enron-subgraph",
                            "Undirected graph - shortest path")
    print "Shortest path distribution of email-Enron-subgraph is in :" + "diam.email-Enron-subgraph.png"
if (sub_graph_name == "p2p-Gnutella04-subgraph"):
    # Plotting the distribution of shortest Length
import snap

# Demo: generate three random graphs with snap.GenRndGnm(<type>, 100, 1000)
# — presumably 100 nodes and 1000 edges each, TODO confirm — one per graph
# type (PNGraph, PUNGraph, PNEANet), plot the shortest-path distribution of
# each and draw the first two with GraphViz.
# NOTE(review): all three PlotShortPathDistr calls use the same file-name
# prefix "example"; if the output name depends only on that prefix, each
# call overwrites the previous plot — confirm whether that is intended.
Graph = snap.GenRndGnm(snap.PNGraph, 100, 1000)
snap.PlotShortPathDistr(Graph, "example", "Directed graph - shortest path")
snap.DrawGViz(Graph, snap.gvlDot, "graph.png", "graph 1")

UGraph = snap.GenRndGnm(snap.PUNGraph, 100, 1000)
snap.PlotShortPathDistr(UGraph, "example", "Undirected graph - shortest path")
snap.DrawGViz(UGraph, snap.gvlNeato, "graph_undirected.png", "graph 2", True)

Network = snap.GenRndGnm(snap.PNEANet, 100, 1000)
snap.PlotShortPathDistr(Network, "example", "Network - shortest path")
Пример #10
0
# Summarise the three effective-diameter estimates (effDiam10, effDiam100,
# effDiam1000, `file`, UGraph, mean and variance are defined outside this
# fragment).
effData = [effDiam10, effDiam100, effDiam1000]
em = mean(effData)
ev = variance(effData)

# The diameters and their statistics can be fractional, so they are printed
# with "%.4f"; "%d" silently truncated them to whole numbers.
print("Approx. effective diameter in %s with sampling 10 nodes: %.4f" % (
    file, effDiam10))
print("Approx. effective diameter in %s with sampling 100 nodes: %.4f" % (
    file, effDiam100))
print("Approx. effective diameter in %s with sampling 1000 nodes: %.4f" % (
    file, effDiam1000))
print("Approx. effective diameter in %s (mean and variance): %.4f, %.4f\n" % (
    file, em, ev))
# c) Plot of the distribution of the shortest path
plotFN1 = file + ".diam.short-path-plot.png"
snap.PlotShortPathDistr(UGraph, plotFN1,
                        "Undirected graph - Shortest path for file " + file)
# snap uses its second argument as a file-name prefix and writes
# "diam.<prefix>.png", so report that path rather than the bare prefix.
print("\nShortest path distribution of %s is in: %s\n" % (
    file, "diam." + plotFN1 + ".png"))

# 4) Components of the network:
print("Components of the network:\n")
# a) Fraction of nodes in the largest connected component
nodeFrac = snap.GetMxSccSz(UGraph)
# The fraction lies between 0 and 1; "%d" printed it as 0 or 1.
print("Fraction of nodes in largest connected component in '%s': %.4f\n" % (
    file, nodeFrac))
# b) Plot of the distribution of sizes of connected components.
plotFN2 = file + ".scc.connected-components-plot.png"
snap.PlotSccDistr(UGraph, plotFN2,
                  "Undirected graph - scc distribution for file " + file)
# As above, the file snap writes is "scc.<prefix>.png".
print("\nComponent size distribution of %s is in: %s\n" % (
    file, "scc." + plotFN2 + ".png"))

# end of program
# Effective-diameter estimates with 100 and 1000 passed as the second
# argument; eff1 (the estimate for 10), Graph1 and file_name are defined
# before this fragment.
eff2 = snap.GetBfsEffDiam(Graph1, 100, False)
eff3 = snap.GetBfsEffDiam(Graph1, 1000, False)
print("Approximate effective diameter by sampling ", 10,
      " nodes: %0.4f" % eff1)
print("Approximate effective diameter by sampling ", 100,
      " nodes: %0.4f" % eff2)
print("Approximate effective diameter by sampling ", 1000,
      " nodes: %0.4f" % eff3)
# Mean of the three estimates; variance computed as E[x^2] - mean^2, i.e.
# the population variance (divisor 3).
effmean = (eff1 + eff2 + eff3) / 3.0
effvar = (((eff1 * eff1) + (eff2 * eff2) +
           (eff3 * eff3)) / 3.0) - (effmean * effmean)
print("Approximate effective diameter (mean and variance): %0.4f,%0.4f" %
      (effmean, effvar))

# Plot the shortest-path-length distribution with "shortest_path_<file_name>"
# as the file-name prefix.
str1 = 'shortest_path_' + file_name
snap.PlotShortPathDistr(Graph1, str1, "Distribution of shortest path lengths")

# 4. Components of the network
fraction = snap.GetMxSccSz(Graph1)
print("Fraction of nodes in largest connected component: %0.4f" % fraction)

# GetEdgeBridges fills V_edges; its length is reported as the bridge count.
V_edges = snap.TIntPrV()
snap.GetEdgeBridges(Graph1, V_edges)
edge_bridges = V_edges.Len()
print("Number of edge bridges: ", edge_bridges)

# GetArtPoints fills Art_points; its length is the articulation-point count.
Art_points = snap.TIntV()
snap.GetArtPoints(Graph1, Art_points)
art = Art_points.Len()
print("Number of articulation points: ", art)
Пример #12
0
# G, ArtNIdV, Rnd, dirname and MoveFile are defined before this fragment.
# Fill ArtNIdV with the articulation points of G and report how many there are.
snap.GetArtPoints(G, ArtNIdV)
print("Number of articulation points:", len(ArtNIdV))
print("Average clustering coefficient: %.4f" % snap.GetClustCf(G, -1))
print("Number of triads:", snap.GetTriads(G, -1))
# A random node is drawn for the clustering coefficient ...
Ran_n = G.GetRndNId(Rnd)
print("Clustering coefficient of random node %d: %.4f" %
      (Ran_n, snap.GetNodeClustCf(G, Ran_n)))
# ... and a second, independently drawn node for the triad count.
Ran_n = G.GetRndNId(Rnd)
print("Number of triads random node %d participates: %d" %
      (Ran_n, snap.GetNodeTriads(G, Ran_n)))
print("Number of edges that participate in at least one triad:",
      snap.GetTriadEdges(G))

# Each plot below is generated with a prefix built from sys.argv[1]; the
# resulting "<kind>.<prefix>.png" is then handed to MoveFile together with a
# destination inside <dirname>/plots.
snap.PlotInDegDistr(G, "D_" + sys.argv[1], "Degree Distribution")
MoveFile(os.path.join(dirname, "inDeg.D_" + sys.argv[1] + ".png"),
         os.path.join(dirname, "plots", "deg_dist_" + sys.argv[1] + ".png"))

snap.PlotShortPathDistr(G, "S_" + sys.argv[1], "Shortest path Distribution")
MoveFile(
    os.path.join(dirname, "diam.S_" + sys.argv[1] + ".png"),
    os.path.join(dirname, "plots", "shortest_path_" + sys.argv[1] + ".png"))

snap.PlotSccDistr(G, "C_" + sys.argv[1], "Component Size Distribution")
MoveFile(
    os.path.join(dirname, "scc.C_" + sys.argv[1] + ".png"),
    os.path.join(dirname, "plots", "connected_comp_" + sys.argv[1] + ".png"))

# Same "C_" prefix as the component plot, but the moved file is "ccf.C_..."
# rather than "scc.C_...", so the two plots use different file names.
snap.PlotClustCf(G, "C_" + sys.argv[1], "Clustering Coefficient Distribution")
MoveFile(
    os.path.join(dirname, "ccf.C_" + sys.argv[1] + ".png"),
    os.path.join(dirname, "plots", "clustering_coeff_" + sys.argv[1] + ".png"))
Пример #13
0
        print("Approximate full diameter by sampling {} nodes: {}".format(
            numNodes[i], fullDia[i]))
    print(
        "Approximate full diameter (mean and variance): {:.4f} {:.4f}".format(
            np.mean(fullDia), np.var(fullDia)))

    ## Effective Diameter
    effDia = [sn.GetBfsEffDiam(graph, tNodes) for tNodes in numNodes]
    for i in range(3):
        print("Approximate effective diameter by sampling {} nodes: {:.4f}".
              format(numNodes[i], effDia[i]))
    print("Approximate effective diameter (mean and variance): {:.4f} {:.4f}".
          format(np.mean(effDia), np.var(effDia)))

    ## Plot Shortest Path Distr
    sn.PlotShortPathDistr(graph, name, "Shortest Path Distribution")
    plotRemove("diam", "shortest_path", name)

    #Question 4

    ## Max Comp Fraction
    MxConCompSize = sn.GetMxScc(graph).GetNodes()
    print("Fraction of nodes in largest connected component: {:0.4f}".format(
        MxConCompSize / graph.GetNodes()))

    ## Edge Bridges
    edgeBridge = sn.TIntPrV()
    sn.GetEdgeBridges(graph, edgeBridge)
    print("Number of edge bridges: {}".format(len(edgeBridge)))

    ## Articulation Points
Пример #14
0
            y.append(results[key])
        inds = np.argsort(x)
        x2 = []
        y2 = []
        for ind in inds:
            x2.append(x[ind])
            y2.append(y[ind])
        plt.loglog(x2, y2, color=color, label=label)
    plt.show()


# Build three word graphs, one per boolean flag of generate_word_graph; the
# plot prefixes and labels below ("hyp"/"hypernym", "poly"/"polysemy",
# "mero"/"meronymy") suggest which relation each flag selects — TODO confirm.
# id2 and synset2 are overwritten by every call, so only the values from the
# most recent call remain.
G1, id2, synset2, _, _, _ = generate_word_graph(True, False, False)
print(G1.GetNodes())
G2, id2, synset2, _, _, _ = generate_word_graph(False, True, False)
G3, id2, synset2, _, _, _ = generate_word_graph(False, False, True)
# Shortest-path distribution plot for each graph, with 1000 as fourth argument.
snap.PlotShortPathDistr(G1, "hyp", "graph - shortest path", 1000)
snap.PlotShortPathDistr(G2, "poly", "graph - shortest path", 1000)
snap.PlotShortPathDistr(G3, "mero", "graph - shortest path", 1000)
make_log_degree_graph([(G1, "hypernym", "b"), (G2, "polysemy", "y"),
                       (G3, "meronymy", "r")])

# NOTE(review): `meme` is not defined anywhere in the visible code. If it is
# undefined at run time this line raises NameError and nothing below runs —
# looks like a deliberate debugging stop; confirm.
print(meme)

# Rebuild G2 as a meaning graph and print its node and edge counts.
G2, id2, synset2, _, _, _ = generate_meaning_graph(True, False, False)
print(G2.GetNodes())
print(G2.GetEdges())
# GW is the result of snap.GetMxScc(G2); its node count is printed twice,
# the second time with a "lolhyp" tag.
GW = snap.GetMxScc(G2)
print(GW.GetNodes())
print(GW.GetNodes(), "lolhyp")

G3, id2, synset2, _, _, _ = generate_meaning_graph(False, False, True)
Пример #15
0
def graphStructure(elistName, elistPath):
    """
        Build the graph described by an edge list and collect its structural statistics.

        Besides filling the result dictionary, four distribution plots are
        requested from snap with the file-name prefixes deg_dist_<elistName>,
        shortest_path_<elistName>, connected_comp_<elistName> and
        clustering_coeff_<elistName>.

        Args:
        elistName (str) -> Name of the edge list, used in the plot prefixes
        elistPath (pathlib.Path) -> Edge list file the graph is loaded from

        Return:
        RESULTS (dict) -> Results for the five parts of the assignment, keyed
        by nodeCount, edgeCount, maxDegree, maxDegreeNodes, degree7Count,
        fullDiameters, effDiameters, fractionLargestConnected, edgeBridges,
        articulationPoints, avgClusterCoefficient, triadCount,
        randomClusterCoefficient, randomNodeTriads and edgesTriads
    """

    sampleSizes = (10, 100, 1000)
    subGraph = snap.LoadEdgeList(snap.PUNGraph, elistPath, 0, 1)

    # Part 1 (Size of the network)
    RESULTS = {
        'nodeCount': subGraph.GetNodes(),
        'edgeCount': subGraph.GetEdges(),
    }

    # Part 2 (Degree of nodes in the network)
    # Snapshot (id, degree) once, then derive every degree statistic from it.
    nodeDegrees = [(node.GetId(), node.GetDeg()) for node in subGraph.Nodes()]
    degree7Count = sum(1 for _, degree in nodeDegrees if degree == 7)
    # The leading 0 keeps the result at 0 for a graph without nodes.
    maxDegree = max([0] + [degree for _, degree in nodeDegrees])
    maxDegreeNodes = [nodeId for nodeId, degree in nodeDegrees
                      if degree == maxDegree]

    # The graph is undirected, so the out-degree plot is simply the degree plot
    snap.PlotOutDegDistr(subGraph, f"deg_dist_{elistName}")

    RESULTS['maxDegree'] = maxDegree
    RESULTS['maxDegreeNodes'] = ','.join(str(nodeId) for nodeId in maxDegreeNodes)
    RESULTS['degree7Count'] = degree7Count

    # Part 3 (Paths in the network)
    # Full diameter, one estimate per sample size, then mean and variance
    fullDiameters = {
        size: snap.GetBfsFullDiam(subGraph, size, False)
        for size in sampleSizes
    }
    fullStats = meanVariance(fullDiameters.values())
    fullDiameters['mean'] = fullStats[0]
    fullDiameters['variance'] = fullStats[1]
    RESULTS['fullDiameters'] = fullDiameters

    # Effective diameter, same procedure
    effDiameters = {
        size: snap.GetBfsEffDiam(subGraph, size, False)
        for size in sampleSizes
    }
    effStats = meanVariance(effDiameters.values())
    effDiameters['mean'] = effStats[0]
    effDiameters['variance'] = effStats[1]
    RESULTS['effDiameters'] = effDiameters

    snap.PlotShortPathDistr(subGraph, f"shortest_path_{elistName}")

    # Part 4 (Components of the network)
    bridgeVector = snap.TIntPrV()
    articulationVector = snap.TIntV()
    RESULTS['fractionLargestConnected'] = snap.GetMxSccSz(subGraph)
    snap.GetEdgeBridges(subGraph, bridgeVector)
    snap.GetArtPoints(subGraph, articulationVector)
    RESULTS['edgeBridges'] = len(bridgeVector)
    RESULTS['articulationPoints'] = len(articulationVector)

    snap.PlotSccDistr(subGraph, f"connected_comp_{elistName}")

    # Part 5 (Connectivity and clustering in the network)
    RESULTS['avgClusterCoefficient'] = snap.GetClustCf(subGraph, -1)
    RESULTS['triadCount'] = snap.GetTriadsAll(subGraph, -1)[0]

    # Two independent random draws: one node per reported statistic
    clusterNode = subGraph.GetRndNId(Rnd)
    triadNode = subGraph.GetRndNId(Rnd)
    clusterCoefficient = snap.GetNodeClustCf(subGraph, clusterNode)
    RESULTS['randomClusterCoefficient'] = (clusterNode, clusterCoefficient)
    triadTotal = snap.GetNodeTriads(subGraph, triadNode)
    RESULTS['randomNodeTriads'] = (triadNode, triadTotal)
    RESULTS['edgesTriads'] = snap.GetTriadEdges(subGraph)

    snap.PlotClustCf(subGraph, f"clustering_coeff_{elistName}")

    return RESULTS
    print("Approximate effective diameter by sampling 10 nodes:",
          round(effective_diameter[-1], 4))
    effective_diameter.append(snap.GetBfsEffDiam(graph, 100))
    print("Approximate effective diameter by sampling 100 nodes:",
          round(effective_diameter[-1], 4))
    effective_diameter.append(snap.GetBfsEffDiam(graph, 1000))
    print("Approximate effective diameter by sampling 1000 nodes:",
          round(effective_diameter[-1], 4))

    print("Approximate effective diameter (mean and variance):",
          round(get_mean(effective_diameter), 4),
          ',',
          round(get_variance(effective_diameter), 4),
          sep="")

    snap.PlotShortPathDistr(graph, "temp", "Undirected graph - shortest path")
    os.system("mv diam.temp.png plots/shortest_path_" + subgraph_name + ".png")
    os.system("rm diam.*")

    print("Fraction of nodes in largest connected component:",
          round(snap.GetMxSccSz(graph), 4))
    print("Number of edge bridges:", get_bridges(graph).Len())
    print("Number of articulation points:",
          get_articulation_points(graph).Len())

    snap.PlotSccDistr(graph, "temp", "Undirected graph - scc distribution")
    os.system("mv scc.temp.png plots/connected_comp_" + subgraph_name + ".png")
    os.system("rm scc.*")

    print("Average clustering coefficient:", round(snap.GetClustCf(graph), 4))
    print("Number of triads:", snap.GetTriads(graph))
# # 3a

# Full-diameter estimates of Fb_graph (defined outside this fragment) with
# 10, 100 and 1000 passed as the second argument of GetBfsFullDiam.
full_diam_list = []
for i in range(1, 4):
    no_nodes = 10**i
    full_diam = snap.GetBfsFullDiam(Fb_graph, no_nodes, False)
    full_diam_list.append(full_diam)
    print("Approximate full diameter by sampling " + str(no_nodes) +
          " nodes: " + str(round(full_diam, 4)))

# Mean and population variance (divisor is the number of estimates).
mean = sum(full_diam_list) / len(full_diam_list)
res = sum((i - mean)**2 for i in full_diam_list) / len(full_diam_list)
print("Approximate full diameter (mean and variance): " + str(round(mean, 4)) +
      "," + str(round(res, 4)))

# Same procedure for the effective diameter; `mean` and `res` are reused and
# overwritten below.
eff_diam_list = []
for i in range(1, 4):
    no_nodes = 10**i
    eff_diam = snap.GetBfsEffDiam(Fb_graph, no_nodes, False)
    eff_diam_list.append(eff_diam)
    print("Approximate effective  diameter by sampling " + str(no_nodes) +
          " nodes: " + str(round(eff_diam, 4)))

mean = sum(eff_diam_list) / len(eff_diam_list)
res = sum((i - mean)**2 for i in eff_diam_list) / len(eff_diam_list)
print("Approximate effective  diameter (mean and variance): " +
      str(round(mean, 4)) + "," + str(round(res, 4)))

# NOTE(review): the plot description says "Directed graph"; whether Fb_graph
# is directed cannot be told from this fragment — confirm.
snap.PlotShortPathDistr(Fb_graph, "exa", "Directed graph - shortest path")
Пример #18
0
# Effective-diameter estimates of fbsgel (defined outside this fragment)
# with i = 10, 100, 1000 as the second argument of GetBfsEffDiam. The loop
# accumulates the sum and the sum of squares of the three estimates.
i = 10
average = 0.0
variance = 0.0
while (i <= 1000):
    diam = snap.GetBfsEffDiam(fbsgel, i, False)
    print("Approximate effective diameter by sampling", i, "nodes:",
          round(diam, 4))
    i *= 10
    average += diam
    variance += (diam * diam)
# Three estimates were taken: mean = sum / 3, and the population variance is
# E[x^2] - mean^2.
average /= 3
variance = (variance / 3) - average * average
print("Approximate effective diameter(mean and variance): %0.4f,%0.4f" %
      (average, variance))
# c) Plot of the shortest-path distribution; the same string is passed as
# the second and third argument.
snap.PlotShortPathDistr(fbsgel, "shortest_path_" + str(subgraph_name),
                        "shortest_path_" + str(subgraph_name))

# Q4
# a) Value of GetMxSccSz, rounded to 4 places
print("Fraction of nodes in largest connected component:",
      round(snap.GetMxSccSz(fbsgel), 4))
# b) GetEdgeBridges fills EdgeBridgeV; its length is the bridge count
EdgeBridgeV = snap.TIntPrV()
snap.GetEdgeBridges(fbsgel, EdgeBridgeV)
print("Number of edge bridges:", len(EdgeBridgeV))
# c) GetArtPoints fills ArtNIdV; its length is the articulation-point count
ArtNIdV = snap.TIntV()
snap.GetArtPoints(fbsgel, ArtNIdV)
print("Number of articulation points:", len(ArtNIdV))
#d Plot
snap.PlotSccDistr(fbsgel, "connected_comp_" + str(subgraph_name),
Пример #19
0
def main(argv):
    """Print structural statistics of the graph stored in an edge-list file.

    Args:
        argv: command-line arguments without the program name; must hold
            exactly one item, the path to the edge-list file.

    The file is loaded twice: with networkx (degrees, 2-hop neighbourhoods,
    exact diameter, connected components of the graph and of its
    complement) and with snap (sampled full/effective diameters and the
    shortest-path distribution plot). Every result is printed; the degree
    and component-size plots are produced by the module's plotting helpers.

    Exits with status 1 after printing a usage line when ``argv`` does not
    hold exactly one argument.
    """
    if len(argv) != 1:
        print("usage: python gen-structure.py <path/to/edgelist>")
        # A usage error is a failure: exit non-zero so calling scripts can
        # detect it (the status used to be 0).
        sys.exit(1)

    # Q0. Uncomment to generate random5000by6.txt edge list.
    # generate_graph_nx(5000)

    graph_file_path = argv[0]
    graph_file_name = graph_file_path.split('/')[-1]

    print("Current file: {}".format(graph_file_name))

    # g_nx = nx.read_edgelist(graph_file_path)
    g_nx = nx.read_weighted_edgelist(graph_file_path)

    # Q1.a. print the number of nodes in the graph
    print("Number of nodes in {}: {}".format(graph_file_name, g_nx.number_of_nodes()))
    # Q1.b print the number of edges in the graph
    print("Number of edges in {}: {}".format(graph_file_name, g_nx.number_of_edges()))

    # Q2.a. nx.degree returns a number or a dictionary with nodes as keys and degree as value.
    degree_dict = nx.degree(g_nx)
    nodes_with_degree_1 = [k for k in degree_dict.keys() if degree_dict[k] == 1]
    print("Number of nodes with degree = 1 in {}: {}".format(graph_file_name, len(nodes_with_degree_1)))

    # Q2.b. find max degree and every node that has it, in one pass.
    max_degree = 0
    nodes_with_max_degree = []

    for k, v in degree_dict.items():
        if v > max_degree:
            max_degree = v
            nodes_with_max_degree = [k]
        elif v == max_degree:
            nodes_with_max_degree.append(k)

    print("Max Degree is {}".format(max_degree))
    # print "Check: Max Degree is {}".format(sorted(degree_dict.values())[-1])  # sanity check
    print("Node id(s) with highest degree in {}: {}".format(
        graph_file_name, ", ".join(str(i) for i in nodes_with_max_degree)))

    # Q2.c. 2-hop Neighbors
    for node in nodes_with_degree_1:
        neighbors = g_nx.neighbors(node)  # nodes in 1 hop. Should just be 1.
        if len(neighbors) > 1:  # Sanity check
            print("Not a node with degree 1!!!")
            continue

        n1 = neighbors[0]

        # Average degree over the neighbours of the node's single neighbour.
        n2s = g_nx.neighbors(n1)
        sum_degrees_n2s = sum(g_nx.degree(d) for d in n2s)
        avg_degree_n2 = float(sum_degrees_n2s)/len(n2s)

        print("The average degree of {}'s 2-hop neighborhood is: {}".format(node, avg_degree_n2))

    # Using snap for plots.
    g_snap = snap.LoadEdgeList(snap.PUNGraph, graph_file_path)

    # Q2.d Plot the degree distribution
    # snap.PlotOutDegDistr(g_snap, graph_file_name+"-degree_distribution", "Plot of the degree distribution")
    plot_degree_distribution(g_nx, graph_file_name)
    print("Degree distribution of {} is in: {}".format(
        graph_file_name, graph_file_name+"-degree_distribution.png"))

    #  added for assignment 2
    # The message used to have a single placeholder, so the diameter was
    # printed where the graph name belongs; name the graph and the value.
    print("Approx. diameter in {}: {}".format(graph_file_name, nx.diameter(g_nx)))


    # Q3.a. Approximate full diameter (maximum shortest path length)
    full_diameters = []
    for max_size in RANDOM_SIZE_LIST:
        full_diam = snap.GetBfsFullDiam(g_snap, max_size, False)
        full_diameters.append(full_diam)
        print("Approx. diameter in {} with sampling {} nodes: {}".format(
            graph_file_name, max_size, full_diam))

    print("Approx. diameter in {} (mean and variance): {}, {}.".format(
        graph_file_name, numpy.mean(full_diameters), numpy.var(full_diameters)))

    # Q3.b. Effective Diameter
    effective_diameters = []
    for max_size in RANDOM_SIZE_LIST:
        effective_diam = snap.GetBfsEffDiam(g_snap, max_size, False)
        effective_diameters.append(effective_diam)

        print("Approx. effective diameter in {} with sampling {} nodes: {}".format(
            graph_file_name, max_size, effective_diam))

    print("Approx. effective diameter in {} (mean and variance): {}, {}.".format(
        graph_file_name, numpy.mean(effective_diameters), numpy.var(effective_diameters)))

    # Q3.c. Plot distribution of shortest path lengths
    snap.PlotShortPathDistr(g_snap, graph_file_name+"-shortest_path_distribution",
                            "Plot of the distribution of shortest path lengths")
    print("Shortest path distribution of {} is in: {}".format(
        graph_file_name, "diam."+graph_file_name+"-shortest_path_distribution.png"))

    # Q4.a. Fraction of nodes in the largest connected component.
    num_nodes_largest_comp_gnx = 0
    connected_components_gnx = sorted(nx.connected_components(g_nx), key=len, reverse=True)

    if len(connected_components_gnx) > 0:
        num_nodes_largest_comp_gnx = len(connected_components_gnx[0])

    frac_largest_comp_gnx = float(num_nodes_largest_comp_gnx)/g_nx.number_of_nodes()

    print("Fraction of nodes in largest connected component in {}: {}".format(graph_file_name, frac_largest_comp_gnx))

    # Q4.b. Fraction of nodes in the largest connected component of the complement of the real graph
    num_nodes_largest_comp_gnxc = 0
    g_nx_c = nx.complement(g_nx, "g_nx_c")
    # Sort the connected components based on size
    connected_components_gnxc = sorted(nx.connected_components(g_nx_c), key=len, reverse=True)

    if len(connected_components_gnxc) > 0:
        num_nodes_largest_comp_gnxc = len(connected_components_gnxc[0])

    frac_largest_comp_gnxc = float(num_nodes_largest_comp_gnxc) / g_nx_c.number_of_nodes()

    print("Fraction of nodes in largest connected component in {}'s complement: {}".format(
        graph_file_name, frac_largest_comp_gnxc))

    # Q4.c. Plot of the distribution of sizes of connected components.
    plot_distribution_of_connected_components(connected_components_gnx, graph_file_name)

    print("Component size distribution of {} is in: {}".format(
        graph_file_name, graph_file_name + "-scc_distribution.png"))

    plot_distribution_of_connected_components(connected_components_gnxc,
                                              graph_file_name + "_complement")

    print("Component size distribution of the complement of {} is in: {}".format(
        graph_file_name,
        graph_file_name + "_complement-scc_distribution.png"))