Exemplo n.º 1
0
def gen_D(Pi, V_exo, theta2):
    """
    Split the opportunity graph into three undirected snap graphs.

    NB: the thresholds below are specific to the joint surplus used in our
    simulations.

    Parameters:
      Pi     - opportunity graph (in our case, the output of gen_RGG).
      V_exo  - 'exogenous' part of joint surplus (output of gen_V_exo),
               indexed as V_exo[i, j] with i <= j.
      theta2 - transitivity parameter (theta[2]).

    Returns the tuple (D, Pi_minus, Pi_exo):
      D        - copy of Pi with robust links and robustly absent links
                 removed.
      Pi_minus - copy of Pi without the robustly absent potential links.
      Pi_exo   - graph on V_exo.shape[0] nodes holding only the robust links.
    """
    n_nodes = V_exo.shape[0]
    D = snap.ConvertGraph(snap.PUNGraph, Pi)
    Pi_minus = snap.ConvertGraph(snap.PUNGraph, Pi)
    Pi_exo = snap.GenRndGnm(snap.PUNGraph, n_nodes, 0)

    for link in Pi.Edges():
        src = link.GetSrcNId()
        dst = link.GetDstNId()
        # Order the endpoints so that V_exo is always read at [smaller, larger].
        i, j = (src, dst) if src <= dst else (dst, src)

        # Robust link: surplus stays positive even with the least favourable
        # transitivity contribution.
        if V_exo[i, j] + min(theta2, 0) > 0:
            D.DelEdge(i, j)
            Pi_exo.AddEdge(i, j)

        # Robustly absent link: surplus is non-positive even with the most
        # favourable transitivity contribution.
        if V_exo[i, j] + max(theta2, 0) <= 0:
            D.DelEdge(i, j)
            Pi_minus.DelEdge(i, j)

    return (D, Pi_minus, Pi_exo)
Exemplo n.º 2
0
def tuntoall():
    """Load the binary undirected graph at NW.twitter_binary and time its
    conversion to a directed graph and then to an attributed network."""
    stream = snap.TFIn(NW.twitter_binary)
    undirected = snap.TUNGraph.Load(stream)

    started = t()
    # undirected graph -> directed graph
    GOut = snap.ConvertGraph(snap.PNGraph, undirected)
    after_directed = reportTime(started, "convert TUNGRAPH to TNGRAPH")

    # undirected graph -> network (the source is again the undirected graph)
    GOut = snap.ConvertGraph(snap.PNEANet, undirected)
    reportTime(after_directed, "convert TUNGRAPH to TNEANet")
Exemplo n.º 3
0
def get_connected_component(graph):
    """Return the largest connected component of `graph`, renumbered.

    Directed graphs (PNGraph) use the largest strongly connected component;
    undirected graphs (PUNGraph) use the largest weakly connected component.
    Any other input raises NotAGraphError.
    """
    if isinstance(graph, snap.PNGraph):
        graph_type = snap.PNGraph
        component = snap.GetMxScc(graph)
    elif isinstance(graph, snap.PUNGraph):
        graph_type = snap.PUNGraph
        component = snap.GetMxWcc(graph)
    else:
        raise NotAGraphError(graph)

    # The trailing True renumbers the node ids from 0 to size-1.
    return snap.ConvertGraph(graph_type, component, True)
Exemplo n.º 4
0
def main():
    """Load the saved directed graph for one rumor, attach node attributes to
    it and write its summary statistics to the output folder."""
    import json
    import snap
    import graphviz
    import matplotlib.pyplot as plt
    import numpy as np
    import xlrd

    # ----- common area -----
    rumor_number = "21"

    # All inputs and outputs live under one per-rumor directory.
    rumor_dir = 'D:\\Papers\\Social Network Mining\\Analysis_of_Rumor_Dataset\\Step 18\\Rumor_' + rumor_number
    path_input = rumor_dir + '\\Input\\'
    path_output = rumor_dir + '\\Output\\'
    path_jsonl = path_input + 'Rumor_' + rumor_number + '.jsonl'
    path_graph = path_input + 'Rumor_' + rumor_number + '.graph'

    workbook_input1_D = xlrd.open_workbook(path_input + 'DATASET.xlsx', on_demand=True)

    graph_stream = snap.TFIn(path_graph)
    G_Directed = snap.TNGraph.Load(graph_stream)
    # A PNEANet is required so that attributes can be assigned to the nodes.
    G_Directed_with_Attributes = snap.ConvertGraph(snap.PNEANet, G_Directed)
    G_Directed_with_Attributes = Get_Graph_with_Attributes_New(
        path_jsonl, G_Directed_with_Attributes, workbook_input1_D)

    # ----- specific area -----
    snap.PrintInfo(G_Directed_with_Attributes, "Python type PNEANet",
                   path_output + "S18_5_Output.txt", False)
Exemplo n.º 5
0
def set_degree_proportional_thresholds(graph, value):
    """Return a PNEANet copy of `graph` in which every node carries an integer
    "threshold" attribute equal to floor(degree * value) + 1."""
    net = snap.ConvertGraph(snap.PNEANet, graph)
    print("Number of graph nodes: ", net.GetNodes())
    for node in net.Nodes():
        threshold = math.floor(node.GetDeg() * value) + 1
        net.AddIntAttrDatN(node.GetId(), threshold, "threshold")
    return net
Exemplo n.º 6
0
def set_random_threshold(graph):
    """Return a PNEANet copy of `graph` where each node gets a random integer
    "threshold" attribute drawn from [0, degree + int(degree / 100 * 20 + 1)]."""
    net = snap.ConvertGraph(snap.PNEANet, graph)
    for node in net.Nodes():
        # Upper bound of the draw: the degree plus a margin derived from it.
        upper_bound = node.GetDeg() + int((node.GetDeg() / 100) * 20 + 1)
        drawn = random.randint(0, upper_bound)
        net.AddIntAttrDatN(node.GetId(), drawn, "threshold")
    return net
Exemplo n.º 7
0
def transform_directed_to_undirected():
    """Convert the module-level graph `G` to an undirected graph.

    Summary statistics of the converted graph are written to
    'Tweets_UN_info.txt', and the file is read back once afterwards.

    Returns:
      The undirected (PUNGraph) conversion of `G`.
    """
    GUn = snap.ConvertGraph(snap.PUNGraph, G)
    snap.PrintInfo(GUn, "Tweets UN stats", "Tweets_UN_info.txt", False)
    # Read the report back as the original did; the context manager closes
    # the handle even if the read fails.
    with open('Tweets_UN_info.txt', 'r') as report:
        report.read()
    return GUn
def proportional_to_the_degree_threshold_assignment(g):
    """Return a PNEANet copy of `g` with an integer "threshold" attribute on
    every node.

    Nodes of degree 0 get the threshold 5; every other node gets
    int((1 / (degree + 5)) * (edges / nodes)).
    """
    net = snap.ConvertGraph(snap.PNEANet, g)
    for node in net.Nodes():
        degree = node.GetDeg()
        if degree > 0:
            threshold = int((1 / (degree + 5)) * (net.GetEdges() / net.GetNodes()))
        else:
            threshold = 5
        net.AddIntAttrDatN(node.GetId(), threshold, "threshold")
    return net
    def join_subgraphs_EB(subgraph1, subgraph2, nmE, nmN):
        """Return a copy of `subgraph2` extended with the edge
        (nmE[0], nmE[1]), adding node `nmN` first when `nmN` is truthy.

        `subgraph1` is accepted but not used.
        """
        merged = snap.ConvertGraph(type(subgraph2), subgraph2)

        if nmN:
            merged.AddNode(nmN)

        src, dst = nmE[0], nmE[1]
        merged.AddEdge(src, dst)
        return merged
Exemplo n.º 10
0
def snowball_sample(G, num_waves, seeds):
    """
    Parameters:
      G - SNAP graph or network to sample from
      num_waves - number of snowball waves
      seeds - SNAP vector (TIntV) of seeds (node ids) to start snowball sample
             from

    Return value:
      SNAP network (TNEANet) snowball sampled from G with each node having
      an integer "zone" attribute for snowball sampling zone
       (0=seed, 1=first wave, etc.)
      [TNEANet needed to allow zone attribute, not actually using multigraph
       capability].

    Note directions on directed graph are ignored - can sample in undirected
    or directed graph.
    """
    assert (len(seeds) == len(set(seeds)))  # no duplicate node ids
    # It seems like GetSubGraph does not preserve node attributes,
    # so instead of adding attributes to nodes on N, make a Python
    # dictionary mapping node ids to zone and then add them back
    # as attributes on the subgraph (node ids are preserved so we
    # can do this).
    zonedict = dict()  # map nodeid : zone
    N = snap.ConvertGraph(snap.PNEANet, G)  # copy graph/network G to network N
    nodes = set(seeds)  # will accumulate all nodes (including seeds) here
    for seed in seeds:
        zonedict[seed] = 0  # seed nodes are zone 0
    newNodes = set(nodes)
    for wave in range(1, num_waves + 1):
        # Iterate over a snapshot because newNodes grows inside the loop.
        for node in set(newNodes):
            neighbours = snap.TIntV()
            snap.GetNodesAtHop(G, node, 1, neighbours,
                               False)  # neighbours of node
            # neighbours that are not already in nodes
            newNeighbours = set(neighbours) - nodes
            for neighbour in newNeighbours:
                # `in` replaces dict.has_key, which Python 3 removed; a node
                # keeps the zone of the first wave that reached it.
                if neighbour not in zonedict:
                    zonedict[neighbour] = wave
            # newNodes gets set union of itself and newNeighbours
            newNodes.update(newNeighbours)
        nodes.update(newNodes)
    # have to convert nodes set into TIntV for use in SNAP
    NodeVec = snap.TIntV()
    for node in nodes:
        NodeVec.Add(node)
    sampleN = snap.GetSubGraph(N, NodeVec)
    # now put the zones as attributes on the subgraph nodes (which depends
    # on nodeids being preserved in the subgraph)
    sampleN.AddIntAttrN("zone", -1)  # add zone attribute init to -1
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for (nodeid, zone) in zonedict.items():
        sampleN.AddIntAttrDatN(nodeid, zone, "zone")
    return sampleN
Exemplo n.º 11
0
def getEdgeBridges(network):
    """Compute the bridge edges of `network` viewed as an undirected graph,
    print each one followed by their count, and return them as a TIntPrV."""
    undirected = snap.ConvertGraph(snap.PUNGraph, network)

    bridges = snap.TIntPrV()
    snap.GetEdgeBridges(undirected, bridges)

    for pair in bridges:
        print("edge: (%d, %d)" % (pair.GetVal1(), pair.GetVal2()))
    print(len(bridges))
    return bridges
Exemplo n.º 12
0
def estimate4SubgraphFrequencies(Network, connected=True):
    """Estimate relative frequencies of 4-node subgraph shapes by sampling.

    `num_samples` (module level) random 4-node subgraphs are drawn from the
    undirected version of `Network`; each is classified by its edge count
    and, where needed, its maximum degree. When `connected` is true, samples
    with fewer than 3 edges are discarded.

    Returns a list of 10 frequencies (counts divided by their total):
      0 -> 0 edges              5 -> 3-path
      1 -> 1 edge               6 -> tailed triangle
      2 -> 2 adjacent edges     7 -> 4-cycle
      3 -> 2 non-adjacent edges 8 -> chordal 4-cycle
      4 -> 3-star               9 -> 4-clique
    """
    def _max_degree(sub):
        # Degree of a maximum-degree node of the sampled subgraph.
        return sub.GetNI(snap.GetMxDegNId(sub)).GetDeg()

    counts = np.zeros(10)
    undirected = snap.ConvertGraph(snap.PUNGraph, Network)

    for _ in range(num_samples):
        sample = snap.GetRndSubGraph(undirected, 4)
        edge_count = sample.GetEdges()
        if connected and edge_count < 3:
            continue

        # The maximum degree is only looked up for 2, 3 and 4 edges, where
        # the edge count alone does not identify the shape.
        if edge_count == 0:
            slot = 0
        elif edge_count == 1:
            slot = 1
        elif edge_count == 2:
            slot = 2 if _max_degree(sample) == 2 else 3
        elif edge_count == 3:
            slot = 4 if _max_degree(sample) == 3 else 5
        elif edge_count == 4:
            slot = 6 if _max_degree(sample) == 3 else 7
        elif edge_count == 5:
            slot = 8
        else:
            slot = 9
        counts[slot] += 1

    return list(counts / sum(counts))
def community_detection(input, output):
    """Load the binary directed graph stored at `input`, run CNM community
    detection on its undirected version, print the modularity and write one
    community (a list of node ids) per line to `output`."""
    print("Loading graph...")
    stream = snap.TFIn(input)
    directed = snap.TNGraph.Load(stream)

    undirected = snap.ConvertGraph(snap.PUNGraph, directed)

    print("Performing community detection...")
    communities = snap.TCnComV()
    modularity = snap.CommunityCNM(undirected, communities)
    print("Modularity:", modularity)

    with open(output, "w") as sink:
        for community in communities:
            members = [node_id for node_id in community]
            sink.write(repr(members))
            sink.write("\n")
Exemplo n.º 14
0
def deferred_decision(G, probs, dist):
    """Return an undirected copy of `G` with edges removed at random.

    One random number is drawn per edge; the edge is deleted when the draw
    is smaller than probs[(src, dst)].

    Parameters:
      G     - SNAP graph; it is converted to a PUNGraph copy and not modified.
      probs - mapping indexed by the (src, dst) node-id pair of each edge.
      dist  - 'uniform' draws from np.random.uniform(); any other value
              draws from np.random.normal().
    """
    graph = snap.ConvertGraph(snap.PUNGraph, G)
    draw = np.random.uniform if dist == 'uniform' else np.random.normal

    # Snapshot the endpoints first: deleting edges while the graph's own edge
    # iterator is live can skip edges or invalidate the iterator.
    endpoints = [(e.GetSrcNId(), e.GetDstNId()) for e in graph.Edges()]

    for src, dst in endpoints:
        if draw() < probs[(src, dst)]:
            graph.DelEdge(src, dst)

    return graph
Exemplo n.º 15
0
def set_median_threshold(graph):
    """Return a PNEANet copy of `graph` whose nodes all carry the median node
    degree as their "threshold" attribute.

    Prints the node count, the median, each node's stored threshold and the
    number of nodes whose degree is below the median.
    """
    net = snap.ConvertGraph(snap.PNEANet, graph)
    print("Number of graph nodes: ", net.GetNodes())

    degrees = [node.GetDeg() for node in net.Nodes()]
    value = median(degrees)
    print("The median value is: ", value)

    below_median = 0
    for node in net.Nodes():
        node_id = node.GetId()
        net.AddIntAttrDatN(node_id, value, "threshold")
        print("Threshold of the node ", node_id, " with value ",
              net.GetIntAttrDatN(node_id, "threshold"))
        if node.GetDeg() < value:
            below_median += 1
    print("Number of nodes below the median: ", below_median)
    return net
Exemplo n.º 16
0
def visualiseGraph(rowData, activityCodeList, fileName, title, undirect_conversion=False):
    """Draw the transition graph of one data row to a PNG under "graphs/".

    Every entry produced by generateTransition(activityCodeList) is read as
    ((source, target), column). An edge source -> target is added when
    `column` is in rowData.index and rowData[column] is positive. The graph
    is converted to an undirected one first when `undirect_conversion` is
    true.
    """
    transitions = generateTransition(activityCodeList)
    graph = snap.TNGraph.New()
    known_nodes = []
    for transition in transitions:
        column = transition[1]
        if column not in rowData.index:
            continue
        if not rowData[column] > 0:
            continue

        source, target = transition[0][0], transition[0][1]
        # Nodes must exist before the edge is added; add each one only once.
        for activity in (source, target):
            if activity not in known_nodes:
                graph.AddNode(activity)
                known_nodes.append(activity)
        graph.AddEdge(source, target)

    if undirect_conversion:
        graph = snap.ConvertGraph(snap.PUNGraph, graph)
    snap.DrawGViz(graph, snap.gvlDot, "graphs/" + "/" + fileName + ".png", title, True)
Exemplo n.º 17
0
def estimate3SubgraphFrequencies(Network):
    """Estimate relative frequencies of directed 3-node subgraph shapes.

    `num_samples` (module level) random 3-node subgraphs are drawn from the
    directed version of `Network` and classified by edge count and maximum
    in/out degree.

    Returns a list of 7 frequencies (counts divided by their total):
      0 -> 0 edges
      1 -> 1 edge
      2 -> 2 edges to same node
      3 -> 2 edges from same node
      4 -> 2 edges through one node
      5 -> 3 edge cycle
      6 -> 3 edge, not cycle
    """
    directed = snap.ConvertGraph(snap.PNGraph, Network)
    counts = np.zeros(7)

    for _ in range(num_samples):
        sample = snap.GetRndSubGraph(directed, 3)
        edge_count = sample.GetEdges()

        if edge_count == 0:
            slot = 0
        elif edge_count == 1:
            slot = 1
        elif edge_count == 2:
            top_in = sample.GetNI(snap.GetMxInDegNId(sample)).GetInDeg()
            top_out = sample.GetNI(snap.GetMxOutDegNId(sample)).GetOutDeg()
            if top_in == 2:
                slot = 2
            elif top_out == 2:
                slot = 3
            else:
                slot = 4
        else:
            top_in = sample.GetNI(snap.GetMxInDegNId(sample)).GetInDeg()
            slot = 5 if top_in == 1 else 6
        counts[slot] += 1

    return list(counts / sum(counts))
Exemplo n.º 18
0
def girvin_neuman_profile_extract(rowData, activityCodeList, index,week):
    """Build the transition graph of one data row and cluster it with
    Girvan-Newman community detection.

    Every entry produced by generateTransition(activityCodeList) is read as
    ((source, target), column); an edge is added when `column` is in
    rowData.index and rowData[column] is positive. `week` is not used.

    Returns [index, modularity, number of communities, list of communities],
    each community being a list of node ids.
    """
    transitions = generateTransition(activityCodeList)
    directed = snap.TNGraph.New()
    known_nodes = []
    for transition in transitions:
        column = transition[1]
        if column not in rowData.index:
            continue
        if not rowData[column] > 0:
            continue

        source, target = transition[0][0], transition[0][1]
        # Nodes must exist before the edge is added; add each one only once.
        for activity in (source, target):
            if activity not in known_nodes:
                directed.AddNode(activity)
                known_nodes.append(activity)
        directed.AddEdge(source, target)

    undirected = snap.ConvertGraph(snap.PUNGraph, directed)
    communities = snap.TCnComV()
    modularity = snap.CommunityGirvanNewman(undirected, communities)
    noOfCluster = len(communities)
    clusterList = [[node_id for node_id in community] for community in communities]

    return [index, modularity, noOfCluster, clusterList]
Exemplo n.º 19
0
#def getCentralities(network):
# NOTE: the function header above is commented out, so everything below runs
# at module level. The code uses Python 2 constructs (print statement,
# dict.iteritems, str.decode).
network = loadGraph()
# Lookup tables: node name -> node id, and user id -> node id.
nameToNId = {}
uIdToNId = {}
for n in network.Nodes():
    id = n.GetId()
    # The node's 'name' string attribute is decoded as UTF-8 and used as key.
    nameToNId[network.GetStrAttrDatN(id, 'name').decode('utf-8')] = id
infile = codecs.open('csv/dblpusersaff.csv', 'r', 'utf-8')
lines = infile.read().splitlines()
infile.close()
for line in lines:
    # Fields are separated by '||': tokens[0] is parsed as an integer user id
    # and tokens[1] is looked up as a node name. Rows whose third field is
    # empty are skipped (presumably a missing affiliation - TODO confirm).
    tokens = line.split('||')
    if tokens[2] != '':
        nId = nameToNId[tokens[1]]
        uIdToNId[int(tokens[0])] = nId
# The centrality routines below run on an undirected copy of the network.
graph = snap.ConvertGraph(snap.PUNGraph, network)
degCenters = {}
closeCenters = {}
# Output hashes filled in place by the snap calls below.
pageRanks = snap.TIntFltH()
eigenCenters = snap.TIntFltH()
# btwnCenters = snap.TIntFltH()
# edgeHash = snap.TIntPrFltH()
print('Running PageRank...')
snap.GetPageRank(graph, pageRanks)
print('Running Eigenvector centrality...')
snap.GetEigenVectorCentr(graph, eigenCenters)
# print('Running Betweeness...')
# snap.GetBetweennessCentr(graph, btwnCenters, edgeHash)
print('Running Degree and Closeness...')
# Only the (user id, node id) pairs are printed in the part visible here;
# degCenters and closeCenters are not filled within this excerpt.
for uId, nId in uIdToNId.iteritems():
    print uId, nId
Exemplo n.º 20
0
def set_fixed_threshold(graph, value):
    """Return a PNEANet copy of `graph` in which every node's integer
    "threshold" attribute is set to `value`."""
    net = snap.ConvertGraph(snap.PNEANet, graph)
    for node in net.Nodes():
        net.AddIntAttrDatN(node.GetId(), value, "threshold")
    return net
Exemplo n.º 21
0
        for x in parsed:
            vid = x.videoid
            for v in list(x.related) + [vid]:
                if v not in self.nodeid:
                    self.nodeid[v] = self.size
                    self.videoid[self.size] = v
                    self.size += 1


#filenames = [ "0301/{}.txt".format(i) for i in range(0, 4) ]
#data = Data(filenames)
#graph = make_graph(data)
#save_graph_data(data, graph, "try")
# Module-level setup: load the previously saved data/graph pair stored under
# the name "try" and work on an undirected copy of the graph.
data, graph = load_graph_data("try")
Graph = snap.ConvertGraph(snap.PUNGraph, graph)

# Use a maximum-degree node as the source of the shortest-path computation;
# GetShortPath fills NIdToDistH in place.
NId1 = snap.GetMxDegNId(Graph)
NIdToDistH = snap.TIntH()
shortestPath = snap.GetShortPath(Graph, NId1, NIdToDistH)
# Copy the SNAP hash into a plain Python dict (key -> stored value).
shortestDist = {}
for item in NIdToDistH:
    shortestDist[item] = NIdToDistH[item]

# PageRank scores are written into PRankH in place.
PRankH = snap.TIntFltH()
snap.GetPageRank(Graph, PRankH)

# Starts empty; presumably filled by simRank defined below - TODO confirm.
simRanks = {}


def simRank(Graph, nIters, gamma):
Exemplo n.º 22
0
def algorithm(G, D):
    """Recursively accumulate and return a count T over graph G.

    The routine appears to count triangles (T is incremented once for each
    pair of a node's neighbours that are themselves adjacent) - confirm
    against the paper or notes this implements.

    Parameters:
      G - SNAP graph. It is MODIFIED IN PLACE: nodes are deleted during the
          pruning step and during the counting step.
      D - integer degree threshold used by the pruning step. It is passed on
          unchanged to the recursive calls.

    Returns:
      T - the accumulated count.
    """
    # Pruning step: repeatedly remove nodes whose degree is at most D, or
    # which are adjacent to (almost) every other node, counting their
    # contribution before deletion. P flags whether a pass removed anything.
    P = 1
    T = 0
    while P == 1:
        P = 0
        for NI in G.Nodes():
            NID = NI.GetId()
            d = NI.GetDeg()
            if d <= D or d > G.GetNodes() - 2:
                if d <= D and d > 1:
                    # Count adjacent pairs among the neighbours of this node.
                    for i in range(d - 1):
                        for j in range(i + 1, d):
                            a = NI.GetNbrNId(i)
                            b = NI.GetNbrNId(j)
                            if G.IsEdge(a, b):
                                T = T + 1
                if d > D and d > G.GetNodes() - 2:
                    # Every edge not incident to this node is counted.
                    T = T + G.GetEdges() - NI.GetDeg()
                P = 1
                G.DelNode(NID)
    # Hierarchical clustering step
    if G.GetNodes() > 5:
        # H is a scratch copy that gets consumed while the layers are built.
        H = snap.ConvertGraph(type(G), G)
        # S[i][0] is a root node id; S[i][j] (j >= 1) is the TIntV of nodes
        # at hop distance j from that root in H.
        S = []
        i = 0
        while H.GetNodes() > 0:
            S.append([])
            S[i].append(snap.GetMxDegNId(H))
            j = 1
            TTT = True
            while TTT:
                s = snap.TIntV()
                snap.GetNodesAtHop(H, S[i][0], j, s, True)
                if len(s) != 0:
                    S[i].append(s)
                    j = j + 1
                else:
                    TTT = False
            # Remove the root and all of its layers from the scratch copy.
            H.DelNode(S[i][0])
            for j in range(1, len(S[i])):
                for nodeID in S[i][j]:
                    H.DelNode(nodeID)
            i = i + 1
        subgraphs = [[] for x in range(len(S))]
        # Counting step
        for i in range(len(S)):
            # subgraphs[i][j - 1] is the subgraph of G induced on layer S[i][j].
            for j in range(1, len(S[i])):
                G01 = snap.ConvertSubGraph(snap.PUNGraph, G, S[i][j])
                subgraphs[i].append(G01)
            T = T + subgraphs[i][0].GetEdges()
            G.DelNode(S[i][0])
        for i in range(len(S)):
            for j in range(1, len(S[i])):
                for upnodeID in S[i][j]:
                    # Neighbours of this node that fall into the two layer
                    # subgraphs checked below. These lists were originally
                    # named U and D; the latter overwrote the threshold
                    # parameter D, so the recursive calls at the end received
                    # a list instead of the integer threshold.
                    upper = []
                    lower = []
                    for t in range(G.GetNI(upnodeID).GetDeg()):
                        a = G.GetNI(upnodeID).GetNbrNId(t)
                        if j < len(S[i]) - 1:
                            if subgraphs[i][j].IsNode(a):
                                upper.append(a)
                        if j > 1:
                            if subgraphs[i][j - 2].IsNode(a):
                                lower.append(a)
                    for s in range(len(upper)):
                        for t in range(s + 1, len(upper)):
                            if subgraphs[i][j].IsEdge(upper[s], upper[t]):
                                T = T + 1
                    for s in range(len(lower)):
                        for t in range(s + 1, len(lower)):
                            if subgraphs[i][j - 2].IsEdge(lower[s], lower[t]):
                                T = T + 1
        # Recurse into every layer subgraph with the same threshold.
        for i in range(len(S)):
            for j in range(len(S[i]) - 1):
                T = T + algorithm(subgraphs[i][j], D)

    return T
Exemplo n.º 23
0
import snap
import sys

# Simple script to re-index to 0-indexed graph.

graph = sys.argv[1]
if len(sys.argv) > 2 and sys.argv[2] == 1:
    Gin = snap.LoadEdgeList(snap.PUNGraph, graph)
else:
    Gin = snap.LoadEdgeList(snap.PNGraph, graph)
MxScc = snap.GetMxScc(Gin)
Gout = snap.ConvertGraph(snap.PNGraph, MxScc, True)
print 'Number of nodes: ', Gout.GetNodes()
print 'Number of edges: ', Gout.GetEdges()
snap.SaveEdgeList(Gout, graph)
def convert_to_undirected(in_Graph):
    """Return an undirected (PUNGraph) conversion of `in_Graph`."""
    undirected = snap.ConvertGraph(snap.PUNGraph, in_Graph)
    return undirected
Exemplo n.º 25
0
import snap
import sys

# Simple script to re-index to 0-indexed graph.

graph = sys.argv[1]
if len(sys.argv) > 2 and sys.argv[2] == 2:
    Gin = snap.LoadEdgeList(snap.PUNGraph, graph)
else:
    Gin = snap.LoadEdgeList(snap.PNGraph, graph)

Gout = snap.ConvertGraph(snap.PNGraph, Gin, True)
print 'Number of nodes: ', Gout.GetNodes()
print 'Number of edges: ', Gout.GetEdges()
snap.SaveEdgeList(Gout, graph)
Exemplo n.º 26
0
def convert_undirected(G1):
    """Return an undirected (PUNGraph) conversion of `G1`."""
    return snap.ConvertGraph(snap.PUNGraph, G1)
Exemplo n.º 27
0
# Tutorial-style script (Python 2 print statements). G2 and FOut are created
# before this excerpt begins.
# Write G2 to the binary stream, flush it, then load it back from disk.
G2.Save(FOut)
FOut.Flush()
FIn = snap.TFIn("test.graph")
G4 = snap.TNGraph.Load(FIn)
print "G4: Nodes %d, Edges %d" % (G4.GetNodes(), G4.GetEdges())

# save and load from a text file
snap.SaveEdgeList(G4, "test.txt", "Save as tab-separated list of edges")
G5 = snap.LoadEdgeList(snap.PNGraph, "test.txt", 0, 1)
print "G5: Nodes %d, Edges %d" % (G5.GetNodes(), G5.GetEdges())

# create a directed random graph on 10k nodes and 5k edges
G6 = snap.GenRndGnm(snap.PNGraph, 10000, 5000)
print "G6: Nodes %d, Edges %d" % (G6.GetNodes(), G6.GetEdges())
# convert to undirected graph
G7 = snap.ConvertGraph(snap.PUNGraph, G6)
print "G7: Nodes %d, Edges %d" % (G7.GetNodes(), G7.GetEdges())
# get largest weakly connected component (of the directed graph G6)
WccG = snap.GetMxWcc(G6)

# generate a network using Forest Fire model
G8 = snap.GenForestFire(1000, 0.35, 0.35)
print "G8: Nodes %d, Edges %d" % (G8.GetNodes(), G8.GetEdges())

# get a subgraph induced on nodes {0,1,2,3,4}
SubG = snap.GetSubGraph(G8, snap.TIntV.GetV(0, 1, 2, 3, 4))

# get 3-core of G8
Core3 = snap.GetKCore(G8, 3)
print "Core3: Nodes %d, Edges %d" % (Core3.GetNodes(), Core3.GetEdges())
Exemplo n.º 28
0
import snap
import sys

'''
Simple script to get maximal bi-connected component.
'''

# Path of the input edge-list file, taken from the first command-line argument.
graph = sys.argv[1]
# The edge list is loaded as a directed graph.
Gin = snap.LoadEdgeList(snap.PNGraph, graph)
BiCon = snap.GetMxBiCon(Gin)
# The third argument True renumbers the nodes during the conversion.
Gout = snap.ConvertGraph(snap.PNGraph, BiCon, True)
print 'Number of nodes: ', Gout.GetNodes()
print 'Number of edges: ', Gout.GetEdges()
# Output name: everything before the first '.txt' in the input path,
# followed by '-bicon.txt'.
out_graph = graph.split('.txt')[0] + '-bicon.txt'
snap.SaveEdgeList(Gout, out_graph)

Exemplo n.º 29
0
def intro():
    """Walk through basic snap.py usage: building, generating, traversing,
    saving/loading and converting graphs, then a few structural measures.

    Prints node/edge counts along the way and writes "test.graph" and
    "test.txt" in the current directory. Returns nothing.
    """
    def _report(label, graph):
        # Shared "<label>: Nodes N, Edges M" summary line.
        print("%s: Nodes %d, Edges %d" %
              (label, graph.GetNodes(), graph.GetEdges()))

    # build a small directed graph (PNGraph) by hand
    G1 = snap.TNGraph.New()
    for node_id in (1, 5, 32):
        G1.AddNode(node_id)
    for src, dst in ((1, 5), (5, 1), (5, 32)):
        G1.AddEdge(src, dst)
    _report("G1", G1)

    # directed random graph on 100 nodes and 1k edges
    G2 = snap.GenRndGnm(snap.PNGraph, 100, 1000)
    _report("G2", G2)

    # walk over the nodes
    for node in G2.Nodes():
        print("node id %d with out-degree %d and in-degree %d" %
              (node.GetId(), node.GetOutDeg(), node.GetInDeg()))
    # walk over the edges
    for edge in G2.Edges():
        print("edge (%d, %d)" % (edge.GetSrcNId(), edge.GetDstNId()))

    # walk over the edges node by node
    for node in G2.Nodes():
        for target in node.GetOutEdges():
            print("edge (%d %d)" % (node.GetId(), target))

    # network generated with the Forest Fire model
    G3 = snap.GenForestFire(1000, 0.35, 0.35)
    _report("G3", G3)

    # binary save followed by binary load
    binary_out = snap.TFOut("test.graph")
    G3.Save(binary_out)
    binary_out.Flush()
    binary_in = snap.TFIn("test.graph")
    G4 = snap.TNGraph.Load(binary_in)
    _report("G4", G4)

    # text save followed by text load
    snap.SaveEdgeList(G4, "test.txt", "Save as tab-separated list of edges")
    G5 = snap.LoadEdgeList(snap.PNGraph, "test.txt", 0, 1)
    _report("G5", G5)

    # another Forest Fire network
    G6 = snap.GenForestFire(1000, 0.35, 0.35)
    _report("G6", G6)
    # undirected version of G6
    G7 = snap.ConvertGraph(snap.PUNGraph, G6)
    _report("G7", G7)
    # largest weakly connected component of G6
    WccG = snap.GetMxWcc(G6)
    # subgraph induced on nodes {0,1,2,3,4}
    SubG = snap.GetSubGraph(G6, snap.TIntV.GetV(0, 1, 2, 3, 4))
    # 3-core of G6
    Core3 = snap.GetKCore(G6, 3)
    # delete nodes of out degree 10 and in degree 5
    snap.DelDegKNodes(G6, 10, 5)
    _report("G6a", G6)

    # Preferential Attachment graph on 1000 nodes with node out degree 3
    G8 = snap.GenPrefAttach(1000, 3)
    _report("G8", G8)
    # vector of integer pairs, used as the output of both calls below
    CntV = snap.TIntPrV()
    # connected component distribution as (component size, count)
    snap.GetWccSzCnt(G8, CntV)
    # degree distribution as (degree, count)
    snap.GetOutDegCnt(G8, CntV)
    # vector of floats
    EigV = snap.TFltV()
    # first eigenvector of the graph adjacency matrix
    snap.GetEigVec(G8, EigV)
    # diameter of G8
    snap.GetBfsFullDiam(G8, 100)
    # number of triads in G8, then its clustering coefficient
    snap.GetTriads(G8)
    snap.GetClustCf(G8)
Exemplo n.º 30
0
def main():

    # Load data
    nodes = pd.read_csv("../data/nodes.csv", sep='\t', index_col=0)

    # Data in nice form
    headers = list(nodes.columns)
    nodes = np.asarray(nodes)

    # Load social network accordingly
    if path.exists("../data/youtube.graph"):
        FIn = snap.TFIn("../data/youtube.graph")
        social_network = snap.TNGraph.Load(FIn)
    else:
        edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
        edges = np.asarray(edges).astype(int)
        social_network = data2dag(edges, nodes.shape[0])

    # Check for self edges
    for e in social_network.Edges():
        if e.GetSrcNId() == e.GetDstNId():
            print("Self Loop Found:", e.GetSrcNId())

    # CNM Algorithm from snap.py
    print("Computing CNM")
    start = timeit.default_timer()
    CmtyV = snap.TCnComV()
    undirected = snap.ConvertGraph(snap.PUNGraph, social_network)
    snap.DelSelfEdges(undirected)
    the_modularity = snap.CommunityCNM(undirected, CmtyV)
    stop = timeit.default_timer()
    node_to_cmty = np.zeros(nodes.shape[0])
    cmty_sizes = np.zeros(len(CmtyV))
    for i in range(len(CmtyV)):
        for node in CmtyV[i]:
            node_to_cmty[node] = i
        cmty_sizes[i] = len(CmtyV[i])
    cmtys = [[node for node in cmty] for cmty in CmtyV]
    '''
  edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
  edges = np.asarray(edges).astype(int)
  G = nx.Graph()
  G.add_nodes_from(range(nodes.shape[0]))
  G.add_edges_from(list(map(tuple, edges)))
  '''

    #assert(is_partition(G, cmtys))

    #print("Calculating Modularity")
    #modul = modularity(G, cmtys)
    print("Results from Clauset-Newman-Moore:")
    #print("Modularity:",modul)
    print("Number of clusters:", len(CmtyV))
    print("Time elapsed:", stop - start)

    # Fun category stuff to do
    upload_col = headers.index('category')
    categories = set()
    for i in range(nodes.shape[0]):
        categories.add(nodes[i][upload_col])
    idx_to_categories = list(categories)
    print("Number of categories:", len(idx_to_categories))
    categories_to_idx = dict()
    for i in range(len(idx_to_categories)):
        categories_to_idx[idx_to_categories[i]] = i

    # Communities and categories
    cmty_category_count = np.zeros((len(CmtyV), len(idx_to_categories)))
    for i in range(nodes.shape[0]):
        cmty_category_count[int(node_to_cmty[i]),
                            categories_to_idx[nodes[i][upload_col]]] += 1
    cmty_category_count = cmty_category_count / cmty_sizes[:, np.newaxis]

    # Create graphs per category
    plt.figure()
    plt.plot(sorted(np.max(cmty_category_count, axis=1), reverse=True),
             label="Top proportion")
    plt.plot(0.5 * np.ones(cmty_category_count.shape[0]),
             label="Majority Threshold",
             linestyle='dashed')
    plt.title("Category Proportions in Clusters")
    plt.xlabel("Cluster")
    plt.ylabel("Proportion")
    plt.legend()
    plt.savefig("../figures/category_top_clusters.png")
    '''
  for i in range(cmty_category_count.shape[0]):
    top_category = np.argmax(cmty_category_count[i])
    print("Community "+str(i)+": "+str(idx_to_categories[top_category])+",",cmty_category_count[i][top_category])
  '''
    '''