Пример #1
0
    def __init__(self, filename, mVals, pVal, tau):
        """Store the model parameters and create the SNAP containers and graphs.

        Args:
            filename: graph file path; kept as ``self.fileName`` and passed to
                ``self.getParentDir`` to derive ``self.rootDir``.
            mVals: stored unchanged as ``self.mVals``.
            pVal: stored as ``self.pVal``; ``self.pVals`` becomes
                ``[pVal, 1 - pVal]``.
            tau: stored unchanged as ``self.tau``.
        """
        # The parameter is spelled ``filename``; the previous assignment read
        # the undefined name ``fileName`` and raised NameError.
        self.fileName = filename
        self.mVals = mVals
        self.pVal = pVal
        self.pVals = [pVal, 1 - pVal]
        self.tau = tau
        self.nLH = snap.TIntStrH()
        self.lblNH = snap.TStrIntH()  # Node count with attached label
        self.lblEH = snap.TIntIntH()  # Edge count with attached src dst labels

        self.RH = snap.TIntFltPrH()
        self.BH = snap.TIntFltPrH()

        self.cRV = snap.TIntV()
        self.cBV = snap.TIntV()

        # NOTE(review): getGraph/getGraphName presumably read self.fileName,
        # so they are kept after the assignment above -- confirm.
        self.G = self.getGraph(snap.PUNGraph)
        self.NG = snap.TNEANet()
        self.graphName = self.getGraphName()
        self.rootDir = self.getParentDir(self.fileName)
        self.absrootDir = os.path.abspath(self.rootDir)

        self.cR_count = 0
        self.cB_count = 0

        self.RH_count = 0
        self.BH_count = 0
Пример #2
0
 def generate_scores(self):
     """Score every edge of ``self.g`` from common neighbours and betweenness.

     For an edge ``(src, dst)`` the score is
     ``self.l * cn / max_cn + (1 - self.l) * eb / max_eb`` where ``cn`` is the
     number of shared hop-1 neighbours, ``eb`` is the edge betweenness
     reported by ``snap.GetBetweennessCentr`` and ``max_*`` is the largest
     value over all edges.

     Returns:
         dict mapping ``(src, dst)`` to the blended score; an empty dict when
         the graph has no edges.
     """
     common_neighbor_scores = {}
     for e in self.g.Edges():
         src, dst = e.GetSrcNId(), e.GetDstNId()
         n1 = snap.TIntV()
         n2 = snap.TIntV()
         snap.GetNodesAtHop(self.g, src, 1, n1, True)
         snap.GetNodesAtHop(self.g, dst, 1, n2, True)
         common_neighbor_scores[(src, dst)] = len(set(n1) & set(n2))
     if not common_neighbor_scores:
         # No edges: max() below would raise ValueError on an empty sequence.
         return {}

     Nodes = snap.TIntFltH()
     Edges = snap.TIntPrFltH()
     snap.GetBetweennessCentr(self.g, Nodes, Edges, self.node_frac, True)
     edge_betweenness_scores = {}
     for e in Edges:
         edge_betweenness_scores[(e.GetVal1(), e.GetVal2())] = Edges[e]

     # When the maximum is 0 every value is 0 as well, so dividing by 1
     # yields the intended 0 instead of a ZeroDivisionError.
     max_cn = max(common_neighbor_scores.values()) or 1
     max_eb = max(edge_betweenness_scores.values()) or 1
     print(common_neighbor_scores)
     print(edge_betweenness_scores)

     scores = {}
     for (src, dst), cn in common_neighbor_scores.items():
         scores[(src, dst)] = (self.l * cn / max_cn +
                               (1 - self.l) * edge_betweenness_scores[(src, dst)] / max_eb)
     return scores
Пример #3
0
def CN_similarity_max(graph, player, dic_path, n=5, directed=True):
    """Print the n nodes sharing the most hop-1 neighbours with ``player``.

    Args:
        graph: snap graph to query.
        player: key into the pickled dictionary stored at ``dic_path``.
        dic_path: path of a pickle file holding a ``name -> node id`` dict.
        n: number of entries to print.
        directed: forwarded to ``snap.GetNodesAtHop`` as its ``IsDir`` flag.

    Raises:
        KeyError: if ``player`` is not in the dictionary, or a ranked node id
            has no name in it.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point
    # dic_path at trusted files.
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)
    player_id = mydict[player]

    # id -> name; setdefault keeps the first name for a duplicated id, which
    # is what the previous list(...).index(...) lookup returned.
    names_by_id = {}
    for name, node_id in mydict.items():
        names_by_id.setdefault(node_id, name)

    player_neighbor = snap.TIntV()
    snap.GetNodesAtHop(graph, player_id, 1, player_neighbor, directed)
    # A Python set is used because TVec.Intrs is documented as assuming
    # sorted vectors, which GetNodesAtHop does not promise.
    player_neighbors = set(player_neighbor)

    player_cn = Counter()
    for node in graph.Nodes():
        nodeId = node.GetId()
        if nodeId == player_id:
            continue
        node_neighbor = snap.TIntV()
        snap.GetNodesAtHop(graph, nodeId, 1, node_neighbor, directed)
        player_cn[nodeId] = len(player_neighbors.intersection(node_neighbor))

    # The header previously always said "5" regardless of n.
    print("{} most similar players to {} by CN metrics:".format(n, player))
    for k, v in player_cn.most_common(n):
        print('{}: {}'.format(names_by_id[k], v))
Пример #4
0
def InitState(taskindex, msglist):
    """Build the initial task state and send the start node to its owner task.

    The start node is the ``"body"`` of the last message in ``msglist``.

    Returns:
        dict with keys ``"start"`` (the node), ``"dist"`` (0) and ``"visit"``
        (a zeroed ``Snap.TIntV`` of ``nodes`` entries with the start set to 1).
    """
    # the body of the last input message names the start node
    node = None
    for item in msglist:
        node = sw.GetMsg(item)["body"]

    # visited vector: all zeros except the start node
    Visited = Snap.TIntV(int(sw.GetVar("nodes")))
    Snap.ZeroVec(Visited)
    Visited.GetVal(node).Val = 1  # start node is set to 1, reset to 0 at the end

    ds = {"start": node, "dist": 0, "visit": Visited}

    # task that receives the start node
    tn = TaskId(node, sw.GetRange())

    # message payload: [start node, index of this task]
    Vec1 = Snap.TIntV()
    for value in (node, taskindex):
        Vec1.Add(value)
    sw.Send(tn, Vec1, swsnap=True)

    return ds
Пример #5
0
def extend_subgraph(G, k, sg, v_ext, node_id, motifs, verbose=False):
    """Recursive step of the ESU subgraph-enumeration algorithm.

    Args:
        G: snap graph being enumerated.
        k: target subgraph size.
        sg: list of node ids in the current subgraph (not modified).
        v_ext: list of candidate node ids; it is consumed in place.
        node_id: only nodes with an id greater than this may be added.
        motifs: forwarded unchanged to ``count_iso``.
        verbose: forwarded unchanged to ``count_iso``.

    Returns:
        None. Complete subgraphs are reported through ``count_iso``.
    """
    # Compare sizes by value; ``is`` only happens to work for the small ints
    # that CPython caches.
    if len(sg) == k:
        count_iso(G, sg, motifs, verbose)
        return
    while len(v_ext) != 0:
        w = v_ext.pop(0)
        v_ext_bis = copy.deepcopy(v_ext)
        neighbors = snap.TIntV()
        snap.GetNodesAtHop(G, w, 1, neighbors, False)
        for node in neighbors:
            if node in sg or node <= node_id:
                continue
            nb = snap.TIntV()
            snap.GetNodesAtHop(G, node, 1, nb, False)
            nb_ids = set(nb)
            # Only extend with nodes that are not adjacent to any node
            # already in the subgraph.
            if not any(member in nb_ids for member in sg):
                v_ext_bis.append(node)
        sg_bis = copy.deepcopy(sg)
        sg_bis.append(w)
        extend_subgraph(G, k, sg_bis, v_ext_bis, node_id, motifs, verbose)
    return
Пример #6
0
def runRecursive(G, testNodes, k=2):
    dicts = [{} for i in range(k)]

    # Subset
    #print testNodes

    # Now for each iteration, we keep going
    for i in range(k):
        # Handle base case separately
        print "Feature level", i
        counter = 1
        if i == 0:
            for node in testNodes:
                print 'Node', counter
                counter += 1
                dicts[0][node] = getFeatureVecBasic(G, node)

                # Also need to get all of the neighbor feature vecs
                Nbrs = snap.TIntV()
                snap.GetNodesAtHop(G, node, 1, Nbrs, True)
                for nbr in Nbrs:
                    dicts[0][nbr] = getFeatureVecBasic(G, nbr)

                    # And for neighbors' neighbors
                    NbrsNbrs = snap.TIntV()
                    snap.GetNodesAtHop(G, nbr, 1, NbrsNbrs, True)
                    for nbrnbr in NbrsNbrs:
                        dicts[0][nbrnbr] = getFeatureVecBasic(G, nbrnbr)

        # Otherwise, we do it the normal way
        else:
            for node in testNodes:
                print 'Node', counter
                counter += 1
                # Get all of the neighbors of our node
                Nbrs = snap.TIntV()
                snap.GetNodesAtHop(G, node, 1, Nbrs, True)
                sumVec = np.zeros(3**(i))

                # For each neighbor, get mean and sum
                for nbr in Nbrs:
                    sumVec += dicts[i - 1][nbr]

                # Handle edge case
                meanVec = None
                if len(Nbrs) == 0:
                    meanVec = np.zeros(3**(i))
                else:
                    meanVec = sumVec / float(len(Nbrs))

                # Now concatenate
                dicts[i][node] = np.concatenate(
                    (dicts[i - 1][node], meanVec, sumVec))

        # Pickle to save
        with open('feats_' + str(i) + '_v1.pkl', 'wb') as f:
            pickle.dump(dicts[i], f)

    return dicts[-1]
Пример #7
0
def GenGraph(sw):
    """
    Generate the graph edges.

    Reads one integer vector per input message, concatenates them into a
    single stub vector, runs the segmented random edge assignment and sends
    each resulting task vector to the task with the same index.
    """
    msglist = sw.GetMsgList()
    sw.log.debug("msglist: %s" % msglist)

    # Gather the stubs: one vector is stored under each message name.
    Stubs = Snap.TIntV()
    for item in msglist:
        FIn = Snap.TFIn(Snap.TStr(sw.GetMsgName(item)))
        Stubs.AddV(Snap.TIntV(FIn))

    # Earlier, non-segmented variant kept for reference:
    #    Snap.Randomize(Stubs)
    #    tsize = sw.GetRange()
    #    ntasks = int(sw.GetVar("gen_tasks"))
    #    Tasks = Snap.TIntIntVV(ntasks)  # vector of length ntasks containing vectors
    #    Snap.AssignEdges(Stubs, Tasks, tsize)
    ntasks = int(sw.GetVar("gen_tasks"))
    seg_bits = int(sw.GetVar('seg_bits'))
    tsize = sw.GetRange()

    Tasks = Snap.TIntVVV(ntasks)
    Stubs = Snap.segment(Stubs, seg_bits)  # segmentize stubs

    # segmented random edge assignment, then back to plain node ids
    Snap.AssignRandomEdges64(Stubs, Tasks, tsize, seg_bits)
    Tasks = Snap.desegmentRandomizedEdges(Tasks, seg_bits, tsize)

    # one outgoing message per task
    for i in xrange(Tasks.Len()):
        sw.log.debug("sending task: %d, len: %d" % (i, Tasks.GetVal(i).Len()))
        sw.Send(i, Tasks.GetVal(i), swsnap=True)
Пример #8
0
def snowball_sample(G, num_waves, seeds):
    """
    Parameters:
      G - SNAP graph or network to sample from
      num_waves - number of snowball waves
      seeds - SNAP vector (TIntV) of seeds (node ids) to start the snowball
              sample from

    Return value:
      SNAP network (TNEANet) snowball sampled from G with each node having
      an integer "zone" attribute for the snowball sampling zone
       (0=seed, 1=first wave, etc.)
      [TNEANet needed to allow the zone attribute, not actually using the
       multigraph capability].

    Note directions on a directed graph are ignored - can sample in an
    undirected or directed graph.
    """
    assert (len(seeds) == len(set(seeds)))  # no duplicate node ids
    # The zones are collected in a Python dict and attached to the subgraph
    # afterwards (the original author observed that GetSubGraph does not
    # preserve node attributes); this relies on node ids being preserved.
    N = snap.ConvertGraph(snap.PNEANet, G)  # copy graph/network G to network N
    zonedict = dict((seed, 0) for seed in seeds)  # nodeid -> zone, seeds are 0
    nodes = set(seeds)  # accumulates every sampled node, seeds included
    frontier = set(seeds)  # nodes first reached in the previous wave
    for wave in range(1, num_waves + 1):
        # Only the previous wave can reach unseen nodes: the neighbours of
        # older nodes were all added when those nodes were the frontier.
        next_frontier = set()
        for node in frontier:
            neighbours = snap.TIntV()
            snap.GetNodesAtHop(G, node, 1, neighbours, False)
            next_frontier.update(set(neighbours) - nodes)
        for new_node in next_frontier:
            zonedict[new_node] = wave
        nodes.update(next_frontier)
        frontier = next_frontier
    # SNAP needs the node set as a TIntV
    NodeVec = snap.TIntV()
    for node in nodes:
        NodeVec.Add(node)
    sampleN = snap.GetSubGraph(N, NodeVec)
    sampleN.AddIntAttrN("zone", -1)  # add zone attribute init to -1
    # items() / "in" work on Python 2 and 3; has_key()/iteritems() did not.
    for (nodeid, zone) in zonedict.items():
        sampleN.AddIntAttrDatN(nodeid, zone, "zone")
    return sampleN
def create_weighted_cosponsorship_graph(chamber, session):
    """Build and save the weighted cosponsorship graph for one chamber/session.

    Every non-bill node of the graph returned by ``read_bcg`` is linked to
    each node two hops away. For each such pair the directed weight
    ``common_bills / len(to_bills[<source id>])`` is recorded. The edge list,
    the weights and the per-node hop-1 neighbour counts are written under
    ``govtrack_data/``.

    Args:
        chamber: value interpolated into the input/output file names.
        session: value interpolated into the input/output file names.
    """
    print("Creating weighted cosponsorship graph (wcg)...")
    # NOTE(review): m and b are loaded but never used below -- confirm
    # whether these two loads can be dropped.
    m = np.load('raw_data/govtrack_cosponsor_temp/m_%s_%s.npy' %
                (chamber, session))
    # .item() unwraps a pickled Python object; recent NumPy releases refuse
    # to load those unless allow_pickle=True. Pickle can execute arbitrary
    # code, so only trusted files belong here.
    b = np.load('raw_data/govtrack_cosponsor_temp/b_%s_%s.npy' %
                (chamber, session), allow_pickle=True).item()
    to_bills = np.load('raw_data/govtrack_cosponsor_temp/to_bills_%s_%s.npy' %
                       (chamber, session), allow_pickle=True).item()
    g, node_info, id_to_nid = read_bcg(chamber, session)
    edge_weights = {}
    sponsored_bills = {}
    wcg = snap.TUNGraph.New()

    def record_bill_count(nid):
        """Store the number of hop-1 neighbours of ``nid`` in g, once."""
        if nid not in sponsored_bills:
            bills = snap.TIntV()
            snap.GetNodesAtHop(g, nid, 1, bills, False)
            sponsored_bills[nid] = len(bills)

    for node in tqdm(node_info, total=len(node_info), position=0):
        if node_info[node]['type'] == 'bill':
            continue
        if not wcg.IsNode(node):
            wcg.AddNode(node)
        connected = snap.TIntV()
        if not g.IsNode(node):
            print("F**K WHY IS %s NOT A NODE" % (node, ))
            continue
        snap.GetNodesAtHop(g, node, 2, connected, False)
        record_bill_count(node)
        node_id = node_info[node]['info']['id']
        for node2 in connected:
            if node == node2:
                continue
            if not wcg.IsNode(node2):
                wcg.AddNode(node2)
            record_bill_count(node2)
            node2_id = node_info[node2]['info']['id']
            common_bills = len(get_cosponsorship(node_id, node2_id, to_bills))
            edge_weights[(node, node2)] = common_bills / len(to_bills[node_id])
            edge_weights[(node2, node)] = common_bills / len(to_bills[node2_id])
            wcg.AddEdge(node, node2)
    snap.SaveEdgeList(wcg,
                      'govtrack_data/wcg_%s_%s.graph' % (chamber, session))
    np.save('govtrack_data/wcg_edge_weights_%s_%s.npy' % (chamber, session),
            edge_weights)
    np.save('govtrack_data/wcg_sponsored_bills_%s_%s.npy' % (chamber, session),
            sponsored_bills)
    print("Completed weighted cosponsorship graph!")
Пример #10
0
def CN_similarity(graph, id1, id2, directed=True):
    """Return the number of hop-1 neighbours shared by nodes id1 and id2.

    Args:
        graph: snap graph to query.
        id1: first node id.
        id2: second node id.
        directed: forwarded to ``snap.GetNodesAtHop`` as its ``IsDir`` flag.

    Returns:
        int, the size of the intersection of the two neighbour sets.
    """
    neighbors1 = snap.TIntV()
    snap.GetNodesAtHop(graph, id1, 1, neighbors1, directed)
    neighbors2 = snap.TIntV()
    snap.GetNodesAtHop(graph, id2, 1, neighbors2, directed)

    # TVec.Intrs is documented as assuming sorted vectors, which
    # GetNodesAtHop does not promise; a set intersection does not depend on
    # element order.
    return len(set(neighbors1) & set(neighbors2))
Пример #11
0
def temporal_neighbors(graphs, nodeID, directed=True):
    """Return the nodes that are a hop-1 neighbour of nodeID in any graph.

    Args:
        graphs: iterable of snap graphs.
        nodeID: node whose neighbours are collected.
        directed: forwarded to ``snap.GetNodesAtHop`` as its ``IsDir`` flag.

    Returns:
        snap.TIntV of distinct node ids, in order of first appearance.
    """
    t_neighbors = snap.TIntV()
    seen = set()  # O(1) duplicate check instead of scanning the TIntV
    for g in graphs:
        neighbors = snap.TIntV()
        snap.GetNodesAtHop(g, nodeID, 1, neighbors, directed)
        for n in neighbors:
            if n not in seen:
                seen.add(n)
                t_neighbors.Add(n)
    return t_neighbors
def common_neighbors_2(G, n1, n2, directed=False):
    """Count nodes that are at hop 1 from n1 and at hop 2 from n2.

    An existing edge n1-n2 is removed for the duration of the computation
    and put back afterwards, so G is unchanged on return.

    NOTE(review): n2 is queried at hop 2 while n1 is queried at hop 1; this
    is kept as found -- confirm it is intentional.

    Args:
        G: snap graph (temporarily modified).
        n1: first node id.
        n2: second node id.
        directed: forwarded to ``snap.GetNodesAtHop`` as its ``IsDir`` flag.

    Returns:
        int, the number of nodes found in both sets.
    """
    deleted = False
    if G.IsEdge(n1, n2):
        G.DelEdge(n1, n2)
        deleted = True
    try:
        n1_neighbors = snap.TIntV()
        snap.GetNodesAtHop(G, n1, 1, n1_neighbors, directed)
        n2_neighbors = snap.TIntV()
        snap.GetNodesAtHop(G, n2, 2, n2_neighbors, directed)
        return len(set(n1_neighbors) & set(n2_neighbors))
    finally:
        # Restore the edge even if one of the calls above raised.
        if deleted:
            G.AddEdge(n1, n2)
def sim_rank_wrapper(G, n1, n2, gamma, directed=False):
    """Recursive SimRank-style similarity between nodes n1 and n2.

    Returns 1 when the nodes are equal. Otherwise returns
    ``gamma / preferential_attachment(G, n1, n2)`` times the sum of the
    similarities of all pairs of their hop-1 neighbours.

    NOTE(review): there is no depth limit or memoisation; for two adjacent
    nodes the recursion revisits the same pair, so it may not terminate.

    Args:
        G: snap graph.
        n1: first node id.
        n2: second node id.
        gamma: decay factor applied at every level.
        directed: forwarded to ``snap.GetNodesAtHop`` as its ``IsDir`` flag.
    """
    if n1 == n2:
        return 1
    constant = gamma / preferential_attachment(G, n1, n2)
    n1_neighbors = snap.TIntV()
    snap.GetNodesAtHop(G, n1, 1, n1_neighbors, directed)
    n2_neighbors = snap.TIntV()
    snap.GetNodesAtHop(G, n2, 1, n2_neighbors, directed)
    result = 0
    for a in n1_neighbors:
        for b in n2_neighbors:
            # Pass ``directed`` on; it used to fall back to the default here.
            result += sim_rank_wrapper(G, a, b, gamma, directed)
    return result * constant
Пример #14
0
def InitState(sw, taskindex, msglist):
    """Build the initial task state from the last input message.

    The message body supplies ``"s"`` (first node of this task), ``"r"`` (the
    node range) and optionally ``"source"``. If a source is present it is
    marked in the visited vector and sent to the task chosen by ``TaskId``.

    Returns:
        dict with keys ``"first"``, ``"range"``, ``"count"``, ``"dist"``,
        ``"source"`` and ``"visit"``.
    """
    # read the input; the body of the last message is the one used
    sw.cum_timer.cum_start("disk")
    for item in msglist:
        d = sw.GetMsg(item)["body"]
    sw.cum_timer.cum_stop("disk")

    ns = d["s"]
    nrange = d["r"]  # original note: sw.GetRange() is not used, it could be truncated
    node = d.get("source", -1)  # -1 when this task does not hold the source
    has_source = node >= 0

    ds = {
        "first": ns,
        "range": nrange,
        "count": 0,  # no. of visited nodes
        "dist": 0,
        "source": node,
    }

    seg_bits = int(sw.GetVar('seg_bits'))

    # Visited also stores distances; 0 means "not visited yet".
    Visited = Snap.TIntV(nrange)
    Snap.ZeroVec(Visited)
    if has_source:
        # The source gets 1 for now (its real distance 0 would read as
        # "unvisited"); per the original note it is reset to 0 at the end.
        Visited.GetVal(Snap.trailing(node - ns, seg_bits)).Val = 1
        ds["count"] = 1

    ds["visit"] = Visited

    taskNumber = TaskId(node, sw.GetRange())

    # send the source node together with its distance
    if has_source:
        sw.log.debug('[%s] sending source node %d' % (sw.GetName(), node))
        Vec1 = Snap.TIntV()
        Vec1.Add(Snap.trailing(node, seg_bits))
        Vec1.Add(0)  # distance from the source node to itself
        sw.Send(taskNumber, Vec1, swsnap=True)  # send to GetNbrCpp64

    return ds
Пример #15
0
def ranking(graph,
            alpha=0.6,
            primary=degreeDifference,
            secondary=randomValue,
            edgeAttrs=None):
    """
    Implements the node ranking algorithm described by Guo, Yang, and Zhou

    Args:
        graph (snap.TNGraph): a directed graph to rank
        alpha (float): the relative size of the leader partition
        primary ((nodeID, graph, edgeAttrs) -> value): first component of the sort key
        secondary ((nodeID, graph, edgeAttrs) -> value): second component of the
            sort key, used to order nodes with equal primary values
        edgeAttrs (dict): edge attributes for use with sorting key functions
    Returns:
        A list of node IDs ordered in descending order by ranking
    """

    def sortKey(nodeID):
        # (primary, secondary) tuple, compared lexicographically
        return (primary(nodeID, graph, edgeAttrs),
                secondary(nodeID, graph, edgeAttrs))

    def inducedSubgraph(nodeIDs):
        # subgraph of ``graph`` restricted to the given node ids
        NIdVector = snap.TIntV()
        for nodeID in nodeIDs:
            NIdVector.Add(nodeID)
        return snap.GetSubGraph(graph, NIdVector)

    # Order all nodes from highest to lowest key
    nodeOrdering = [node.GetId() for node in graph.Nodes()]
    nodeOrdering.sort(key=sortKey, reverse=True)

    # The first alpha-fraction are leaders, the rest followers
    splitIndex = int(alpha * graph.GetNodes())
    leaders, followers = nodeOrdering[:splitIndex], nodeOrdering[splitIndex:]

    # Base case: with an empty partition, recursing cannot change the order
    if not leaders or not followers:
        return leaders + followers

    leaderGraph = inducedSubgraph(leaders)
    followerGraph = inducedSubgraph(followers)

    # Rank each partition on its own; leaders come first in the result
    rankedLeaders = ranking(leaderGraph, alpha, primary, secondary, edgeAttrs)
    rankedFollowers = ranking(followerGraph, alpha, primary, secondary,
                              edgeAttrs)
    return rankedLeaders + rankedFollowers
    def getOverlapSet(self, nodeDegH):
        """Pick ``self.overlap_size`` node ids according to ``self.overlap_choice``.

        Choices:
            1 - uniform random sample (without replacement) of all keys.
            2 - uniform random sample of the first ``2 * overlap_size + 1``
                keys of ``nodeDegH``.
            3 - endpoints of BFS-tree edges, starting at the first key.
            4 - endpoints of BFS-tree edges, starting at a random key.

        NOTE(review): choices 2 and 3 presumably expect ``nodeDegH`` to be
        sorted by descending degree -- confirm with the caller.

        Args:
            nodeDegH: snap hash whose keys are node ids.

        Returns:
            A numpy array for choices 1 and 2, a ``snap.TIntV`` for choices 3
            and 4, and an empty ``snap.TIntV`` for any other choice.
        """
        nodeIdV = snap.TIntV()
        nodeDegH.GetKeyV(nodeIdV)
        randNodeIdV = snap.TIntV()

        def collectBfsNodes(startNodeId):
            """Add distinct BFS-tree edge endpoints until overlap_size is hit."""
            seen = set()
            BfsTree = snap.GetBfsTree(self.G, startNodeId, True, False)
            for EI in BfsTree.Edges():
                for nodeId in (EI.GetSrcNId(), EI.GetDstNId()):
                    if nodeId in seen:
                        continue
                    seen.add(nodeId)
                    randNodeIdV.Add(nodeId)
                    # Checked after every insertion: an edge can add two
                    # nodes at once, which let the old per-edge equality
                    # test step over the limit and never stop.
                    if randNodeIdV.Len() == self.overlap_size:
                        return

        if self.overlap_choice == 1:
            # random choice
            randNodeIdV = np.random.choice(nodeIdV, self.overlap_size, replace=False)
        elif self.overlap_choice == 2:
            # random choice over the leading keys of nodeDegH
            dnodeIdV = snap.TIntV()
            nodeDegH_walker = nodeDegH.BegI()
            counter = 0
            while not nodeDegH_walker.IsEnd():
                if counter > self.overlap_size * 2:
                    break
                dnodeIdV.Add(nodeDegH_walker.GetKey())
                nodeDegH_walker.Next()
                # Previously never incremented, so the break above was dead
                # and every key became a candidate.
                counter += 1
            randNodeIdV = np.random.choice(dnodeIdV, self.overlap_size, replace=False)
        elif self.overlap_choice == 3:
            # BFS tree nodes from the first key of nodeDegH
            collectBfsNodes(nodeDegH.BegI().GetKey())
        elif self.overlap_choice == 4:
            # BFS tree nodes from a random node
            startNodeId = np.random.choice(nodeIdV, 1, replace=False)
            collectBfsNodes(startNodeId[0])

        return randNodeIdV
Пример #17
0
def getFeat():
    #G1, id2, synset2, _,_,_ = generate_word_graph(True, False, False)

    #G2, id2, synset2, _,_,_ = generate_word_graph(False, True, False)
    #G3, id2, synset2, _,_,_ = generate_word_graph(False, False, True)
    G4, id2, synset2, _, _, _ = generate_meaning_graph(True, False, False)
    #G5, id2, synset2, _,_,_ = generate_meaning_graph(False, True, False)
    #G6, id2, synset2, _,_,_ = generate_meaning_graph(False, False, True)
    #G7, id2, synset2, _,_,_ = generate_meaning_graph(True, False, True)

    degVec = {}
    egoNet = {}
    egoNetDeg = {}
    G = G4
    it = 0
    for node in G.Nodes():
        it += 1
        deg = node.GetDeg()
        nodeName = node.GetId()
        degVec[nodeName] = deg
        egoNet[nodeName] = 0
        egoNetDeg[nodeName] = 0
        commNeib = 0
        NodeVec = snap.TIntV()
        nodeList = snap.GetNodesAtHop(G, nodeName, 1, NodeVec, False)
        for node2 in NodeVec:
            nodeIt = G.GetNI(node2)
            dest = nodeIt.GetDeg()
            egoNetDeg[nodeName] += dest
            NodeVec2 = snap.TIntV()
            nodeList = snap.GetNodesAtHop(G, node2, 1, NodeVec2, False)
            for el in NodeVec2:
                if el in NodeVec:
                    commNeib += 1
        egoNet[nodeName] = deg + commNeib / 2
        egoNetDeg[nodeName] = egoNetDeg[nodeName] - commNeib - deg
    print egoNet[numImp], egoNetDeg[numImp], degVec[numImp]

    featDictOne = {}
    for el in G.Nodes():
        numId = el.GetId()
        featVec = [degVec[numId], egoNet[numId], egoNetDeg[numId]]
        featDictOne[numId] = featVec
    for i in range(0, 2):
        featDict = {}
        for el in G.Nodes():
            numId = el.GetId()
            featVec = recFeat(featDictOne, G, numId)
            featDict[numId] = featVec
        featDictOne = featDict
    getSim(featDictOne)
Пример #18
0
    def GetMaxKDegree(self, k):
        """Clear the seed set and delegate to ``GetMaxK`` on the out-degrees.

        NOTE(review): positions 0..n-1 of the degree sequence are passed as
        the node ids; this presumably assumes node ids equal their position
        in ``snap.GetDegSeqV`` -- confirm.

        Args:
            k: forwarded unchanged to ``self.GetMaxK``.

        Returns:
            Whatever ``self.GetMaxK(listDegree, nodesId, k)`` returns.
        """
        self.seedNodes.clear()

        inDegV = snap.TIntV()
        outDegV = snap.TIntV()
        snap.GetDegSeqV(self.graph, inDegV, outDegV)

        total = len(outDegV)
        listDegree = [outDegV[pos] for pos in range(total)]
        nodesId = list(range(total))

        return self.GetMaxK(listDegree, nodesId, k)
Пример #19
0
def clustering_coffecient(G):
    """Return the average local clustering coefficient of G.

    For each node with hop-1 neighbour set N the local value is
    ``m / (|N| * (|N| - 1))``, where ``m`` counts the (neighbour,
    neighbour-of-neighbour) pairs that stay inside N; the value is 0 when
    ``m`` is 0. Graphs with two or fewer nodes yield 0.

    Args:
        G: snap graph.

    Returns:
        The mean of the per-node values, or 0 when none were computed.
    """
    nodes = [s.GetId() for s in G.Nodes()]
    cluster_values = []

    if len(nodes) > 2:
        for s in nodes:
            # nodes at distance 1 from s
            NodeVec = snap.TIntV()
            snap.GetNodesAtHop(G, s, 1, NodeVec, False)
            neighbor_ids = set(NodeVec)  # O(1) membership below

            # Each link between two neighbours is seen from both ends.
            mutual = 0
            for neighbor in NodeVec:
                Second_NodeVec = snap.TIntV()
                snap.GetNodesAtHop(G, neighbor, 1, Second_NodeVec, False)
                mutual += sum(1 for second in Second_NodeVec
                              if second in neighbor_ids)

            clust_node = 0
            if mutual:
                degree = float(len(NodeVec))
                clust_node = float(mutual) / (degree * (degree - 1))
            cluster_values.append(clust_node)

    # Emptiness is tested by truth value; the old ``l is not 0`` compared
    # object identity with a literal.
    if cluster_values:
        return sum(cluster_values) / len(cluster_values)
    return 0
Пример #20
0
 def getCurrentandRestNet(self, n_list, g):
     """Split ``g`` into the subgraph outside ``n_list`` and the one inside.

     Args:
         n_list: iterable of node ids forming the "current" part.
         g: snap graph to split.

     Returns:
         Tuple ``(SubG1, SubG2)``: ``SubG1`` is the subgraph on the nodes of
         ``g`` that are not in ``n_list``, ``SubG2`` the subgraph on
         ``n_list``.
     """
     # Set lookup keeps the filter linear in the number of nodes of g.
     current_ids = set(n_list)
     NIdV1 = snap.TIntV()
     for node in g.Nodes():
         node_id = node.GetId()
         if node_id not in current_ids:
             NIdV1.Add(node_id)
     SubG1 = snap.GetSubGraph(g, NIdV1)
     NIdV2 = snap.TIntV()
     for node_id in n_list:
         NIdV2.Add(node_id)
     SubG2 = snap.GetSubGraph(g, NIdV2)
     return SubG1, SubG2
def jaccard_2(G, n1, n2, directed=False):
    """Jaccard ratio of n1's hop-1 node set and n2's hop-2 node set.

    An existing edge n1-n2 is removed for the duration of the computation
    and put back afterwards, so G is unchanged on return.

    NOTE(review): n2 is queried at hop 2 while n1 is queried at hop 1; this
    is kept as found -- confirm it is intentional.

    Args:
        G: snap graph (temporarily modified).
        n1: first node id.
        n2: second node id.
        directed: forwarded to ``snap.GetNodesAtHop`` as its ``IsDir`` flag.

    Returns:
        float, ``|intersection| / |union|``, or 0.0 when both sets are empty.
    """
    deleted = False
    if G.IsEdge(n1, n2):
        G.DelEdge(n1, n2)
        deleted = True
    try:
        n1_neighbors = snap.TIntV()
        snap.GetNodesAtHop(G, n1, 1, n1_neighbors, directed)
        n2_neighbors = snap.TIntV()
        snap.GetNodesAtHop(G, n2, 2, n2_neighbors, directed)
        first, second = set(n1_neighbors), set(n2_neighbors)
        total_neighbors = first | second
        if len(total_neighbors) == 0:
            return 0.0
        return float(len(first & second)) / float(len(total_neighbors))
    finally:
        # Restore the edge even if one of the calls above raised.
        if deleted:
            G.AddEdge(n1, n2)
Пример #22
0
def GetNbr(sw):
    """
    provide graph neighbors

    With saved state available, every input message is read as a node vector
    and passed to ``GetNeighbors``. Otherwise the input vectors are treated
    as edges: they are merged, turned into adjacency lists by ``GetEdges``,
    saved, and a "targets" message is sent to task 0.
    """

    def read_vector(item):
        # load the integer vector stored under the message's name
        FIn = Snap.TFIn(Snap.TStr(sw.GetMsgName(item)))
        return Snap.TIntV(FIn)

    taskname = sw.GetName()

    msglist = sw.GetMsgList()
    sw.flog.write("msglist " + str(msglist) + "\n")
    sw.flog.flush()

    AdjLists = LoadState()

    if AdjLists:
        # state is available: answer the neighbor requests
        for item in msglist:
            GetNeighbors(sw, AdjLists, read_vector(item))
        return

    # no state yet: the input holds the edges
    Edges = Snap.TIntV()
    for item in msglist:
        Edges.AddV(read_vector(item))

    # first iteration: build the adjacency lists and save them as state
    AdjLists = GetEdges(Edges)
    sw.flog.write("state " + str(AdjLists.Len()) + "\n")
    sw.flog.flush()

    SaveState(AdjLists)

    dmsgout = {"src": sw.GetName(), "cmd": "targets", "body": {}}
    sw.Send(0, dmsgout, "2")
Пример #23
0
def GetNbr(sw):
    """
    provide graph neighbors

    With saved state available, every input message is read as a node vector
    and passed to ``GetNeighbors``. Otherwise the input vectors are treated
    as edges: they are merged, turned into adjacency lists by ``GetEdges``,
    saved, and a "targets" message is sent to task 0. Loading and saving the
    state are wrapped in ``perf.Timer``.
    """

    def load_vector(item):
        # integer vector stored under the message's name
        name = sw.GetMsgName(item)
        return Snap.TIntV(Snap.TFIn(Snap.TStr(name)))

    msglist = sw.GetMsgList()
    sw.log.debug("msglist %s" % msglist)

    with perf.Timer(sw.log, "LoadState-GetNbrCpp"):
        AdjLists = LoadState(sw)

    if not AdjLists:
        # state not found: the input holds the edges, merge them
        Edges = Snap.TIntV()
        for item in msglist:
            Edges.AddV(load_vector(item))

        # first iteration: build the adjacency lists and save them as state
        AdjLists = GetEdges(sw, Edges)
        sw.log.debug("state: %d" % AdjLists.Len())

        with perf.Timer(sw.log, "SaveState-GetNbrCpp"):
            SaveState(sw, AdjLists)

        dmsgout = dict(src=sw.GetName(), cmd="targets", body={})
        sw.Send(0, dmsgout, "2")
        return

    # state is available: process the requests for neighbors
    for item in msglist:
        GetNeighbors(sw, AdjLists, load_vector(item))
    return
Пример #24
0
def pageRank_components(g):
    print 'executing pagerank components ---- getting components for page rank'
    Components = snap.TCnComV()
    snap.GetWccs(g, Components)
    f = open('component_pr.txt', 'w')
    cgraphs = []
    for com in Components:
        v = snap.TIntV()
        for ni in com:
            v.Add(ni)
        cgraphs.append(snap.GetSubGraph_PNGraph(g, v))

    print 'components retrived for pagerank'
    f.write('Total components:' + str(len(cgraphs)) + '\n')
    for graph in cgraphs:
        if graph.GetNodes() == 2:
            continue
        sprank = snap.TIntFltH()
        snap.GetPageRank_PNGraph(graph, sprank)
        sprank.SortByDat(False)
        f.write(
            str(graph.GetNodes()) + ' ' + str(sprank[sprank.BegI().GetKey()]) +
            '\n')
    f.close()
    print 'finished writing pagerank components values'
Пример #25
0
def get_deg_data(G):
    """Return the degree sequence of G as a plain Python list.

    The values come from ``snap.GetDegSeqV`` and keep its order.
    """
    degree_vec = snap.TIntV()
    snap.GetDegSeqV(G, degree_vec)
    return [degree_vec[pos] for pos in range(degree_vec.Len())]
Пример #26
0
def calcClusteringCoefficientSingleNode(Node, Graph):
    """
    :param - Node: node from snap.PUNGraph object. Graph.Nodes() will give an
                   iterable of nodes in a graph
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: local clustering coefficient of Node (0.0 when its degree is
             below 2)
    """
    ki = Node.GetDeg()
    if ki < 2:
        return 0.0

    Nbrs = snap.TIntV()
    common_neighbours = 0
    for Id in Node.GetOutEdges():
        common_neighbours += snap.GetCmnNbrs(Graph, Node.GetId(), Id, Nbrs)

    # Every edge between two neighbours is counted once from each end, so
    # ei = common_neighbours / 2 and C = 2 * ei / (ki * (ki - 1)) reduces to
    # the expression below. The float() keeps Python 2 from truncating.
    return common_neighbours / float(ki * (ki - 1))
Пример #27
0
def gen_G(D, Pi_minus, Pi_exo, V_exo, theta2, N):
    """
    Returns pairwise-stable network on N nodes.

    D, Pi_minus, Pi_exo = outputs of gen_D().
    V_exo = 'exogenous' part of joint surplus (output of gen_V_exo).
    theta2 = transitivity parameter (theta[2]).

    For every weakly connected component of D with more than one node, the
    edges of gen_G_subgraph() on that component are copied into the result;
    the edges of Pi_exo are added at the end.
    """
    G = snap.GenRndGnm(snap.PUNGraph, N, 0)  # N nodes, no edges

    def add_edges_from(net):
        # copy every edge of ``net`` into G
        for edge in net.Edges():
            G.AddEdge(edge.GetSrcNId(), edge.GetDstNId())

    Components = snap.TCnComV()
    snap.GetWccs(D, Components)  # components of D

    NIdV = snap.TIntV()  # reused for the node ids of each component
    for C in Components:
        if C.Len() <= 1:
            continue
        NIdV.Clr()
        for i in C:
            NIdV.Add(i)
        add_edges_from(
            gen_G_subgraph(NIdV, D, Pi_minus, Pi_exo, V_exo, theta2))

    # add robust links
    add_edges_from(Pi_exo)

    return G
Пример #28
0
def clustering_coef(G, mode_for_end_nodes='put_zeros'):
    '''
    Calculate the local clustering coefficient of each node.

    G is an undirected Graph
    mode_for_end_nodes = 'nan' - end/disconnected nodes (degree <= 1) are
        ignored, i.e. not included in the output list
    mode_for_end_nodes = anything else (default 'put_zeros') - assign zero
        for end/disconnected nodes

    Returns a list with one coefficient per reported node, in the order of
    G.Nodes().

    list_clusterning_coefs_allnodes = clustering_coef(UGraph)
    https://www.kaggle.com/alexandervc/hw1-ac/notebook
    '''
    list_clusterning_coefs_allnodes = []
    for n in G.Nodes():
        NodeVec = snap.TIntV()
        # Get neighbours of the current node (nodes at hop distance 1).
        snap.GetNodesAtHop(G, n.GetId(), 1, NodeVec, False)
        neighbours = list(NodeVec)
        current_degree = len(neighbours)
        if current_degree <= 1:
            # The coefficient is undefined here (division by zero), so the
            # node is either skipped or reported as 0.
            if mode_for_end_nodes != 'nan':
                list_clusterning_coefs_allnodes.append(0)
            continue

        count_edges_between_neigbours = 0
        for neigbor1 in neighbours:
            for neigbor2 in neighbours:
                # The edge test must run for every pair; "<" visits each
                # unordered pair exactly once.
                if neigbor1 < neigbor2 and G.IsEdge(neigbor1, neigbor2):
                    count_edges_between_neigbours += 1

        # 2.0 forces true division on interpreters where int / int truncates.
        clustering_coef_current_node = (
            2.0 * count_edges_between_neigbours /
            (current_degree * (current_degree - 1)))
        list_clusterning_coefs_allnodes.append(clustering_coef_current_node)
    return list_clusterning_coefs_allnodes
Пример #29
0
def basicFeature(G):
    """
    Compare every node's basic feature vector with the mean feature vector.

    For each node a 3-element feature vector is built: its out-degree and the
    two values returned by snap.GetEdgesInOut() for the node's egonet (the
    node plus its out-neighbours) -- presumably the edges inside the egonet
    and the edges leaving it; confirm against the snap documentation.
    The cosine similarity between each node's vector and the mean vector is
    computed, the (up to) five most similar nodes are printed, and a
    histogram of all similarities is shown.

    :param G: snap graph whose nodes are analysed.
    :return: None. Returns early without output when there is nothing to
             rank (empty graph, or every node filtered out).
    """
    V = []
    x1 = 0
    x2 = 0
    x3 = 0
    for NI in G.Nodes():
        ## Get egonet
        NIdV = snap.TIntV()
        NIdV.Add(NI.GetId())
        for Id in NI.GetOutEdges():
            NIdV.Add(Id)
        results = snap.GetEdgesInOut(G, NIdV)
        V.append([NI.GetId(), NI.GetOutDeg(), results[0], results[1]])
        x1 = x1 + NI.GetOutDeg()
        x2 = x2 + results[0]
        x3 = x3 + results[1]

    cnt = len(V)
    if cnt == 0:
        # Empty graph: the mean vector is undefined (division by zero).
        return

    Id9 = 9999  ##hard code the biggest possible node value for candidates to avoid collision
    # Mean feature vector over all nodes.
    x1 = x1 * 1.0 / cnt
    x2 = x2 * 1.0 / cnt
    x3 = x3 * 1.0 / cnt

    a = np.sqrt(x1 * x1 + x2 * x2 + x3 * x3)
    res = []
    scores = []

    for Id, y1, y2, y3 in V:
        if Id == Id9:
            # The node with the reserved id is excluded from the ranking.
            continue
        b = np.sqrt(y1 * y1 + y2 * y2 + y3 * y3)
        dem = x1 * y1 + x2 * y2 + x3 * y3
        if b == 0 or a == 0 or dem == 0:
            # A zero-length vector has no direction; treat similarity as 0.
            sim = 0
        else:
            sim = dem * 1.0 / a / b
        res.append([Id, sim])
        scores.append(sim)

    if not res:
        # Nothing left to rank or plot.
        return

    arr = np.array(res)
    # Rows sorted by ascending similarity; reverse and cut to get the best.
    r = arr[arr[:, 1].argsort()]
    top_rows = r[::-1][:5]
    print("Top 5 similar nodes based on cosine similarity, for basic features like HW2 Q1, when compared to mean node")
    # A single pre-joined string prints identically with the print statement
    # and the print function, and copes with fewer than five rows.
    print(" ".join(str(row) for row in top_rows))

    ###
    ## print roles
    plt.figure()
    plt.hist(scores, bins=20)
    plt.title(
        'Distribution of cosine similarity between mean node and any node in the graph'
    )
    plt.xlabel('cosine similarity')
    plt.ylabel('count')
    plt.show()

    ###

    return
Пример #30
0
def shrinkGraph(graph, maxDist):
    """
    Delete every node that is farther than maxDist hops from all seed nodes.

    Seed nodes are read from 'csv/dblpusersaff.csv': each line is split on
    '||', and a line whose third field is non-empty names a seed through its
    second field, which is looked up among the graph's 'name' node
    attributes. A breadth-first search over out-neighbours collects the
    nodes around each seed, and all nodes not collected for any seed are
    removed from the graph in place.

    :param graph: snap network with a string node attribute 'name'; it is
                  modified in place.
    :param maxDist: BFS depth at which expansion stops.
    :raises KeyError: if a seed name from the file is not a node name.
    """
    nameToNId = {}
    for n in graph.Nodes():
        node_id = n.GetId()
        nameToNId[graph.GetStrAttrDatN(node_id, 'name').decode('utf-8')] = node_id

    # The context manager guarantees the file handle is closed.
    with codecs.open('csv/dblpusersaff.csv', 'r', 'utf-8') as infile:
        lines = infile.read().splitlines()

    validNodes = set()
    for line in lines:
        tokens = line.split('||')
        if tokens[2] == '':
            continue
        start_id = nameToNId[tokens[1]]
        closeNeighborsSet = set()
        pq = deque()
        pq.append((start_id, 0))
        closeNeighborsSet.add(start_id)
        while len(pq) > 0:
            (cur_id, dist) = pq.popleft()
            if dist == maxDist:
                # The queue is ordered by distance, so every remaining
                # entry is at least this far away: stop expanding.
                break
            node = graph.GetNI(cur_id)
            for i in range(node.GetOutDeg()):
                nbrId = node.GetOutNId(i)
                if nbrId not in closeNeighborsSet:
                    closeNeighborsSet.add(nbrId)
                    pq.append((nbrId, dist + 1))
        validNodes.update(closeNeighborsSet)
        # Single pre-formatted string: same output with the print statement
        # and the print function.
        print("%s %d %d" % (tokens[0], len(closeNeighborsSet), len(validNodes)))

    unused = snap.TIntV()
    for n in graph.Nodes():
        if n.GetId() not in validNodes:
            unused.Add(n.GetId())
    graph.DelNodes(unused)