def __init__(self, filename, mVals, pVal, tau):
    self.fileName = filename
    self.mVals = mVals
    self.pVal = pVal
    self.pVals = []
    self.pVals.append(pVal)
    self.pVals.append(1 - pVal)
    self.tau = tau
    self.nLH = snap.TIntStrH()
    self.lblNH = snap.TStrIntH()   # Node count with attached label
    self.lblEH = snap.TIntIntH()   # Edge count with attached src dst labels
    self.RH = snap.TIntFltPrH()
    self.BH = snap.TIntFltPrH()
    self.cRV = snap.TIntV()
    self.cBV = snap.TIntV()
    self.G = self.getGraph(snap.PUNGraph)
    self.NG = snap.TNEANet()
    self.graphName = self.getGraphName()
    self.rootDir = self.getParentDir(self.fileName)
    self.absrootDir = os.path.abspath(self.rootDir)
    self.cR_count = 0
    self.cB_count = 0
    self.RH_count = 0
    self.BH_count = 0
def generate_scores(self):
    scores = {}
    common_neighbor_scores = {}
    for e in self.g.Edges():
        # common_neighbor_scores[(e.GetSrcNId(), e.GetDstNId())] = snap.GetCmnNbrs(self.g, e.GetSrcNId(), e.GetDstNId())
        n1 = snap.TIntV()
        n2 = snap.TIntV()
        snap.GetNodesAtHop(self.g, e.GetSrcNId(), 1, n1, True)
        snap.GetNodesAtHop(self.g, e.GetDstNId(), 1, n2, True)
        common_neighbor_scores[(e.GetSrcNId(), e.GetDstNId())] = len(set(n1) & set(n2))

    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(self.g, Nodes, Edges, self.node_frac, True)
    edge_betweenness_scores = {}
    for e in Edges:
        edge_betweenness_scores[(e.GetVal1(), e.GetVal2())] = Edges[e]

    max_cn = max(common_neighbor_scores.values())
    max_eb = max(edge_betweenness_scores.values())
    print(common_neighbor_scores)
    print(edge_betweenness_scores)

    for e in self.g.Edges():
        src = e.GetSrcNId()
        dst = e.GetDstNId()
        scores[(src, dst)] = (self.l * common_neighbor_scores[(src, dst)] / max_cn
                              + (1 - self.l) * edge_betweenness_scores[(src, dst)] / max_eb)
    return scores
def CN_similarity_max(graph, player, dic_path, n=5, directed=True):
    """Find the n most similar players to player by CN metrics"""
    with open(dic_path, 'rb') as dic_id:
        mydict = pickle.load(dic_id)
    player_id = mydict[player]
    player_neighbor = snap.TIntV()
    snap.GetNodesAtHop(graph, player_id, 1, player_neighbor, directed)
    player_cn = {}
    for node in graph.Nodes():
        nodeId = node.GetId()
        if nodeId != player_id:
            node_neighbor = snap.TIntV()
            snap.GetNodesAtHop(graph, nodeId, 1, node_neighbor, directed)
            Inter = snap.TIntV()
            player_neighbor.Intrs(node_neighbor, Inter)
            player_cn[nodeId] = Inter.Len()
    player_cn = Counter(player_cn)
    print("{} most similar players to {} by CN metrics:".format(n, player))
    for k, v in player_cn.most_common(n):
        print('{}: {}'.format(
            list(mydict.keys())[list(mydict.values()).index(k)], v))
def InitState(taskindex, msglist):
    # the original node is on input
    node = None
    for item in msglist:
        msg = sw.GetMsg(item)
        node = msg["body"]

    ds = {}
    ds["start"] = node
    ds["dist"] = 0
    # ds["count"] = 1

    nnodes = int(sw.GetVar("nodes"))
    Visited = Snap.TIntV(nnodes)
    Snap.ZeroVec(Visited)
    Visited.GetVal(node).Val = 1  # set start node to 1, reset to 0 at the end
    ds["visit"] = Visited

    tsize = sw.GetRange()
    tn = TaskId(node, tsize)

    # send the message
    Vec1 = Snap.TIntV()
    Vec1.Add(node)
    Vec1.Add(taskindex)
    sw.Send(tn, Vec1, swsnap=True)

    return ds
def extend_subgraph(G, k, sg, v_ext, node_id, motifs, verbose=False):
    """Recursive function in the ESU algorithm"""
    if len(sg) == k:
        count_iso(G, sg, motifs, verbose)
        return
    while len(v_ext) != 0:
        w = v_ext[0]
        del v_ext[0]
        v_ext_bis = copy.deepcopy(v_ext)
        neighbors = snap.TIntV()
        snap.GetNodesAtHop(G, w, 1, neighbors, False)
        for node in neighbors:
            if node not in sg and node > node_id:
                # only add the node if it is not adjacent to any node already in sg
                adjacent_to_sg = False
                nb = snap.TIntV()
                snap.GetNodesAtHop(G, node, 1, nb, False)
                for n in sg:
                    adjacent_to_sg = adjacent_to_sg or (n in nb)
                if not adjacent_to_sg:
                    v_ext_bis.append(node)
        sg_bis = copy.deepcopy(sg)
        sg_bis.append(w)
        extend_subgraph(G, k, sg_bis, v_ext_bis, node_id, motifs, verbose)
    return
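# A minimal top-level ESU driver (a sketch, not part of the original snippet):
# it seeds the recursion above with each node as the subgraph root. `motifs` is
# assumed to be whatever accumulator count_iso() expects (e.g. a
# collections.Counter); count_iso() itself is not shown here.
import snap

def enumerate_subgraphs(G, k, motifs, verbose=False):
    for NI in G.Nodes():
        node_id = NI.GetId()
        neighbors = snap.TIntV()
        snap.GetNodesAtHop(G, node_id, 1, neighbors, False)
        # the extension set holds only neighbors with an id larger than the root
        v_ext = [nbr for nbr in neighbors if nbr > node_id]
        extend_subgraph(G, k, [node_id], v_ext, node_id, motifs, verbose)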
def runRecursive(G, testNodes, k=2):
    dicts = [{} for i in range(k)]  # Subset
    # print(testNodes)
    # Now for each iteration, we keep going
    for i in range(k):
        # Handle base case separately
        print("Feature level", i)
        counter = 1
        if i == 0:
            for node in testNodes:
                print('Node', counter)
                counter += 1
                dicts[0][node] = getFeatureVecBasic(G, node)
                # Also need to get all of the neighbor feature vecs
                Nbrs = snap.TIntV()
                snap.GetNodesAtHop(G, node, 1, Nbrs, True)
                for nbr in Nbrs:
                    dicts[0][nbr] = getFeatureVecBasic(G, nbr)
                    # And for neighbors' neighbors
                    NbrsNbrs = snap.TIntV()
                    snap.GetNodesAtHop(G, nbr, 1, NbrsNbrs, True)
                    for nbrnbr in NbrsNbrs:
                        dicts[0][nbrnbr] = getFeatureVecBasic(G, nbrnbr)
        # Otherwise, we do it the normal way
        else:
            for node in testNodes:
                print('Node', counter)
                counter += 1
                # Get all of the neighbors of our node
                Nbrs = snap.TIntV()
                snap.GetNodesAtHop(G, node, 1, Nbrs, True)
                sumVec = np.zeros(3**i)
                # For each neighbor, get mean and sum
                for nbr in Nbrs:
                    sumVec += dicts[i - 1][nbr]
                # Handle edge case
                if len(Nbrs) == 0:
                    meanVec = np.zeros(3**i)
                else:
                    meanVec = sumVec / float(len(Nbrs))
                # Now concatenate
                dicts[i][node] = np.concatenate(
                    (dicts[i - 1][node], meanVec, sumVec))
        # Pickle to save
        with open('feats_' + str(i) + '_v1.pkl', 'wb') as f:
            pickle.dump(dicts[i], f)
    return dicts[-1]
def GenGraph(sw):
    """ generate the graph edges """
    # extract the stubs from the args:
    # iterate through the input queue and add new items to the stub list
    # taskname = sw.GetName()
    msglist = sw.GetMsgList()
    sw.log.debug("msglist: %s" % msglist)

    Stubs = Snap.TIntV()  # Stubs is an empty vector
    for item in msglist:
        # 1) Get item in msglist
        # 2) Get name of item
        name = sw.GetMsgName(item)
        # 3) Get vector associated with name
        FIn = Snap.TFIn(Snap.TStr(name))
        Vec = Snap.TIntV(FIn)
        # 4) Add vector to Stubs
        Stubs.AddV(Vec)
    # 5) Got all stubs, which is of length msglist

    # # Randomize the items (aka shuffle)
    # Snap.Randomize(Stubs)
    #
    # # nodes in each task and the number of tasks
    # tsize = sw.GetRange()
    # ntasks = int(sw.GetVar("gen_tasks"))
    #
    # # get edges for a specific task
    # Tasks = Snap.TIntIntVV(ntasks)  # vector of length ntasks containing vectors
    # Snap.AssignEdges(Stubs, Tasks, tsize)

    ntasks = int(sw.GetVar("gen_tasks"))
    seg_bits = int(sw.GetVar('seg_bits'))
    tsize = sw.GetRange()
    Tasks = Snap.TIntVVV(ntasks)

    # segmentize stubs
    Stubs = Snap.segment(Stubs, seg_bits)
    # do segmented random edge assignment
    Snap.AssignRandomEdges64(Stubs, Tasks, tsize, seg_bits)
    # desegment results
    Tasks = Snap.desegmentRandomizedEdges(Tasks, seg_bits, tsize)

    # send messages
    for i in range(Tasks.Len()):
        sw.log.debug("sending task: %d, len: %d" % (i, Tasks.GetVal(i).Len()))
        sw.Send(i, Tasks.GetVal(i), swsnap=True)
def snowball_sample(G, num_waves, seeds):
    """
    Parameters:
       G - SNAP graph or network to sample from
       num_waves - number of snowball waves
       seeds - SNAP vector (TIntV) of seeds (node ids) to start snowball sample from

    Return value:
       SNAP network (TNEANet) snowball sampled from G with each node having an
       integer "zone" attribute for snowball sampling zone (0=seed, 1=first wave,
       etc.) [TNEANet needed to allow zone attribute, not actually using
       multigraph capability].

    Note directions on directed graph are ignored - can sample in undirected
    or directed graph.
    """
    assert len(seeds) == len(set(seeds))  # no duplicate node ids
    # GetSubGraph does not seem to preserve node attributes, so instead of
    # adding attributes to nodes on N, make a Python dictionary mapping node
    # ids to zone and then add them back as attributes on the subgraph
    # (node ids are preserved, so we can do this)
    zonedict = dict()  # map nodeid : zone
    N = snap.ConvertGraph(snap.PNEANet, G)  # copy graph/network G to network N
    nodes = set(seeds)  # will accumulate all nodes (including seeds) here
    for seed in seeds:
        zonedict[seed] = 0  # seed nodes are zone 0
    newNodes = set(nodes)
    for i in range(num_waves):
        wave = i + 1
        for node in set(newNodes):
            neighbours = snap.TIntV()
            snap.GetNodesAtHop(G, node, 1, neighbours, False)  # neighbours of node
            newNeighbours = set(neighbours) - nodes  # neighbours that are not already in nodes
            for node in newNeighbours:
                if node not in zonedict:
                    zonedict[node] = wave
            newNodes.update(newNeighbours)  # newNodes gets set union of itself and newNeighbours
        nodes.update(newNodes)
    # have to convert nodes set into TIntV for use in SNAP
    NodeVec = snap.TIntV()
    for node in nodes:
        NodeVec.Add(node)
    sampleN = snap.GetSubGraph(N, NodeVec)
    # now put the zones as attributes on the subgraph nodes (which depends
    # on node ids being preserved in the subgraph)
    sampleN.AddIntAttrN("zone", -1)  # add zone attribute initialized to -1
    for (nodeid, zone) in zonedict.items():
        sampleN.AddIntAttrDatN(nodeid, zone, "zone")
    return sampleN
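# Example use (a sketch): two snowball waves from three arbitrary seed ids in a
# random G(n, m) graph; the graph size and the seed ids below are illustrative only.
import snap

G = snap.GenRndGnm(snap.PUNGraph, 1000, 5000)
seeds = snap.TIntV()
for s in [0, 1, 2]:
    seeds.Add(s)
sample = snowball_sample(G, 2, seeds)
for NI in sample.Nodes():
    zone = sample.GetIntAttrDatN(NI.GetId(), "zone")
    print(NI.GetId(), zone)  # zone is 0 for seeds, 1 for the first wave, 2 for the second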
def create_weighted_cosponsorship_graph(chamber, session):
    print("Creating weighted cosponsorship graph (wcg)...")
    m = np.load('raw_data/govtrack_cosponsor_temp/m_%s_%s.npy' % (chamber, session))
    b = np.load('raw_data/govtrack_cosponsor_temp/b_%s_%s.npy' % (chamber, session)).item()
    to_bills = np.load('raw_data/govtrack_cosponsor_temp/to_bills_%s_%s.npy' % (chamber, session)).item()
    g, node_info, id_to_nid = read_bcg(chamber, session)
    edge_weights = {}
    sponsored_bills = {}
    wcg = snap.TUNGraph.New()
    for node in tqdm(node_info, total=len(node_info), position=0):
        if node_info[node]['type'] == 'bill':
            continue
        if not wcg.IsNode(node):
            wcg.AddNode(node)
        connected = snap.TIntV()
        if not g.IsNode(node):
            print("Warning: %s is not a node in the cosponsorship graph" % (node,))
            continue
        # legislators connected to this one through a shared bill (2 hops away)
        snap.GetNodesAtHop(g, node, 2, connected, False)
        if node in sponsored_bills:
            num_bills = sponsored_bills[node]
        else:
            bills = snap.TIntV()
            snap.GetNodesAtHop(g, node, 1, bills, False)
            num_bills = len(bills)
            sponsored_bills[node] = num_bills
        for node2 in connected:
            if node == node2:
                continue
            if not wcg.IsNode(node2):
                wcg.AddNode(node2)
            if node2 in sponsored_bills:
                num_bills2 = sponsored_bills[node2]
            else:
                bills2 = snap.TIntV()
                snap.GetNodesAtHop(g, node2, 1, bills2, False)
                num_bills2 = len(bills2)
                sponsored_bills[node2] = num_bills2
            common_bills = len(
                get_cosponsorship(node_info[node]['info']['id'],
                                  node_info[node2]['info']['id'], to_bills))
            edge_weights[(node, node2)] = common_bills / len(
                to_bills[node_info[node]['info']['id']])
            edge_weights[(node2, node)] = common_bills / len(
                to_bills[node_info[node2]['info']['id']])
            wcg.AddEdge(node, node2)
    snap.SaveEdgeList(wcg, 'govtrack_data/wcg_%s_%s.graph' % (chamber, session))
    np.save('govtrack_data/wcg_edge_weights_%s_%s.npy' % (chamber, session), edge_weights)
    np.save('govtrack_data/wcg_sponsored_bills_%s_%s.npy' % (chamber, session), sponsored_bills)
    print("Completed weighted cosponsorship graph!")
def CN_similarity(graph, id1, id2, directed=True):
    """Computes CN similarity between nodes id1 and id2 in graph"""
    neighbors1 = snap.TIntV()
    snap.GetNodesAtHop(graph, id1, 1, neighbors1, directed)
    neighbors2 = snap.TIntV()
    snap.GetNodesAtHop(graph, id2, 1, neighbors2, directed)
    Inter = snap.TIntV()
    neighbors1.Intrs(neighbors2, Inter)
    return Inter.Len()
def temporal_neighbors(graphs, nodeID, directed=True):
    """Return a snap.TIntV() set of nodes that have been at least once a
    neighbor of node nodeID in graphs"""
    t_neighbors = snap.TIntV()
    for g in graphs:
        neighbors = snap.TIntV()
        snap.GetNodesAtHop(g, nodeID, 1, neighbors, directed)
        for n in neighbors:
            if n not in t_neighbors:
                t_neighbors.Add(n)
        # t_neighbors.Union(neighbors)
    return t_neighbors
def common_neighbors_2(G, n1, n2, directed=False):
    # temporarily remove the edge between n1 and n2, if it exists
    deleted = False
    if G.IsEdge(n1, n2):
        G.DelEdge(n1, n2)
        deleted = True
    n1_neighbors = snap.TIntV()
    snap.GetNodesAtHop(G, n1, 1, n1_neighbors, directed)
    # note: nodes around n2 are taken at hop 2, not hop 1
    n2_neighbors = snap.TIntV()
    snap.GetNodesAtHop(G, n2, 2, n2_neighbors, directed)
    common_neighbors = set(n1_neighbors) & set(n2_neighbors)
    if deleted:
        G.AddEdge(n1, n2)
    return len(common_neighbors)
def sim_rank_wrapper(G, n1, n2, gamma, directed=False):
    if n1 == n2:
        return 1
    constant = gamma / preferential_attachment(G, n1, n2)
    n1_neighbors = snap.TIntV()
    snap.GetNodesAtHop(G, n1, 1, n1_neighbors, directed)
    n2_neighbors = snap.TIntV()
    snap.GetNodesAtHop(G, n2, 1, n2_neighbors, directed)
    result = 0
    for a in n1_neighbors:
        for b in n2_neighbors:
            # propagate the directed flag through the recursion
            result += sim_rank_wrapper(G, a, b, gamma, directed)
    return result * constant
def InitState(sw, taskindex, msglist):
    # the original node is on input
    node = None  # TODO (smacke): ^^^ ???
    sw.cum_timer.cum_start("disk")
    for item in msglist:
        msg = sw.GetMsg(item)
        d = msg["body"]
    sw.cum_timer.cum_stop("disk")

    ns = d["s"]
    nrange = d["r"]  # we don't use sw.GetRange() since this could be truncated I guess
    node = d.get("source", -1)  # either this task has the source, or it doesn't

    ds = {}
    ds["first"] = ns
    ds["range"] = nrange
    ds["count"] = 0  # no. of visited nodes, since Visited is bitset-like vector
    ds["dist"] = 0
    ds["source"] = node

    seg_bits = int(sw.GetVar('seg_bits'))
    Visited = Snap.TIntV(nrange)  # This also stores distances
    Snap.ZeroVec(Visited)

    if node >= 0:
        # set start node to 1, reset to 0 at the end
        # TODO (smacke): the reason we're doing this is because
        # the visited vec stores distances, but a "0" means not
        # visited yet. It would be better to make -1 mean not
        # visited yet so that we don't have this weird edge case
        Visited.GetVal(Snap.trailing(node - ns, seg_bits)).Val = 1
        ds["count"] = 1
    ds["visit"] = Visited

    tsize = sw.GetRange()
    taskNumber = TaskId(node, tsize)  # TODO (smacke): these really need to be named better

    # send the message
    if node >= 0:
        sw.log.debug('[%s] sending source node %d' % (sw.GetName(), node))
        Vec1 = Snap.TIntV()
        Vec1.Add(Snap.trailing(node, seg_bits))
        Vec1.Add(0)  # this is the distance from source node to source node
        sw.Send(taskNumber, Vec1, swsnap=True)  # send to GetNbrCpp64

    return ds
def ranking(graph, alpha=0.6, primary=degreeDifference, secondary=randomValue, edgeAttrs=None):
    """
    Implements the node ranking algorithm described by Guo, Yang, and Zhou

    Args:
        graph (snap.TNGraph): a directed graph to rank
        alpha (float): the relative size of the leader partition
        primary ((nodeID, graph, edgeAttrs) -> int): sorting key for primary sorting of the nodes
        secondary ((nodes, graph, edgeAttrs) -> nodes): sorting key for secondary sorting of nodes
        edgeAttrs (dict): edge attributes for use with sorting key functions

    Returns:
        A list of node IDs ordered in descending order by ranking
    """
    # Group the nodes by degree difference (d_in - d_out)
    nodeOrdering = sorted([node.GetId() for node in graph.Nodes()],
                          key=lambda nodeID: (primary(nodeID, graph, edgeAttrs),
                                              secondary(nodeID, graph, edgeAttrs)),
                          reverse=True)

    # Split the nodes into leaders and followers
    splitIndex = int(alpha * graph.GetNodes())
    leaders = nodeOrdering[:splitIndex]
    followers = nodeOrdering[splitIndex:]

    # Recursive base case check: if either leaders or followers is empty
    # then further recursing won't change the ordering, so just return the
    # current ordering
    if len(leaders) == 0 or len(followers) == 0:
        return leaders + followers

    # Create the subgraphs
    leaderNIdVector = snap.TIntV()
    for node in leaders:
        leaderNIdVector.Add(node)
    leaderGraph = snap.GetSubGraph(graph, leaderNIdVector)

    followerNIdVector = snap.TIntV()
    for node in followers:
        followerNIdVector.Add(node)
    followerGraph = snap.GetSubGraph(graph, followerNIdVector)

    # Recurse on the leaders and followers
    return (ranking(leaderGraph, alpha, primary, secondary, edgeAttrs) +
            ranking(followerGraph, alpha, primary, secondary, edgeAttrs))
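# The primary/secondary key helpers are not included in this snippet; below is a
# minimal sketch of plausible definitions, inferred from the "degree difference
# (d_in - d_out)" comment above. They must be defined before `ranking` so its
# default arguments resolve; `randomValue` is assumed to be a random tie-breaker.
import random
import snap

def degreeDifference(nodeID, graph, edgeAttrs=None):
    NI = graph.GetNI(nodeID)
    return NI.GetInDeg() - NI.GetOutDeg()

def randomValue(nodeID, graph, edgeAttrs=None):
    return random.random()

# Illustrative call on a small random directed graph:
# G = snap.GenRndGnm(snap.PNGraph, 100, 500)
# ordering = ranking(G, alpha=0.6)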
def getOverlapSet(self, nodeDegH):
    nodeIdV = snap.TIntV()
    dnodeIdV = snap.TIntV()
    nodeDegH.GetKeyV(nodeIdV)
    randNodeIdV = snap.TIntV()

    # random choice
    if self.overlap_choice == 1:
        randNodeIdV = np.random.choice(nodeIdV, self.overlap_size, replace=False)

    # random choice over the higher-degree nodes
    elif self.overlap_choice == 2:
        nodeDegH_walker = nodeDegH.BegI()
        counter = 0
        while not nodeDegH_walker.IsEnd():
            if counter > self.overlap_size * 2:
                break
            dnodeIdV.Add(nodeDegH_walker.GetKey())
            nodeDegH_walker.Next()
            counter += 1  # fix: advance the counter so only ~2x overlap_size nodes are collected
        randNodeIdV = np.random.choice(dnodeIdV, self.overlap_size, replace=False)

    # BFS tree nodes from the highest-degree node
    elif self.overlap_choice == 3:
        startNodeId = nodeDegH.BegI().GetKey()
        BfsTree = snap.GetBfsTree(self.G, startNodeId, True, False)
        for EI in BfsTree.Edges():
            sourceNodeId = EI.GetSrcNId()
            if sourceNodeId not in randNodeIdV:
                randNodeIdV.Add(sourceNodeId)
            destNodeId = EI.GetDstNId()
            if destNodeId not in randNodeIdV:
                randNodeIdV.Add(destNodeId)
            if randNodeIdV.Len() == self.overlap_size:
                break

    # BFS tree nodes from a random node
    elif self.overlap_choice == 4:
        startNodeId = np.random.choice(nodeIdV, 1, replace=False)
        BfsTree = snap.GetBfsTree(self.G, startNodeId[0], True, False)
        for EI in BfsTree.Edges():
            sourceNodeId = EI.GetSrcNId()
            if sourceNodeId not in randNodeIdV:
                randNodeIdV.Add(sourceNodeId)
            destNodeId = EI.GetDstNId()
            if destNodeId not in randNodeIdV:
                randNodeIdV.Add(destNodeId)
            if randNodeIdV.Len() == self.overlap_size:
                break

    return randNodeIdV
def getFeat():
    # G1, id2, synset2, _, _, _ = generate_word_graph(True, False, False)
    # G2, id2, synset2, _, _, _ = generate_word_graph(False, True, False)
    # G3, id2, synset2, _, _, _ = generate_word_graph(False, False, True)
    G4, id2, synset2, _, _, _ = generate_meaning_graph(True, False, False)
    # G5, id2, synset2, _, _, _ = generate_meaning_graph(False, True, False)
    # G6, id2, synset2, _, _, _ = generate_meaning_graph(False, False, True)
    # G7, id2, synset2, _, _, _ = generate_meaning_graph(True, False, True)

    degVec = {}
    egoNet = {}
    egoNetDeg = {}
    G = G4
    it = 0
    for node in G.Nodes():
        it += 1
        deg = node.GetDeg()
        nodeName = node.GetId()
        degVec[nodeName] = deg
        egoNet[nodeName] = 0
        egoNetDeg[nodeName] = 0
        commNeib = 0
        NodeVec = snap.TIntV()
        snap.GetNodesAtHop(G, nodeName, 1, NodeVec, False)
        for node2 in NodeVec:
            nodeIt = G.GetNI(node2)
            dest = nodeIt.GetDeg()
            egoNetDeg[nodeName] += dest
            NodeVec2 = snap.TIntV()
            snap.GetNodesAtHop(G, node2, 1, NodeVec2, False)
            for el in NodeVec2:
                if el in NodeVec:
                    commNeib += 1
        # edges inside the egonet and edges leaving the egonet
        egoNet[nodeName] = deg + commNeib / 2
        egoNetDeg[nodeName] = egoNetDeg[nodeName] - commNeib - deg

    # debug print; numImp is assumed to be a node id of interest defined elsewhere
    print(egoNet[numImp], egoNetDeg[numImp], degVec[numImp])

    featDictOne = {}
    for el in G.Nodes():
        numId = el.GetId()
        featVec = [degVec[numId], egoNet[numId], egoNetDeg[numId]]
        featDictOne[numId] = featVec

    for i in range(0, 2):
        featDict = {}
        for el in G.Nodes():
            numId = el.GetId()
            featVec = recFeat(featDictOne, G, numId)
            featDict[numId] = featVec
        featDictOne = featDict

    getSim(featDictOne)
def GetMaxKDegree(self, k):
    self.seedNodes.clear()
    resultInDegree = snap.TIntV()
    resultOutDegree = snap.TIntV()
    snap.GetDegSeqV(self.graph, resultInDegree, resultOutDegree)
    count = len(resultOutDegree)
    listDegree = []
    nodesId = []
    for i in range(count):
        listDegree.append(resultOutDegree[i])
        nodesId.append(i)
    # random.Random().shuffle(listDegree)
    return self.GetMaxK(listDegree, nodesId, k)
def clustering_coffecient(G):
    cluster_dict = []
    nodes = []
    # All nodes are stored
    for s in G.Nodes():
        nodes.append(s.GetId())
    k = len(nodes)
    if k > 2:
        for s in nodes:
            neighbors_node = []
            neighbors_mutual = []
            # node ids of all the nodes that are at distance Hop from node StartNId
            NodeVec = snap.TIntV()
            snap.GetNodesAtHop(G, s, 1, NodeVec, False)
            # nodes are stored for comparing
            for neighbor in NodeVec:
                neighbors_node.append(neighbor)
            for neighbor_2 in NodeVec:
                Second_NodeVec = snap.TIntV()
                snap.GetNodesAtHop(G, neighbor_2, 1, Second_NodeVec, False)
                # Finding triangles
                for second_neighbor in Second_NodeVec:
                    if second_neighbor in neighbors_node:
                        neighbors_mutual.append(second_neighbor)
            clust_node = 0
            if len(neighbors_mutual):
                # Clustering Coefficient for each node
                clust_node = float(len(neighbors_mutual)) / (
                    float(len(NodeVec)) * (float(len(NodeVec)) - 1))
                cluster_dict.append(clust_node)
        # Average Clustering Coefficient for all nodes in a graph
        Average_Value = 0
        l = len(cluster_dict)
        if l != 0:
            for val in cluster_dict:
                Average_Value = Average_Value + val
            return Average_Value / l
        else:
            return 0
def getCurrentandRestNet(self, n_list, g):
    lis_g = []
    NIdV1 = snap.TIntV()
    for i in g.Nodes():
        lis_g += [i.GetId()]
    lis_rest = [i for i in lis_g if i not in n_list]
    for i in lis_rest:
        NIdV1.Add(i)
    SubG1 = snap.GetSubGraph(g, NIdV1)

    NIdV2 = snap.TIntV()
    for i in n_list:
        NIdV2.Add(i)
    SubG2 = snap.GetSubGraph(g, NIdV2)

    return SubG1, SubG2
def jaccard_2(G, n1, n2, directed=False):
    # temporarily remove the edge between n1 and n2, if it exists
    deleted = False
    if G.IsEdge(n1, n2):
        G.DelEdge(n1, n2)
        deleted = True
    n1_neighbors = snap.TIntV()
    snap.GetNodesAtHop(G, n1, 1, n1_neighbors, directed)
    # note: nodes around n2 are taken at hop 2, not hop 1
    n2_neighbors = snap.TIntV()
    snap.GetNodesAtHop(G, n2, 2, n2_neighbors, directed)
    total_neighbors = set(n1_neighbors) | set(n2_neighbors)
    common_neighbors = set(n1_neighbors) & set(n2_neighbors)
    if len(total_neighbors) == 0:
        result = 0.0
    else:
        result = float(len(common_neighbors)) / float(len(total_neighbors))
    if deleted:
        G.AddEdge(n1, n2)
    return result
def GetNbr(sw):
    """ provide graph neighbors """
    taskname = sw.GetName()
    msglist = sw.GetMsgList()
    sw.flog.write("msglist " + str(msglist) + "\n")
    sw.flog.flush()

    AdjLists = LoadState()
    if AdjLists:
        # state is available, process requests for neighbors
        for item in msglist:
            name = sw.GetMsgName(item)
            # read the input nodes
            FIn = Snap.TFIn(Snap.TStr(name))
            msg = Snap.TIntV(FIn)
            GetNeighbors(sw, AdjLists, msg)
        return

    # state not found, initialize it with neighbors
    Edges = Snap.TIntV()
    for item in msglist:
        name = sw.GetMsgName(item)
        FIn = Snap.TFIn(Snap.TStr(name))
        Vec = Snap.TIntV(FIn)
        Edges.AddV(Vec)

    # first iteration: input are edges, save the state
    AdjLists = GetEdges(Edges)
    sw.flog.write("state " + str(AdjLists.Len()) + "\n")
    sw.flog.flush()
    SaveState(AdjLists)

    dmsgout = {}
    dmsgout["src"] = sw.GetName()
    dmsgout["cmd"] = "targets"
    dmsgout["body"] = {}
    sw.Send(0, dmsgout, "2")
def GetNbr(sw):
    """ provide graph neighbors """
    # taskname = sw.GetName()
    msglist = sw.GetMsgList()
    sw.log.debug("msglist %s" % msglist)

    with perf.Timer(sw.log, "LoadState-GetNbrCpp"):
        AdjLists = LoadState(sw)

    if AdjLists:
        # state is available, process requests for neighbors
        for item in msglist:
            name = sw.GetMsgName(item)
            # read the input nodes
            FIn = Snap.TFIn(Snap.TStr(name))
            msg = Snap.TIntV(FIn)
            GetNeighbors(sw, AdjLists, msg)
        return

    # state not found, initialize it with neighbors
    Edges = Snap.TIntV()
    for item in msglist:
        name = sw.GetMsgName(item)
        FIn = Snap.TFIn(Snap.TStr(name))
        Vec = Snap.TIntV(FIn)
        Edges.AddV(Vec)

    # first iteration: input are edges, save the state
    AdjLists = GetEdges(sw, Edges)
    sw.log.debug("state: %d" % AdjLists.Len())

    with perf.Timer(sw.log, "SaveState-GetNbrCpp"):
        SaveState(sw, AdjLists)

    dmsgout = {}
    dmsgout["src"] = sw.GetName()
    dmsgout["cmd"] = "targets"
    dmsgout["body"] = {}
    sw.Send(0, dmsgout, "2")
def pageRank_components(g):
    print('executing pagerank components ---- getting components for page rank')
    Components = snap.TCnComV()
    snap.GetWccs(g, Components)
    f = open('component_pr.txt', 'w')
    cgraphs = []
    for com in Components:
        v = snap.TIntV()
        for ni in com:
            v.Add(ni)
        cgraphs.append(snap.GetSubGraph_PNGraph(g, v))
    print('components retrieved for pagerank')
    f.write('Total components:' + str(len(cgraphs)) + '\n')
    for graph in cgraphs:
        if graph.GetNodes() == 2:
            continue
        sprank = snap.TIntFltH()
        snap.GetPageRank_PNGraph(graph, sprank)
        sprank.SortByDat(False)
        f.write(
            str(graph.GetNodes()) + ' ' +
            str(sprank[sprank.BegI().GetKey()]) + '\n')
    f.close()
    print('finished writing pagerank components values')
def get_deg_data(G):
    result_degree = snap.TIntV()
    snap.GetDegSeqV(G, result_degree)
    deg_data = []
    for i in range(result_degree.Len()):
        deg_data.append(result_degree[i])
    return deg_data
def calcClusteringCoefficientSingleNode(Node, Graph):
    """
    :param - Node: node from snap.PUNGraph object. Graph.Nodes() will give an
                   iterable of nodes in a graph
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: local clustering coefficient of Node
    """
    ############################################################################
    # TODO: Your code here!
    C = 0.0
    ki = Node.GetDeg()
    if ki < 2:
        return 0
    else:
        Nbrs = snap.TIntV()
        common_neighbours = 0
        for Id in Node.GetOutEdges():
            common_neighbour = snap.GetCmnNbrs(Graph, Node.GetId(), Id, Nbrs)
            common_neighbours += common_neighbour
        # ei is the number of edges between neighbours; divide by 2 because
        # each pair of connected neighbours is counted twice
        ei = common_neighbours / 2
        # C = 2*ei / (ki*(ki-1))
        C = 2 * ei / (ki * (ki - 1))
    ############################################################################
    return C
def gen_G(D, Pi_minus, Pi_exo, V_exo, theta2, N):
    """
    Returns pairwise-stable network on N nodes.

    D, Pi_minus, Pi_exo = outputs of gen_D().
    V_exo = 'exogenous' part of joint surplus (output of gen_V_exo).
    theta2 = transitivity parameter (theta[2]).
    """
    G = snap.GenRndGnm(snap.PUNGraph, N, 0)  # initialize empty graph
    Components = snap.TCnComV()
    snap.GetWccs(D, Components)  # collects components of D
    NIdV = snap.TIntV()  # initialize vector
    for C in Components:
        if C.Len() > 1:
            NIdV.Clr()
            for i in C:
                NIdV.Add(i)
            tempnet = gen_G_subgraph(NIdV, D, Pi_minus, Pi_exo, V_exo, theta2)
            for edge in tempnet.Edges():
                G.AddEdge(edge.GetSrcNId(), edge.GetDstNId())

    # add robust links
    for edge in Pi_exo.Edges():
        G.AddEdge(edge.GetSrcNId(), edge.GetDstNId())

    return G
def clustering_coef(G, mode_for_end_nodes='put_zeros'):
    '''
    Calculate the vector of clustering coefficients, one for each node.
    G is an undirected graph.
    mode_for_end_nodes = 'nan' - end/disconnected nodes are ignored,
        i.e. not included in the output list
    mode_for_end_nodes = 'put_zeros' - assign zero to end/disconnected nodes

    list_clusterning_coefs_allnodes = clustering_coef(UGraph)
    https://www.kaggle.com/alexandervc/hw1-ac/notebook
    '''
    list_clusterning_coefs_allnodes = []
    for n in G.Nodes():
        NodeVec = snap.TIntV()
        snap.GetNodesAtHop(G, n.GetId(), 1, NodeVec, False)  # get neighbours of current node
        current_degree = len(NodeVec)  # same as n.GetDeg()
        if current_degree <= 1:
            # skip disconnected/end nodes - the coefficient is undefined for them
            # (it would require division by zero)
            if mode_for_end_nodes == 'nan':
                continue
            else:
                list_clusterning_coefs_allnodes.append(0)
                continue
        count_edges_between_neigbours = 0
        for neigbor1 in NodeVec:
            for neigbor2 in NodeVec:
                if neigbor1 >= neigbor2:
                    continue
                if G.IsEdge(neigbor1, neigbor2):
                    count_edges_between_neigbours += 1
        clustering_coef_current_node = 2 * count_edges_between_neigbours / (
            current_degree * (current_degree - 1))
        list_clusterning_coefs_allnodes.append(clustering_coef_current_node)
    return list_clusterning_coefs_allnodes
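# Quick sanity check (a sketch): on a small random undirected graph, the mean of
# the per-node coefficients in 'put_zeros' mode should match (up to floating
# point) SNAP's built-in average clustering coefficient.
import snap

G = snap.GenRndGnm(snap.PUNGraph, 200, 800)
coefs = clustering_coef(G, mode_for_end_nodes='put_zeros')
print(sum(coefs) / len(coefs))   # hand-rolled average
print(snap.GetClustCf(G, -1))    # SNAP's GetClustCf over all nodes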
def basicFeature(G):
    V = []
    cnt = 0
    x1 = 0
    x2 = 0
    x3 = 0
    for NI in G.Nodes():
        # Get egonet
        NIdV = snap.TIntV()
        NIdV.Add(NI.GetId())
        for Id in NI.GetOutEdges():
            NIdV.Add(Id)
        results = snap.GetEdgesInOut(G, NIdV)
        V.append([NI.GetId(), NI.GetOutDeg(), results[0], results[1]])
        cnt = cnt + 1
        x1 = x1 + NI.GetOutDeg()
        x2 = x2 + results[0]
        x3 = x3 + results[1]

    Id9 = 9999  # hard-code the biggest possible node value for candidates to avoid collision
    x1 = x1 * 1.0 / cnt
    x2 = x2 * 1.0 / cnt
    x3 = x3 * 1.0 / cnt
    a = np.sqrt(x1 * x1 + x2 * x2 + x3 * x3)

    res = []
    scores = []
    for i in V:
        [Id, y1, y2, y3] = i
        if Id != Id9:
            b = np.sqrt(y1 * y1 + y2 * y2 + y3 * y3)
            dem = x1 * y1 + x2 * y2 + x3 * y3
            if b == 0 or a == 0 or dem == 0:
                sim = 0
            else:
                sim = dem * 1.0 / a / b
            res.append([Id, sim])
            scores.append(sim)

    arr = np.array(res)
    r = arr[arr[:, 1].argsort()]
    l = len(res)
    print("Top 5 similar nodes based on cosine similarity, for basic features like HW2 Q1, when compared to mean node")
    print(r[l - 1], r[l - 2], r[l - 3], r[l - 4], r[l - 5])

    # plot roles
    plt.figure()
    plt.hist(scores, bins=20)
    plt.title('Distribution of cosine similarity between mean node and any node in the graph')
    plt.xlabel('cosine similarity')
    plt.ylabel('count')
    plt.show()
    return
def shrinkGraph(graph, maxDist):
    nameToNId = {}
    for n in graph.Nodes():
        id = n.GetId()
        nameToNId[graph.GetStrAttrDatN(id, 'name').decode('utf-8')] = id

    infile = codecs.open('csv/dblpusersaff.csv', 'r', 'utf-8')
    lines = infile.read().splitlines()
    validNodes = set()
    for line in lines:
        tokens = line.split('||')
        if tokens[2] != '':
            id = nameToNId[tokens[1]]
            # BFS out to maxDist hops from this node
            closeNeighborsSet = set()
            pq = deque()
            pq.append((id, 0))
            closeNeighborsSet.add(id)
            while len(pq) > 0:
                (id, dist) = pq.popleft()
                if dist == maxDist:
                    break
                node = graph.GetNI(id)
                for i in range(node.GetOutDeg()):
                    nbrId = node.GetOutNId(i)
                    if nbrId not in closeNeighborsSet:
                        closeNeighborsSet.add(nbrId)
                        pq.append((nbrId, dist + 1))
            validNodes.update(closeNeighborsSet)
            print(tokens[0], len(closeNeighborsSet), len(validNodes))

    # drop every node that is not within maxDist of a node with an affiliation
    unused = snap.TIntV()
    for n in graph.Nodes():
        if n.GetId() not in validNodes:
            unused.Add(n.GetId())
    graph.DelNodes(unused)