def gen_G_subgraph(component, D, Pi_minus, Pi_exo, V_exo, theta2):
    """
    Returns a pairwise-stable network for nodes in component, via myopic
    best-response dynamics. This subnetwork is pairwise-stable taking as
    given the links in the rest of the network. Initial network for
    best-response dynamics is the opportunity graph.

    NB: This function is specific to the joint surplus used in our
    simulations.

    component = component of D for which we want a pairwise-stable subnetwork.
    D, Pi_minus, Pi_exo = outputs of gen_D().
    V_exo = 'exogenous' part of joint surplus (output of gen_V_exo).
    theta2 = transitivity parameter (theta[2]).
    """
    D_subgraph = snap.GetSubGraph(D, component)
    num_D_edges = D_subgraph.GetEdges()

    # Start initial network on Pi, without robustly absent potential links.
    G = snap.GetSubGraph(Pi_minus, component)

    # For each node pair (i,j) linked in Pi_exo (i.e. their links are robust),
    # with either i or j in component, add their link to G. Result is the
    # subgraph of Pi_minus on an augmented component of D.
    for i in component:
        for j in Pi_exo.GetNI(i).GetOutEdges():
            if not G.IsNode(j):
                G.AddNode(j)
            G.AddEdge(i, j)

    # With no links of D inside the component there is nothing to iterate
    # over; without this guard the loop below would spin forever because the
    # deviation counter could never exceed the (zero) edge count.
    stable = num_D_edges == 0
    meetings_without_deviations = 0

    while not stable:
        # Need only iterate through links of D, since all other links are
        # robust.
        for edge in D_subgraph.Edges():
            # Iterate deterministically through the default edge order. Add
            # or remove the link in G according to myopic best-response
            # dynamics. If we cycle back to any edge with no changes to the
            # network, conclude it's pairwise stable.
            i = min(edge.GetSrcNId(), edge.GetDstNId())
            j = max(edge.GetSrcNId(), edge.GetDstNId())
            cfriend = snap.GetCmnNbrs(G, i, j) > 0
            if V_exo[i, j] + theta2 * cfriend > 0:  # specific to model of V
                if G.IsEdge(i, j):
                    meetings_without_deviations += 1
                else:
                    G.AddEdge(i, j)
                    meetings_without_deviations = 0
            else:
                if G.IsEdge(i, j):
                    G.DelEdge(i, j)
                    meetings_without_deviations = 0
                else:
                    meetings_without_deviations += 1

        # More consecutive quiet meetings than there are edges means every
        # edge of D has been visited at least once without a deviation.
        if meetings_without_deviations > num_D_edges:
            stable = True

    return snap.GetSubGraph(G, component)
def ranking(graph,
            alpha=0.6,
            primary=degreeDifference,
            secondary=randomValue,
            edgeAttrs=None):
    """
    Implements the node ranking algorithm described by Guo, Yang, and Zhou

    The nodes are sorted, split into a leading and a trailing partition, and
    each partition is ranked recursively on its induced subgraph.

    Args:
        graph (snap.TNGraph): a directed graph to rank
        alpha (float): the relative size of the leader partition
        primary ((nodeID, graph, edgeAttrs) -> sortable): primary sorting key
            for the nodes
        secondary ((nodeID, graph, edgeAttrs) -> sortable): secondary sorting
            key, used to order nodes whose primary keys are equal
        edgeAttrs (dict): edge attributes for use with sorting key functions

    Returns:
        A list of node IDs ordered in descending order by ranking
    """

    def sortKey(nodeID):
        return (primary(nodeID, graph, edgeAttrs),
                secondary(nodeID, graph, edgeAttrs))

    # Order the nodes from highest to lowest key
    nodeOrdering = [node.GetId() for node in graph.Nodes()]
    nodeOrdering.sort(key=sortKey, reverse=True)

    # The first alpha-fraction of the ordering are the leaders
    splitIndex = int(alpha * graph.GetNodes())
    leaders, followers = nodeOrdering[:splitIndex], nodeOrdering[splitIndex:]

    # Base case: an empty partition means recursing cannot refine the order
    if not leaders or not followers:
        return leaders + followers

    # Induce one subgraph per partition (leaders first, then followers)
    partitionGraphs = []
    for members in (leaders, followers):
        memberNIdVector = snap.TIntV()
        for nodeID in members:
            memberNIdVector.Add(nodeID)
        partitionGraphs.append(snap.GetSubGraph(graph, memberNIdVector))
    leaderGraph, followerGraph = partitionGraphs

    # Recurse on the leaders and followers
    leaderRanking = ranking(leaderGraph, alpha, primary, secondary, edgeAttrs)
    followerRanking = ranking(followerGraph, alpha, primary, secondary,
                              edgeAttrs)
    return leaderRanking + followerRanking
def getCurrentandRestNet(self, n_list, g):
    """Split g into the part outside n_list and the part inside it.

    n_list: iterable of node ids forming the "current" network.
    g: snap graph to split.

    Returns a tuple (rest_subgraph, current_subgraph): the subgraph of g
    induced on every node id NOT in n_list, followed by the subgraph induced
    on the ids in n_list.
    """
    current_ids = list(n_list)
    # Set lookup keeps the complement computation linear in the node count.
    current_lookup = set(current_ids)

    rest_vec = snap.TIntV()
    for node in g.Nodes():
        node_id = node.GetId()
        if node_id not in current_lookup:
            rest_vec.Add(node_id)
    rest_subgraph = snap.GetSubGraph(g, rest_vec)

    current_vec = snap.TIntV()
    for node_id in current_ids:
        current_vec.Add(node_id)
    current_subgraph = snap.GetSubGraph(g, current_vec)

    return rest_subgraph, current_subgraph
def temporal_subgraphs(graphs, nodeV):
    """Return the list of subgraphs induced by nodeV, one per graph in graphs.

    The result preserves the order of `graphs`.
    """
    return [snap.GetSubGraph(snapshot, nodeV) for snapshot in graphs]
def get_subgraph(graph, nodes_ids):
    """Return the subgraph of `graph` induced on the ids in `nodes_ids`.

    Duplicate ids are dropped; the first occurrence of each id determines its
    position in the vector handed to snap.GetSubGraph.
    """
    node_ids_vector = snap.TIntV()
    # Track ids in a set instead of testing membership against the growing
    # vector for every candidate.
    seen_ids = set()
    for node_id in nodes_ids:
        if node_id in seen_ids:
            continue
        seen_ids.add(node_id)
        node_ids_vector.Add(node_id)
    return snap.GetSubGraph(graph, node_ids_vector)
def getEgonetFeatures(Graph, allNodeIds, nodeDeg, Node):
    """
    Compute edge counts for the neighbourhood of a node.

    :param Graph (TUNGraph): A snap graph of Collaboration network
    :param allNodeIds(TIntV): Vector of all node ids
    :param nodeDeg(int): Degree of node under consideration
    :param Node(NodeI): Current node Iterator
    :returns: dictionary of egonet features
        - "egonet_edges": number of edges in the subgraph induced on the
          neighbours of Node
        - "egonet_boundary_edges": number of edges of Graph for which
          checkIsBoundary() is true
    """
    interiorNodeIds = snap.TIntV()
    for i in range(nodeDeg):
        interiorNodeIds.Add(Node.GetNbrNId(i))

    # A node without neighbours has an empty neighbourhood.
    if interiorNodeIds.Len() == 0:
        return {"egonet_edges": 0, "egonet_boundary_edges": 0}

    exteriorNodeIds = list(set(allNodeIds) - set(interiorNodeIds))

    SubGraph = snap.GetSubGraph(Graph, interiorNodeIds)
    edge_count = SubGraph.GetEdges()

    boundary_edges = 0
    for edge in Graph.Edges():
        if checkIsBoundary(edge.GetId(), interiorNodeIds, exteriorNodeIds):
            boundary_edges += 1

    return {
        "egonet_edges": edge_count,
        "egonet_boundary_edges": boundary_edges
    }
def feature_vec_test(G, nId):
    """Debugging variant of the ego-net feature computation.

    Prints the out-neighbours of nId and a dump of the subgraph induced on
    them, then returns the list
    [degree, egoinsideEdges + degree, egoOutEdges] where
      - degree is the degree of nId,
      - egoinsideEdges is the edge count of the subgraph induced on the
        out-neighbours of nId,
      - egoOutEdges counts out-edges of those neighbours that lead to a node
        which is neither another neighbour nor nId itself.
    """
    node = G.GetNI(nId)
    degree = node.GetDeg()

    NIdV = snap.TIntV()
    for nbrId in node.GetOutEdges():
        NIdV.Add(nbrId)
    for nbrId in NIdV:
        print(nbrId)

    egoNet = snap.GetSubGraph(G, NIdV)
    # Same output as the original statement: whatever Dump() emits, followed
    # by the label and Dump()'s return value.
    print("egonet: %s" % (egoNet.Dump(),))
    egoinsideEdges = egoNet.GetEdges()

    # Set lookup instead of rescanning the vector for every edge endpoint.
    nbrIds = set(NIdV)
    egoOutEdges = 0
    for nbrId in NIdV:
        for dstNId in G.GetNI(nbrId).GetOutEdges():
            if dstNId not in nbrIds and dstNId != nId:
                egoOutEdges += 1

    return [degree, egoinsideEdges + degree, egoOutEdges]
def Get_Subgraphs(G_Directed_with_Attributes):
    """Return the subgraph induced on nodes with a "NAME_USERS" attribute.

    A node is kept when its "NAME_USERS" string attribute is truthy
    (non-empty).

    NOTE: earlier, now disabled, versions of this function also built
    subgraphs of rumor spreaders, anti-rumor spreaders and tweeters by
    filtering on the "User_Category" attribute ('Tweeter_Rumor',
    'Retweeter_Rumor', 'Tweeter_AntiRumor', 'Retweeter_AntiRumor'). Only the
    all-spreaders subgraph is produced and returned.
    """
    import snap

    NIdV = snap.TIntV()
    for node in G_Directed_with_Attributes.Nodes():
        if G_Directed_with_Attributes.GetStrAttrDatN(node, "NAME_USERS"):
            NIdV.Add(node.GetId())
    SubGraph_All_Spreaders = snap.GetSubGraph(G_Directed_with_Attributes, NIdV)

    return SubGraph_All_Spreaders
def bin2graph(lstbin, lstids, graph):
    """Return the subgraph of `graph` selected by a 0/1 mask.

    lstbin[k] == 1 selects lstids[k]; each selected id is converted with
    int() before being handed to snap.
    """
    selected = snap.TIntV()
    for position, raw_id in enumerate(lstids):
        if lstbin[position] == 1:
            selected.Add(int(raw_id))
    return snap.GetSubGraph(graph, selected)
def gen_event_mention_head(self, sentid, starttokid, endtokid):
    """Return the head token id of the span [starttokid, endtokid).

    The span's token ids induce a subgraph of self.dependgraphs[sentid][0].
    Scanning from the last token to the first, the first token with no
    incoming edge inside that subgraph is returned; None is returned when no
    such token exists.
    """
    span = range(starttokid, endtokid)

    idvec = snap.TIntV()
    for tokid in span:
        idvec.Add(tokid)
    subgraph = snap.GetSubGraph(self.dependgraphs[sentid][0], idvec)

    for tokid in reversed(span):
        if subgraph.GetNI(tokid).GetInDeg() == 0:
            return tokid
    return None
def snowball_sample(G, num_waves, seeds):
    """
    Parameters:
       G - SNAP graph or network to sample from
       num_waves - number of snowball waves
       seeds - SNAP vector (TIntV) of seeds (node ids) to start snowball
               sample from

    Return value:
       SNAP network (TNEANet) snowball sampled from G with each node having
       an integer "zone" attribute for snowball sampling zone (0=seed,
       1=first wave, etc.) [TNEANet needed to allow zone attribute, not
       actually using multigraph capability].

    Note directions on directed graph are ignored - can sample in undirected
    or directed graph.
    """
    assert (len(seeds) == len(set(seeds)))  # no duplicate node ids

    # It seems like GetSubGraph does not preserve node attributes, so instead
    # of adding attributes to nodes on N, make a Python dictionary mapping
    # node ids to zone and then add them back as attributes on the subgraph
    # (node ids are preserved so we can do this).
    zonedict = dict()  # map nodeid : zone
    N = snap.ConvertGraph(snap.PNEANet, G)  # copy graph/network G to network N
    nodes = set(seeds)  # will accumulate all nodes (including seeds) here
    for seed in seeds:
        zonedict[seed] = 0  # seed nodes are zone 0

    # Only nodes discovered in the previous wave can reach unseen nodes, so
    # expand just that frontier instead of every node found so far.
    frontier = set(nodes)
    for wave in range(1, num_waves + 1):
        if not frontier:
            break
        discovered = set()
        for node in frontier:
            neighbours = snap.TIntV()
            snap.GetNodesAtHop(G, node, 1, neighbours, False)  # neighbours of node
            for neighbour in neighbours:
                if neighbour not in nodes:
                    discovered.add(neighbour)
        for neighbour in discovered:
            zonedict[neighbour] = wave
        nodes.update(discovered)
        frontier = discovered

    # have to convert nodes set into TIntV for use in SNAP
    NodeVec = snap.TIntV()
    for node in nodes:
        NodeVec.Add(node)
    sampleN = snap.GetSubGraph(N, NodeVec)

    # now put the zones as attributes on the subgraph nodes (which depends
    # on nodeids being preserved in the subgraph)
    sampleN.AddIntAttrN("zone", -1)  # add zone attribute init to -1
    for (nodeid, zone) in zonedict.items():
        sampleN.AddIntAttrDatN(nodeid, zone, "zone")
    return sampleN
def plot_subgraph(graph, nodes, output, title):
    """Draw the subgraph of `graph` induced on the given nodes.

    nodes: iterable of sequences whose first element is a node id; repeated
        ids are ignored after their first occurrence.
    output: path of the image written by snap.DrawGViz.
    title: title passed to snap.DrawGViz.

    Each node is labelled with its "pkg" string attribute, shortened by
    ellipsize_text(pkg, 30).
    """
    node_ids_vector = snap.TIntV()
    labels = snap.TIntStrH()
    # Track ids in a set instead of testing membership against the growing
    # vector for every candidate.
    seen_ids = set()
    for pair in nodes:
        node_id = pair[0]
        if node_id in seen_ids:
            continue
        seen_ids.add(node_id)
        node_ids_vector.Add(node_id)
        pkg = graph.GetStrAttrDatN(node_id, "pkg")
        labels[node_id] = ellipsize_text(pkg, 30)
    subgraph = snap.GetSubGraph(graph, node_ids_vector)
    snap.DrawGViz(subgraph, snap.gvlNeato, output, title, labels)
def get_random_subgraph(G, banned_ids, subgraph_size = 300, num_banned=10):
    """Return a random induced subgraph of G seeded with banned nodes.

    num_banned ids are sampled from banned_ids and
    (subgraph_size - num_banned) ids are sampled from 0..G.GetNodes()-1.
    The two samples are merged through a set, so ids drawn in both samples
    are kept once and the result can hold fewer than subgraph_size nodes.
    """
    picked_banned = random.sample(banned_ids, num_banned)
    # Node ids are assumed to run sequentially from 0 (per the original
    # author's note), so sampling the id range samples the nodes.
    picked_others = random.sample(range(G.GetNodes()),
                                  subgraph_size - num_banned)
    merged_ids = list(set(picked_banned + picked_others))

    id_vector = snap.TIntV()
    for node_id in merged_ids:
        id_vector.Add(node_id)
    return snap.GetSubGraph(G, id_vector)
def egonet(graph, node):
    """Return the subgraph of `graph` induced on `node` and its out-neighbours.

    Only GetOutEdges() is consulted; in-neighbours are not added.
    """
    member_ids = [node]
    member_ids.extend(graph.GetNI(node).GetOutEdges())

    member_vec = snap.TIntV()
    for member_id in member_ids:
        member_vec.Add(member_id)
    return snap.GetSubGraph(graph, member_vec)
def single_KO_resilience_df(G, g_to_node, KO_list, strain_name, reps=100):
    '''
    Input: a starting graph G representing nodes and edges in either the
    LTEE ancestor REL606, or a 50K clone A from one of the 12 populations;
    a dictionary of genes to nodes; a list of genes to knockout; the name of
    the strain represented by the graph G.

    For every gene in KO_list:
       if the gene maps to a node, build the subgraph of G without that node
       and average GraphResilience over `reps` replicates of that subgraph;
       otherwise reuse the resilience of the unmodified graph G.
       The value is printed as it is computed.

    Returns a DataFrame with columns 'strain' and 'resilience'. The first
    row is strain_name with the resilience of G; each following row is
    "<gene>_knockout" with that knockout's resilience.
    '''

    def mean_resilience(graph):
        ## generator expression: replicates are summed without being stored.
        return sum(GraphResilience(graph) for _ in range(reps)) / float(reps)

    initial_resilience = mean_resilience(G)

    clone_col = []
    resilience = []
    for gene in KO_list:
        ## appended together with the resilience below so the columns line up.
        clone_col.append(gene + "_knockout")
        if gene in g_to_node:
            knocked_out_node = g_to_node[gene]
            ## subgraph on every node of G except the knocked-out one.
            NIdV = snap.TIntV()
            for n in G.Nodes():
                NId = n.GetId()
                if NId != knocked_out_node:
                    NIdV.Add(NId)
            my_resilience = mean_resilience(snap.GetSubGraph(G, NIdV))
        else:
            ## gene is not a node of the graph: knockout changes nothing.
            my_resilience = initial_resilience
        print(my_resilience)
        resilience.append(my_resilience)

    resilience_results = pd.DataFrame.from_dict({
        'strain': [strain_name] + clone_col,
        'resilience': [initial_resilience] + resilience
    })
    return resilience_results
def writeToFile(fname, NIdV, idx):
    """Write the edges of an induced subgraph as a tab-separated edge list.

    The subgraph is induced on NIdV in the module-level `graph`. Each edge
    becomes one line "<src>\\t<dst>" in the file `fname`, which is
    overwritten.

    idx is accepted for interface compatibility; it does not influence the
    output.
    """
    subG = snap.GetSubGraph(graph, NIdV)
    # Collect the lines first so the file is only opened (and truncated)
    # once every edge has been formatted.
    lines = [
        '%d\t%d\n' % (edge.GetSrcNId(), edge.GetDstNId())
        for edge in subG.Edges()
    ]
    with open(fname, 'w') as f:
        f.write(''.join(lines))
def make_subgraph(region_start, region_end, graph):
    """Return the subgraph induced on node ids in [region_start, region_end].

    Both bounds are inclusive. Scanning stops at the first node whose id is
    greater than region_end.

    NOTE(review): the early stop only yields the complete region if
    graph.Nodes() produces ids in ascending order - confirm for the graphs
    passed in.
    """
    print("make_subgraph: start: 0x%x and end: 0x%x" % (region_start, region_end))
    region_ids = snap.TIntV()
    # A linear list of functions (nodes) would make this scan unnecessary.
    for node in graph.Nodes():
        node_id = node.GetId()
        if node_id > region_end:
            break
        if node_id >= region_start:
            region_ids.Add(node_id)
    return snap.GetSubGraph(graph, region_ids)
def pruneGraph(graph, minDegree):
    """
    Prunes a graph, keeping only nodes whose degree exceeds minDegree

    NOTE(review): the comparison is strict, so nodes whose degree equals
    minDegree are removed as well.

    Returns:
        A subgraph of graph induced on the kept nodes
    """
    keptIds = [node.GetId() for node in graph.Nodes()
               if node.GetDeg() > minDegree]
    nIdV = snap.TIntV()
    for nodeId in keptIds:
        nIdV.Add(nodeId)
    return snap.GetSubGraph(graph, nIdV)
def GraphResilience(G):
    ''' calculate the resilience of the graph G, using method in
    Zitnik et al. (2019), described in the supplementary methods.

    Nodes are failed in a random order (random.shuffle), so repeated calls
    on the same graph can return different values. G itself is not modified;
    all deletions happen on a copy.

    Returns 1 - AUC, where AUC is the Simpson's-rule integral of component
    entropy over failure rates 0, 0.01, ..., 1.
    '''
    nodes = G.GetNodes()
    ## get distribution of strongly connected component sizes for the starting graph.
    sscdict = GraphComponentDistributionDict(G)
    ## calculate the entropy of the set of strongly connected components.
    H_0 = ComponentDistributionEntropy(sscdict, nodes)
    ## initialize a dictionary from failure rate to entropy.
    failure_rate_to_entropy = {0: H_0}

    ## copy G by making a subgraph G2 with the same nodes.
    NIdV = snap.TIntV()
    for n in G.Nodes():
        NIdV.Add(n.GetId())
    G2 = snap.GetSubGraph(G, NIdV)

    ## iteratively remove edges from random nodes to fragment G2.
    ## calculate H for a range of failure rates.
    ## adaptation of C++ code in Zitnik paper. See:
    ## https://github.com/mims-harvard/life-tree/blob/master/compute-net-stats/analyze.cpp
    num_deleted = 0
    node_order = [n.GetId() for n in G.Nodes()]
    ## shuffle the order of the nodes. this is done in-place.
    random.shuffle(node_order)

    failure_percents = range(0, 100 + 1)
    for fail_rate_p in failure_percents[1:]:  ## failure rate as a percentage from 1 to 100.
        ## fraction of failed nodes; float literal keeps this a true division
        ## on every Python version.
        failed_frac = fail_rate_p / 100.0
        cur_deleted_total = nodes * failed_frac
        while num_deleted < cur_deleted_total:
            NId = node_order[num_deleted]  ## get the next random node
            num_deleted += 1
            G2.DelNode(NId)  ## delete it to remove its edges.
            G2.AddNode(NId)  ## add it back so that its edges are gone but the node remains.
        ## add entry to failure rate : component entropy dict.
        cur_ssc_dict = GraphComponentDistributionDict(G2)
        failure_rate_to_entropy[fail_rate_p] = ComponentDistributionEntropy(
            cur_ssc_dict, nodes)

    ## calculate resilience for the graph.
    ## This is 1 - AUC of the interpolated function.
    ## Use Simpson's rule to approximate the integral.
    x = np.array([p / 100.0 for p in failure_percents])
    ## look each entropy up by its failure rate so y lines up with x without
    ## depending on dictionary iteration order.
    y = np.array([failure_rate_to_entropy[p] for p in failure_percents])
    AUC = simps(y, x)
    resilience = 1 - AUC
    return resilience
def PlotSubGraph(G,NodeID,path="./graph/"):
    """Draw the neighbourhood of NodeID and save it as a PNG.

    The drawn subgraph is induced on NodeID and its neighbours; every node
    is labelled with its own id. The image is written to
    path + "subgraph<NodeID>.png". Always returns 0.
    """
    center = G.GetNI(NodeID)

    member_ids = snap.TIntV()  # ids of the nodes to draw
    id_labels = snap.TIntStrH()
    member_ids.Add(NodeID)
    id_labels[NodeID] = str(NodeID)

    for position in range(center.GetDeg()):
        neighbour_id = center.GetNbrNId(position)
        member_ids.Add(neighbour_id)
        id_labels[neighbour_id] = str(neighbour_id)

    neighbourhood = snap.GetSubGraph(G, member_ids)
    image_path = path+"subgraph"+str(NodeID)+".png"
    caption = "SubGraph of "+str(NodeID)
    snap.DrawGViz(neighbourhood, snap.gvlDot, image_path, caption, id_labels)
    return 0
def community_density_new(c, g):
    """Return the edge density 2e / (n(n-1)) of the subgraph of g induced on c.

    e and n are obtained by counting the edges and nodes of the induced
    subgraph. Returns None when that subgraph has fewer than two nodes.
    """
    members = snap.TIntV()
    for node_id in c:
        members.Add(node_id)
    community = snap.GetSubGraph(g, members)

    edge_total = sum(1 for _ in community.Edges())
    node_total = sum(1 for _ in community.Nodes())

    if node_total in (0, 1):
        return None
    return 2.0 * edge_total / (node_total * (node_total - 1))
def egonet2(graph, node):
    """Return the subgraph induced on `node` and everything within two out-hops.

    The node set is `node`, its out-neighbours, and the out-neighbours of
    those. Each id is collected once, and the id vector is built only after
    traversal is finished, so nothing is appended to a container while it is
    being iterated.
    """
    seen = set([node])

    # Hop 0 and hop 1: the node and its out-neighbours.
    inner_ids = [node]
    for nbr in graph.GetNI(node).GetOutEdges():
        if nbr not in seen:
            seen.add(nbr)
            inner_ids.append(nbr)

    # Hop 2: out-neighbours of everything collected so far.
    member_ids = list(inner_ids)
    for inner_id in inner_ids:
        for nbr in graph.GetNI(inner_id).GetOutEdges():
            if nbr not in seen:
                seen.add(nbr)
                member_ids.append(nbr)

    member_vec = snap.TIntV()
    for member_id in member_ids:
        member_vec.Add(member_id)
    return snap.GetSubGraph(graph, member_vec)
def getSubgraph(self,subgraphNodeIdHV):
    """Save one induced subgraph of self.G per entry of subgraphNodeIdHV.

    subgraphNodeIdHV: snap hash table whose keys are graph ids and whose
        values are node-id vectors.

    For every entry the induced subgraph is reported on stdout and saved as
    an edge list to "<targetDir>/<graphName>_<graphId>.txt".

    Returns the list of file paths written, in iteration order.
    """
    lblFiles = []
    walker = subgraphNodeIdHV.BegI()
    while not walker.IsEnd():
        graphId = walker.GetKey()
        subgraphNodeIdV = walker.GetDat()
        subG = snap.GetSubGraph(self.G, subgraphNodeIdV)
        print("Network %s: (%d,%d)" % ("induced subgraph " + str(graphId),
                                       subG.GetNodes(), subG.GetEdges()))
        subgraph1Name = self.graphName + "_" + str(graphId)
        # Build the path once: it is both the save target and the value
        # reported to the caller.
        lblFile = self.targetDir + "/" + subgraph1Name + ".txt"
        snap.SaveEdgeList(subG, lblFile)
        lblFiles.append(lblFile)
        walker.Next()
    return lblFiles
def get_random_subgraph_connected(G, banned_ids, subgraph_size = 300):
    """Return an induced subgraph of G grown breadth-first from a banned node.

    A root is picked at random from banned_ids. Nodes are collected in
    breadth-first order along out-edges until subgraph_size nodes have been
    gathered or no unvisited node is reachable, whichever comes first.
    """
    from collections import deque

    root = random.choice(banned_ids)

    visited = set()
    collected = snap.TIntV()
    queue = deque([root])
    while queue:
        curr = queue.popleft()
        if curr in visited:
            continue
        visited.add(curr)
        collected.Add(curr)
        if collected.Len() == subgraph_size:
            break
        for neigh in G.GetNI(curr).GetOutEdges():
            # Already-visited nodes would only be discarded when dequeued.
            if neigh not in visited:
                queue.append(neigh)
    return snap.GetSubGraph(G, collected)
def extract_subgraph(g, n, skew):
    """Extract a subgraph of g around n randomly chosen seed nodes.

    Each node is weighted by the pdf of a skew-normal distribution
    (shape `skew`, loc=50, scale=10) evaluated at its in-degree. n distinct
    seeds are drawn, with probability proportional to weight, from the nodes
    whose weight exceeds 5.0e-3. The returned subgraph is induced on the
    seeds plus all of their 1-hop neighbours, so it usually holds more than
    n nodes.

    Side effects: plots weight against in-degree on the current matplotlib
    axes and prints the shape of the plotted table and the node count of the
    result.
    """
    # Loop variables deliberately avoid the name `n`: under Python 2 a list
    # comprehension variable leaks and would overwrite the sample size.
    nodes = pd.DataFrame([(node.GetId(), node.GetInDeg())
                          for node in g.Nodes()],
                         columns=['node_id', 'in_deg'])
    # 'cc' and 'dist' are not read again in this function; they are kept so
    # the printed table shape is unchanged.
    nodes['cc'] = nodes['node_id']
    nodes['dist'] = -1

    rv = skewnorm(skew, loc=50, scale=10)
    nodes['weight'] = rv.pdf(nodes.in_deg)

    u_deg = nodes.sort_values('in_deg').drop_duplicates('in_deg')
    print(u_deg.shape)
    plt.plot(u_deg.in_deg, u_deg.weight, 'k-', lw=1)
    plt.xlim(0, 50)

    thresh = 5.0e-3
    high_prop_nodes = nodes.query("weight>%f" % thresh)
    subg_nodes = np.random.choice(
        high_prop_nodes.node_id,
        n,
        replace=False,
        p=high_prop_nodes.weight / high_prop_nodes.weight.sum()).tolist()

    all_neighbors = []
    for seed_id in subg_nodes:
        NodeVec = snap.TIntV()
        snap.GetNodesAtHop(g, seed_id, 1, NodeVec, False)
        all_neighbors.extend(NodeVec)
    subg_nodes.extend(all_neighbors)

    NIdV = snap.TIntV()
    for node_id in np.unique(subg_nodes):
        # np.unique yields numpy integers; hand snap a plain Python int.
        NIdV.Add(int(node_id))
    subg = snap.GetSubGraph(g, NIdV)
    print(subg.GetNodes())
    return subg
def process_cite_2(Lfile, Cfile, min_in_degree=200):
    """Save the citation subgraph induced on highly cited nodes.

    Lfile: path of the edge list to load as a directed graph (columns 0, 1).
    Cfile: accepted for interface compatibility; not used here.
    min_in_degree: a node is kept when its in-degree is strictly greater
        than this value (default 200).

    The induced subgraph is written as an edge list to
    config.path + config.subpath + "Sub_cite.txt".
    """
    path = config.path + config.subpath

    # Disabled earlier step, kept for reference: save the largest connected
    # component.
    #   Graph = snap.LoadEdgeList(snap.PUNGraph, Lfile, 0, 1)
    #   MxScc = snap.GetMxScc(Graph)
    #   snap.SaveEdgeList(MxScc, path+"processed_2_cite.txt",
    #                     "Save as tab-separated list of edges")

    Graph = snap.LoadEdgeList(snap.PNGraph, Lfile, 0, 1)

    # Collect the nodes whose in-degree exceeds the threshold.
    LL = set()
    for node in Graph.Nodes():
        if node.GetInDeg() > min_in_degree:
            LL.add(node.GetId())

    # Induce the subgraph on the collected ids. Previously the set was built
    # and then ignored in favour of a vector holding one hard-coded id.
    hub_ids = snap.TIntV()
    for node_id in sorted(LL):
        hub_ids.Add(node_id)
    SubG = snap.GetSubGraph(Graph, hub_ids)
    snap.SaveEdgeList(SubG, path + "Sub_cite.txt")
def get_basic_feature(graph, node_id):
    """Return three ego-net features of node_id as a snap.TFltV.

    The ego net is the subgraph induced on node_id and its out-neighbours.
      v_1: degree of node_id
      v_2: number of edges inside the ego net
      v_3: sum of the degrees (in the full graph) of the ego-net nodes minus
           2 * v_2
    """
    NI = graph.GetNI(node_id)
    v_1 = NI.GetDeg()

    # Plain loop: Add() is called for its side effect only.
    nbr_vec = snap.TIntV()
    for nbr in NI.GetOutEdges():
        nbr_vec.Add(nbr)
    nbr_vec.Add(node_id)

    subgraph = snap.GetSubGraph(graph, nbr_vec)
    v_2 = subgraph.GetEdges()

    total_edges = 0
    for node in subgraph.Nodes():
        total_edges += graph.GetNI(node.GetId()).GetDeg()
    v_3 = total_edges - 2 * v_2

    feature = snap.TFltV()
    feature.Add(v_1)
    feature.Add(v_2)
    feature.Add(v_3)
    return feature
def featureVect(G, nId):
    '''
    :param nId: graph node Id
    :param G: graph (undirected)
    :return: feature vector of [deg,#egoInsideEdges,#egooutedges]:
        - deg: degree of nId
        - #egoInsideEdges: edges among the neighbours of nId, plus deg
        - #egooutedges: out-edges of the neighbours that lead to a node which
          is neither another neighbour nor nId itself
    '''
    node = G.GetNI(nId)
    degree = node.GetDeg()

    NIdV = snap.TIntV()
    for nbrId in node.GetOutEdges():
        NIdV.Add(nbrId)

    egoNet = snap.GetSubGraph(G, NIdV)
    egoinsideEdges = egoNet.GetEdges()

    # Set lookup instead of rescanning the vector for every edge endpoint.
    nbrIds = set(NIdV)
    egoOutEdges = 0
    for nbrId in NIdV:
        for dstNId in G.GetNI(nbrId).GetOutEdges():
            if dstNId not in nbrIds and dstNId != nId:
                egoOutEdges += 1

    return [degree, egoinsideEdges + degree, egoOutEdges]
def calcClusteringCoefficientSingleNode(Node, Graph):
    """
    :param - Node: node from snap.PUNGraph object. Graph.Nodes() will give an
                   iterable of nodes in a graph
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: local clustering coefficient of Node, 2*e / (k*(k-1)), where k
             is the degree of Node and e is the number of edges among its
             neighbours; 0.0 when k < 2
    """
    ki = Node.GetOutDeg()
    if ki < 2:
        # Fewer than two neighbours: no neighbour pair can be connected.
        return 0.0

    neighbors = snap.TIntV()
    for nbrId in Node.GetOutEdges():
        neighbors.Add(nbrId)

    # Edges of the subgraph induced on the neighbours only.
    ei = snap.GetSubGraph(Graph, neighbors).GetEdges()
    return 2 * ei / float(ki * (ki - 1))
def generateArtificialExamples(graph,
                               newProportion=0.05,
                               testProportion=0.3,
                               seed=None,
                               filename=None):
    """
    Builds an artificial "old" graph by removing a random sample of edges
    from graph, then delegates to generateExampleSplit

    Args:
        graph: snap graph treated as the "new" graph; it is not modified
        newProportion (float): fraction of graph's edges to remove when
            building the old graph
        testProportion, seed, filename: forwarded unchanged to
            generateExampleSplit

    Returns:
        Whatever generateExampleSplit(oldGraph, graph, testProportion, seed,
        filename) returns
    """
    # SNAP doesn't come with a graph copy method, so just induce a subgraph
    # on the same set of nodes
    nodeIDVec = snap.TIntV()
    for node in graph.Nodes():
        nodeIDVec.Add(node.GetId())
    oldGraph = snap.GetSubGraph(graph, nodeIDVec)

    # Sample newProportion of edges and remove them from the copy to
    # artificially generate the old graph
    sampleSize = int(graph.GetEdges() * newProportion)
    print("Sampling {} new edges out of {} total edges".format(
        sampleSize, graph.GetEdges()))
    newEdges = random.sample([(edge.GetSrcNId(), edge.GetDstNId())
                              for edge in graph.Edges()], sampleSize)
    for srcNID, dstNID in newEdges:
        oldGraph.DelEdge(srcNID, dstNID)

    return generateExampleSplit(oldGraph, graph, testProportion, seed, filename)