def degreeDiscountIC(G, k, p=.01):
    ''' Choose k seed nodes for the Independent Cascade model using the
    degree discount heuristic backed by a priority queue.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    k -- number of nodes needed
    p -- propagation probability
    Output:
    seeds -- list of the k chosen nodes, in selection order
    '''
    seeds = []
    queue = PQ()  # holds negated discounted degrees
    weighted_degree = {}  # sum of incident edge weights per node
    seeded_weight = {}  # weight of edges towards already chosen seeds

    # every node starts with its plain weighted degree
    for node in G.nodes():
        weighted_degree[node] = sum([G[node][nbr]['weight'] for nbr in G[node]])
        queue.add_task(node, -weighted_degree[node])
        seeded_weight[node] = 0

    # greedily take the node with the largest discounted degree
    for _ in range(k):
        best, _priority = queue.pop_item()
        seeds.append(best)
        for nbr in G[best]:
            if nbr in seeds:
                continue
            seeded_weight[nbr] += G[best][nbr]['weight']
            deg = weighted_degree[nbr]
            hit = seeded_weight[nbr]
            discounted = deg - 2 * hit - (deg - hit) * hit * p
            queue.add_task(nbr, -discounted)
    return seeds
def generalGreedy(G, k, p=.01):
    ''' Build a seed set with the general greedy heuristic.

    Input:
    G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output:
    seeds -- list of k nodes to start the propagation from
    '''
    import time
    started = time.time()
    runs = 20  # number of Random Cascade simulations per candidate
    seeds = []

    for step in range(k):
        queue = PQ()
        for node in G.nodes():  # visit every node of G
            if node in seeds:
                continue
            queue.add_task(node, 0)  # the spread estimate starts at priority 0
            for _ in range(runs):
                # Read the candidate's current priority, then re-add it:
                # add_task on an existing task replaces its entry, so this
                # only updates the priority. The larger the cascade started
                # from seeds + [node], the lower the priority becomes.
                current, _count, _task = queue.entry_finder[node]
                spread = float(len(runIC(G, seeds + [node], p)))
                queue.add_task(node, current - spread / runs)
        # the lowest priority belongs to the candidate with the largest
        # average spread
        winner, _priority = queue.pop_item()
        seeds.append(winner)
        print(step, k, time.time() - started)
    return seeds
def generalGreedy(G, k, p=.01):
    ''' Build a seed set with the general greedy heuristic.

    Input:
    G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output:
    seeds -- list of k nodes to start the propagation from
    '''
    runs = 200  # number of Random Cascade simulations per candidate
    seeds = []

    # the rounds depend on each other, so they run sequentially
    for _ in range(k):
        queue = PQ()
        for node in G.nodes():
            if node in seeds:
                continue
            queue.add_task(node, 0)  # the spread estimate starts at 0
            for _run in range(runs):
                # subtract the normalized spread of one more simulation
                current, _count, _task = queue.entry_finder[node]
                spread = float(len(runIC(G, seeds + [node], p)))
                queue.add_task(node, current - spread / runs)
        winner, winner_priority = queue.pop_item()
        print(winner, winner_priority)
        seeds.append(winner)
    return seeds
def NewDiscount(G, k, p):
    ''' Degree discount seed selection with per-edge propagation
    probabilities.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    k -- number of nodes needed
    p -- mapping indexed as p[u, v] giving the propagation probability
         of the edge from the chosen node u to its neighbour v
    Output:
    S -- list of the k chosen nodes, in selection order
    '''
    S = []
    selected = set()  # same content as S, for O(1) membership tests
    dd = PQ()  # degree discount (stored negated)
    t = dict()  # weight of adjacent vertices that are in S
    d = dict()  # degree of each vertex

    # Iterate over the nodes themselves: on networkx 2.x, G.degree() yields
    # (node, degree) pairs, which cannot be used to index G.
    for u in G.nodes():
        # Each neighbour adds degree 1. The weighted sum that used to be
        # computed here was immediately overwritten, so it is dropped.
        d[u] = len(G[u])
        dd.add_task(u, -d[u])
        t[u] = 0

    # add vertices to S greedily
    for _ in range(k):
        u, _priority = dd.pop_item()  # node with maximal degree discount
        S.append(u)
        selected.add(u)
        for v in G[u]:
            if v in selected:
                continue
            t[v] += G[u][v]['weight']  # increase weight of selected neighbours
            priority = d[v] - 2 * t[v] - (d[v] - t[v]) * t[v] * p[u, v]
            dd.add_task(v, -priority)
    return S
def GDD(G, k, Ep):
    ''' Generalized degree discount: choose k seed nodes for the
    Independent Cascade model using a priority queue.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    k -- number of nodes needed
    Ep -- propagation probabilities, indexed by edge tuple (u, v)
    Output:
    seeds -- list of the k chosen nodes, in selection order
    '''
    seeds = []
    queue = PQ()
    stays_inactive = {}  # probability that no chosen seed activates the node
    expected_gain = {}  # summed activation probabilities over neighbours

    # initial score of every node
    for node in G:
        stays_inactive[node] = 1
        expected_gain[node] = sum(
            [1 - (1 - Ep[(node, nbr)])**G[node][nbr]["weight"]
             for nbr in G[node]])
        score = stays_inactive[node] * (1 + expected_gain[node])
        queue.add_task(node, -score)

    # greedily take the node with the largest score
    for _ in range(k):
        best, _priority = queue.pop_item()
        seeds.append(best)
        for nbr in G[best]:
            if nbr in seeds:
                continue
            # probability that the edge best -> nbr fails on every copy
            miss = (1 - Ep[(best, nbr)])**G[best][nbr]['weight']
            stays_inactive[nbr] *= miss
            expected_gain[nbr] -= 1 - miss
            score = stays_inactive[nbr] * (1 + expected_gain[nbr])
            queue.add_task(nbr, -score)
    return seeds
def distanceHeuristic(G, S):
    ''' Map every node of G to its _sumDist value with respect to S.

    Input:
    G -- networkx graph object
    S -- collection of nodes passed through to _sumDist
    Output:
    dict {node: _sumDist(G, S, node)} covering all nodes of G
    '''
    # NOTE(review): _sumDist is defined elsewhere; presumably it sums the
    # distances from the node to the members of S -- confirm.
    # The priority queue that used to be created here was never used.
    return {u: _sumDist(G, S, u) for u in G.nodes()}
def stopDegreeDiscount(G, tsize, ic_step=1, p=.01, iterations=200):
    ''' Grow a degree discount seed set until its estimated Independent
    Cascade spread reaches a target size.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    tsize -- number of nodes necessary to reach
    ic_step -- number of seeds added between two spread estimations
    p -- propagation probability
    iterations -- forwarded to avgSize for every spread estimation
    Output:
    S -- seed set (list, in selection order)
    Tspread -- dict {seed set size: estimated spread}
    When ic_step is larger than 2 the result of binarySearchBoundary is
    returned instead.
    '''
    S = []
    dd = PQ()  # degree discount (stored negated)
    t = dict()  # weight of adjacent vertices that are in S
    d = dict()  # weighted degree of each vertex

    # initialize degree discount
    for u in G.nodes():
        d[u] = sum(G[u][v]['weight'] for v in G[u])
        dd.add_task(u, -d[u])
        t[u] = 0

    # add vertices to S greedily until the target spread is reached
    Tspread = dict()  # spread for different k
    k = 0
    Tspread[k] = 0
    stepk = 1
    while Tspread[k] < tsize:
        u, priority = dd.pop_item()  # node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight']  # weight of selected neighbours
                priority = d[v] - 2 * t[v] - (d[v] - t[v]) * t[v] * p
                dd.add_task(v, -priority)
        # the spread is only re-estimated every ic_step seeds
        if stepk == ic_step:
            k = len(S)
            Tspread[k] = avgSize(G, S, p, iterations)
            # print() call instead of the Python 2 print statement
            print(k, Tspread[k])
            stepk = 0
        stepk += 1

    # With ic_step of 1 or 2 the boundary is returned as found; larger
    # steps are refined by binarySearchBoundary.
    if abs(int(math.ceil(float(ic_step) / 2))) == 1:
        return S, Tspread
    else:
        return binarySearchBoundary(G, k, Tspread, tsize, ic_step, p,
                                    iterations)
def degreeHeuristicSeed(G, k):
    ''' Return the k nodes with the most neighbours reached through an
    edge whose 'weight' is truthy.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    k -- number of nodes needed
    Output:
    seeds -- list of the k chosen nodes, highest degree first
    '''
    queue = PQ()
    for node in G:
        # count the neighbours connected by a non-zero weight
        neighbour_count = sum(1 for nbr in G[node] if G[node][nbr]['weight'])
        queue.add_task(node, -neighbour_count)

    seeds = []
    for _ in range(k):
        node, _priority = queue.pop_item()
        seeds.append(node)
    return seeds
def read(G):
    ''' Load the movie graph from 'movie_nodes.txt' and 'movie_edgesw.txt'
    into G and build the bookkeeping structures around it.

    Both files are tab separated. A node row is "<id>\t<name>" and an edge
    row is "<id>\t<id>\t<weight>".

    Input:
    G -- graph object providing add_node and add_edge; modified in place
    Output:
    id_to_name -- dict {node id: name}
    name_to_id -- dict {name: node id}
    distance -- dict {node id: MAXINT}
    predecessor -- dict {node id: None}
    pq -- PQ holding every node id with priority MAXINT
    G -- the graph that was passed in
    '''
    id_to_name = dict()
    name_to_id = dict()
    distance = dict()
    predecessor = dict()
    pq = PQ()

    # The with-blocks close the files themselves, so no explicit close()
    # is needed, and the rows are streamed instead of read in one go.
    with open('movie_nodes.txt') as fn:
        for row in fn:
            tokens = row.strip('\n').split('\t')
            node_id, name = tokens[0], tokens[1]
            id_to_name[node_id] = name
            name_to_id[name] = node_id
            G.add_node(node_id)
            # every node starts unreached and without a predecessor
            distance[node_id] = MAXINT
            pq.add_task(node_id, MAXINT)
            predecessor[node_id] = None

    with open('movie_edgesw.txt') as fn:
        for row in fn:
            tokens = row.strip('\n').split('\t')
            G.add_edge(tokens[0], tokens[1], weight=float(tokens[2]))

    return id_to_name, name_to_id, distance, predecessor, pq, G
def farthestNodes(k, G, m=1):
    ''' Greedily pick k nodes in increasing order of a priority that
    starts at cumulativeSum and drops by 1 per already chosen neighbour.

    Input:
    k -- number of nodes needed
    G -- networkx graph object
    m -- metric selector; only m == 1 fills the queue
    Output:
    S -- list of the chosen nodes, in selection order
    '''
    # NOTE(review): cumulativeSum is defined elsewhere; what it measures
    # is not visible from here.
    S = []
    S_dist = PQ()
    # S is still empty here, so every node is a candidate.
    for v in G.nodes():
        if m == 1:
            S_dist.add_task(v, cumulativeSum(G, S, v))

    while len(S) < k:
        u, priority = S_dist.pop_item()
        S.append(u)
        # only the neighbours of the node just chosen are affected
        for v in G[u]:
            if v not in S:
                priority, _count, _task = S_dist.entry_finder[v]
                if m == 1:
                    S_dist.add_task(v, priority - 1)
    # The chosen set used to be discarded; return it to the caller.
    return S
def generalGreedy(G, k, edgeProb, flag='N'):
    ''' Build a seed set with the general greedy heuristic.

    Input:
    G -- networkx Graph object
    k -- number of initial nodes needed
    edgeProb -- propagation probabilities, forwarded to runIC
    flag -- forwarded unchanged to runIC
    Output:
    S -- list of k nodes to start the propagation from
    '''
    # The timing bookkeeping that used to live here was never read.
    R = 5  # number of Random Cascade simulations per candidate
    S = []
    for _ in range(k):
        s = PQ()
        for v in G.nodes():
            if v in S:
                continue
            s.add_task(v, 0)  # the spread estimate starts at 0
            for _run in range(R):
                # subtract the normalized spread of one more simulation
                priority, _count, _task = s.entry_finder[v]
                spread = float(len(runIC(G, S + [v], edgeProb, flag)))
                s.add_task(v, priority - spread / R)
        # lowest priority == largest average spread
        task, _priority = s.pop_item()
        S.append(task)
    return S
def degreeHeuristic(G, k, p=.01):
    ''' Choose the k nodes with the largest weighted degree.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    k -- number of nodes needed
    p -- propagation probability (not used by this heuristic)
    Output:
    seeds -- list of the k chosen nodes, highest degree first
    '''
    queue = PQ()
    for node in G:
        weighted_degree = sum(G[node][nbr]['weight'] for nbr in G[node])
        queue.add_task(node, -weighted_degree)

    seeds = []
    for _ in range(k):
        node, _priority = queue.pop_item()
        seeds.append(node)
    return seeds
def FIND_LDAG(G, v, t, Ew):
    ''' Compute local DAG for vertex v.

    Reference: W. Chen "Scalable Influence Maximization in Social Networks
    under LT model", Algorithm 3

    INPUT:
    G -- networkx DiGraph object whose edges carry a 'weight' attribute
    v -- vertex of G
    t -- parameter theta
    Ew -- influence weights of G, indexed by edge tuple (u, x)
    NOTE: Since graph G can have multiple edges between u and v, total
    influence weight between u and v will be number of edges times
    influence weight of one edge.
    OUTPUT:
    D -- networkx DiGraph object that is also LDAG
    '''
    # Influence towards v is stored negated so that pop_item returns the
    # node with the largest influence.
    Inf = PQ()
    Inf.add_task(v, -1)
    x, priority = Inf.pop_item()
    M = -priority
    X = {x}  # nodes already placed in the LDAG
    D = nx.DiGraph()
    while M >= t:
        # keep the edges from x to nodes that are already in the LDAG
        for (v1, v2, edata) in G.out_edges([x], data=True):
            if v2 in X:
                # Unpack the attributes as keywords: networkx 2.x no longer
                # accepts the attribute dict as a third positional argument.
                D.add_edge(v1, v2, **edata)
        # propagate the influence of x to its in-neighbours
        for (u, _) in G.in_edges([x]):
            if u not in X:
                try:
                    pr = Inf.entry_finder[u][0]
                except KeyError:
                    pr = 0  # first time u is reached
                Inf.add_task(u, pr - G[u][x]['weight'] * Ew[(u, x)] * M)
        try:
            x, priority = Inf.pop_item()
        except KeyError:
            # no candidates left
            return D
        M = -priority
        X.add(x)
    return D
def getScores(G, Ep):
    ''' Compute the initial GDD score of every node.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    Ep -- propagation probabilities, indexed by edge tuple (u, v)
    Output:
    queue -- PQ holding every node with its negated score
    stays_inactive -- dict {node: 1}
    expected_gain -- dict {node: summed activation probability of its
                     neighbours}
    '''
    queue = PQ()
    stays_inactive = {}
    expected_gain = {}
    for node in G:
        stays_inactive[node] = 1
        expected_gain[node] = sum(
            [1 - (1 - Ep[(node, nbr)])**G[node][nbr]["weight"]
             for nbr in G[node]])
        score = stays_inactive[node] * (1 + expected_gain[node])
        queue.add_task(node, -score)
    return queue, stays_inactive, expected_gain
def degreeDiscountStar(G, k, p=.01):
    ''' Degree discount variant scoring a node v by
    (1 - p)**t[v] * (1 + (d[v] - t[v]) * p).

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    k -- number of nodes needed
    p -- propagation probability
    Output:
    S -- list of the k chosen nodes, in selection order
    '''
    S = []
    scores = PQ()  # holds negated scores
    d = dict()  # weighted degree of each vertex
    t = dict()  # weight of adjacent vertices that are in S

    for u in G:
        d[u] = sum(G[u][v]['weight'] for v in G[u])
        t[u] = 0
        score = -((1 - p)**t[u]) * (1 + (d[u] - t[u]) * p)
        # The computed score was previously not passed to the queue.
        scores.add_task(u, score)

    for iteration in range(k):
        u, priority = scores.pop_item()
        print(iteration, -priority)
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight']
                # Re-score the neighbour v from its own counters; the old
                # code read the counters of the selected node u here.
                score = -((1 - p)**t[v]) * (1 + (d[v] - t[v]) * p)
                scores.add_task(v, score)
    return S
def singleDiscount(G, k, p=.1):
    ''' Choose k seed nodes with the single discount heuristic: after a
    node is chosen, the degree of each neighbour is reduced by the weight
    of the connecting edge.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    k -- number of nodes needed
    p -- propagation probability (not used by this heuristic)
    Output:
    seeds -- list of the k chosen nodes, in selection order
    '''
    seeds = []
    queue = PQ()  # holds negated degrees
    for node in G:
        weighted_degree = sum(G[node][nbr]['weight'] for nbr in G[node])
        queue.add_task(node, -weighted_degree)

    for _ in range(k):
        best, _priority = queue.pop_item()
        seeds.append(best)
        for nbr in G[best]:
            if nbr in seeds:
                continue
            current, _count, _task = queue.entry_finder[nbr]
            # priorities are negated degrees, so adding the edge weight
            # discounts the degree
            queue.add_task(nbr, current + G[best][nbr]['weight'])
    return seeds
def generalGreedy_parallel_inf(G, k, p=.01):
    ''' Build a seed set with the general greedy heuristic, running the
    cascade simulations of each candidate in a process pool.

    Input:
    G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output:
    S -- list of k nodes to start the propagation from
    '''
    # NOTE(review): map_IC is defined elsewhere; its results are summed, so
    # it presumably returns the spread size of one simulation -- confirm.
    R = 500  # number of Random Cascade simulations per candidate
    S = []

    # Floor division keeps the worker count an integer on Python 3, and at
    # least one worker is always requested.
    workers = max(1, multiprocessing.cpu_count() // 2)
    # One pool serves the whole run instead of one pool per candidate, and
    # it is released even when a simulation fails.
    pool = multiprocessing.Pool(workers)
    try:
        for _ in range(k):
            s = PQ()
            for v in G.nodes():
                if v in S:
                    continue
                results = pool.map(map_IC, [(G, S + [v], p)] * R)
                # negated average spread: the best candidate pops first
                s.add_task(v, -(float(np.sum(results)) / R))
            task, _priority = s.pop_item()
            S.append(task)
    finally:
        pool.close()
        pool.join()
    return S
def spreadNewGreedyIC(G, targeted_size, step=1, p=.01, S0=None, iterations=200):
    ''' Grow a seed set with the NewGreedyIC heuristic until its estimated
    Independent Cascade spread exceeds a target size.

    Input:
    G -- networkx graph object whose edges carry a 'weight' attribute
    targeted_size -- spread to exceed; seeds are added while the estimated
                     spread is <= targeted_size
    step -- currently unused (see TODO)
    p -- propagation probability
    S0 -- list of nodes to start from; it is copied, never modified.
          None means an empty start set.
    iterations -- number of sampled graphs per round and of runIC
                  simulations per spread estimate
    Output:
    S -- list holding the start nodes followed by the chosen nodes

    TODO: add step functionality
    '''
    import time
    start = time.time()

    if S0 is None:
        S0 = []
    assert isinstance(S0, list), \
        "S0 must be a list. %s provided instead" % type(S0)
    # Work on a copy so neither the caller's list nor a shared default is
    # mutated by the appends below.
    S = list(S0)

    R = iterations

    def estimate_spread():
        # average size of R cascades started from the current seed set
        total = 0
        for _ in range(R):
            total += float(len(runIC(G, S, p))) / R
        return total

    tsize = estimate_spread()
    round_idx = 0  # selection round, used for logging only
    while tsize <= targeted_size:
        # additional nodes each remaining node would bring to S, averaged
        # over R sampled graphs (stored negated)
        s = PQ()
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)

        # progress is reported in steps of 10% of the R samples
        prg_idx = 1
        idx = 1
        prcnt = .1
        for _ in range(R):
            # Sample a graph by removing every edge that is NOT live. An
            # edge of weight w is live with probability 1 - (1 - p)**w; the
            # previous test removed the live edges instead.
            E = deepcopy(G)
            edge_rem = []
            for (u, v) in E.edges():
                w = G[u][v]['weight']
                if random() >= 1 - (1 - p)**w:
                    edge_rem.append((u, v))
            E.remove_edges_from(edge_rem)

            # nodes that are seeds or already reached from the seeds bring
            # no additional spread in this sample
            covered = set(S)
            covered.update(bfs(E, S))
            for v in G.nodes():
                if v not in covered:
                    priority, _count, _task = s.entry_finder[v]
                    s.add_task(v, priority - float(len(bfs(E, [v]))) / R)

            if idx == int(prg_idx * prcnt * R):
                print('%s%%...' % (int(prg_idx * prcnt * 100)))
                prg_idx += 1
            idx += 1

        # add the vertex with maximum potential spread
        task, priority = s.pop_item()
        S.append(task)
        print(round_idx, len(S), task, -priority, time.time() - start)
        round_idx += 1

        tsize = estimate_spread()
    return S
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 h_l=0,
                                 color='all',
                                 seed_size_budget=14,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Greedy set cover seed selection for three node groups: seeds are
    added until the capped reach, summed over the groups, is at least
    3 * budget.

    Input:
    filename -- prefix of the result files; an algorithm specific suffix
                is appended before writing
    G -- networkx Graph object whose nodes carry a 'color' attribute
    budget -- fraction of each group that needs to be influenced
    h_l -- forwarded to the candidate evaluation and to map_fair_IC
    color -- forwarded to the candidate evaluation (type_algo 1 only)
    seed_size_budget -- currently unused
    gamma_a, gamma_b -- forwarded to map_IC_timing (type_algo 2 and 3)
    type_algo -- 1: set cover, 2: timings with gamma_a and gamma_b,
                 3: timings with gamma_a used for both groups
    Output:
    (influenced, influenced_r, influenced_b, influenced_n,
     seeds_r, seeds_b, seeds_n) -- one entry per selected seed
    '''
    stats = ut.graph_stats(G, print_stats=False)

    # The placeholders were previously written to the file name verbatim,
    # so runs with different parameters shared one file.
    if type_algo == 1:
        filename = filename + '_set_cover_reach_' + str(budget)
    elif type_algo == 2:
        filename = filename + (
            '_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}'
            '_gamma_b_{gamma_b}_').format(
                budget=budget, gamma_a=gamma_a, gamma_b=gamma_b)
    elif type_algo == 3:
        filename = filename + (
            '_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}'
            '_gamma_b_{gamma_a}_').format(budget=budget, gamma_a=gamma_a)

    reach = 0.0
    S = []  # set of selected nodes
    influenced = []
    influenced_r = []
    influenced_b = []
    influenced_n = []
    seeds_r = []
    seeds_b = []
    seeds_n = []

    i = 0
    while reach < 3 * budget:
        # evaluate every node as the next seed in parallel; at least one
        # worker is requested even on a single core machine
        pool = multiprocessing.Pool(max(1, multiprocessing.cpu_count() - 1))
        try:
            if type_algo == 1:
                results = pool.map(map_select_next_seed_set_cover,
                                   ((G, S, v, h_l, color) for v in G.nodes()))
            elif type_algo == 2:
                results = pool.map(map_IC_timing,
                                   ((G, S, v, gamma_a, gamma_b)
                                    for v in G.nodes()))
            elif type_algo == 3:
                results = pool.map(map_IC_timing,
                                   ((G, S, v, gamma_a, gamma_a)
                                    for v in G.nodes()))
            else:
                raise ValueError('type_algo must be 1, 2 or 3, got %r' %
                                 (type_algo, ))
        finally:
            # release the workers even when an evaluation fails
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        for v, p, p_a, p_b, p_c in results:
            # Capped reach summed over the three groups, negated so the
            # best candidate pops first. The third term uses the third
            # group's spread p_c (it previously reused p_b).
            s.add_task(
                v, -(min(p_a / stats['group_r'], budget) +
                     min(p_b / stats['group_b'], budget) +
                     min(p_c / stats['group_n'], budget)))

        node, priority = s.pop_item()
        S.append(node)

        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_r.append(I_a)
        influenced_b.append(I_b)
        influenced_n.append(I_c)

        # split the current seed set by node colour
        S_red = []
        S_blue = []
        S_purple = []
        group = G.nodes[node]['color']
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        # ids of the seeds so the influence can be recreated
        seeds_r.append(S_red)
        seeds_b.append(S_blue)
        seeds_n.append(S_purple)

        reach_a = I_a / stats['group_r']
        reach_b = I_b / stats['group_b']
        reach_c = I_c / stats['group_n']
        reach = (min(reach_a, budget) + min(reach_b, budget) +
                 min(reach_c, budget))

        print(
            str(i + 1) + ' Node ID ' + str(node) + ' group ' + str(group) +
            ' Ia = ' + str(I_a) + ' Ib ' + str(I_b) + ' Ic ' + str(I_c) +
            ' each: ' + str(reach) + ' reach_a ' + str(reach_a) +
            ' reach_b ' + str(reach_b) + ' reach_c ' + str(reach_c))
        i += 1

    ut.write_files(filename, influenced, influenced_r, influenced_b,
                   influenced_n, seeds_r, seeds_b, seeds_n)

    return (influenced, influenced_r, influenced_b, influenced_n, seeds_r,
            seeds_b, seeds_n)
def generalGreedy_node_parallel(filename,
                                G,
                                budget,
                                h_l,
                                gamma1,
                                gamma2,
                                beta1=1.0,
                                beta2=1.0,
                                type_algo=1):
    ''' Greedy seed selection for three node groups, evaluating all
    candidate nodes of a round in a process pool.

    Results stored under the derived file name are reused: when they
    already hold at least `budget` seeds they are plotted and returned,
    otherwise the missing seeds are added to them.

    Input:
    filename -- prefix of the result files; an algorithm specific suffix
                is appended
    G -- networkx Graph object whose nodes carry a 'color' attribute
    budget -- number of seeds needed
    h_l -- forwarded to the candidate evaluation (type_algo 1) and to
           map_fair_IC
    gamma1, gamma2, beta1, beta2 -- parameters of the log / root variants
    type_algo -- 1: greedy, 2: log, 3: root, 4: root majority
    Output:
    (influenced, influenced_a, influenced_b, influenced_c,
     seeds_a, seeds_b, seeds_c) -- one entry per selected seed
    '''
    S = []  # set of selected nodes
    influenced = []
    influenced_a = []
    influenced_b = []
    influenced_c = []
    seeds_a = []
    seeds_b = []
    seeds_c = []

    # The placeholders were previously written to the file name verbatim,
    # so runs with different parameters shared (and resumed from) one
    # file. Pairs are rendered as tuples, like the f-string form
    # "{gamma1,gamma2}" would.
    if type_algo == 1:
        filename = filename + '_greedy_'
    elif type_algo == 2:
        filename = filename + '_log_gamma_{}_'.format((gamma1, gamma2))
    elif type_algo == 3:
        filename = filename + '_root_gamma_{}_beta_{}_'.format(
            gamma1, (beta1, beta2))
    elif type_algo == 4:
        filename = filename + '_root_majority_gamma_{}_beta_{}_'.format(
            gamma1, (beta1, beta2))

    stats = ut.graph_stats(G, print_stats=False)

    try:
        (influenced, influenced_a, influenced_b, influenced_c, seeds_a,
         seeds_b, seeds_c) = ut.read_files(filename)
        S = seeds_a[-1] + seeds_b[-1] + seeds_c[-1]
        if len(S) >= budget:
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print("\n\n")
            print(influenced_c)
            print(" Seed length ", len(S))
            # same group keys as the plot call at the end of the function
            ut.plot_influence(influenced_a, influenced_b, influenced_c,
                              len(S), filename, stats['group_r'],
                              stats['group_b'], stats['group_n'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b],
                              [len(S_c) for S_c in seeds_c])
            return (influenced, influenced_a, influenced_b, influenced_c,
                    seeds_a, seeds_b, seeds_c)
        else:
            seed_range = range(budget - len(S))
    except FileNotFoundError:
        print('{} not Found '.format(filename))
        seed_range = range(budget)

    # add the node achieving maximum propagation together with the nodes
    # chosen so far; the rounds depend on each other and run sequentially
    for i in seed_range:
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        try:
            if type_algo == 1:
                results = pool.starmap(
                    map_select_next_seed_set_cover,
                    zip(repeat(G), repeat(S), list(G.nodes()), repeat(h_l)))
            elif type_algo == 2:
                results = pool.map(map_select_next_seed_log_greedy,
                                   ((G, S, v, gamma1, gamma2)
                                    for v in G.nodes()))
            elif type_algo == 3:
                results = pool.map(map_select_next_seed_root_greedy,
                                   ((G, S, v, gamma1, beta1, beta2)
                                    for v in G.nodes()))
            elif type_algo == 4:
                results = pool.map(map_select_next_seed_root_majority_greedy,
                                   ((G, S, v, gamma1) for v in G.nodes()))
            else:
                raise ValueError('type_algo must be 1, 2, 3 or 4, got %r' %
                                 (type_algo, ))
        finally:
            # release the workers even when an evaluation fails
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        for v, priority, p_a, p_b, p_c in results:
            s.add_task(v, -priority)  # negated: the best candidate pops first

        node, priority = s.pop_item()
        S.append(node)
        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        influenced_c.append(I_c)

        group = G.nodes[node]['color']
        print(
            str(i + 1) + ' Selected Node is ' + str(node) + ' group ' +
            str(group) + ' Ia = ' + str(I_a) + ' Ib = ' + str(I_b) +
            ' Ic = ' + str(I_c))

        # Split the current seed set by node colour. The chain must be
        # exclusive: with two independent ifs, red seeds also landed in
        # the third group.
        S_red = []
        S_blue = []
        S_purple = []
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        # ids of the seeds so the influence can be recreated
        seeds_a.append(S_red)
        seeds_b.append(S_blue)
        seeds_c.append(S_purple)

    ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                      filename, stats['group_r'], stats['group_b'],
                      stats['group_n'], [len(S_a) for S_a in seeds_a],
                      [len(S_b) for S_b in seeds_b],
                      [len(S_c) for S_c in seeds_c])

    ut.write_files(filename, influenced, influenced_a, influenced_b,
                   influenced_c, seeds_a, seeds_b, seeds_c)

    return (influenced, influenced_a, influenced_b, influenced_c, seeds_a,
            seeds_b, seeds_c)
def generalGreedy_node_parallel(filename,
                                G,
                                budget,
                                gamma,
                                beta=1.0,
                                type_algo=1,
                                G_greedy=None):
    ''' Greedy seed selection for an arbitrary number of node groups,
    evaluating all candidate nodes of a round in a process pool.

    Input:
    filename -- prefix of the result files; an algorithm specific suffix
                is appended
    G -- networkx Graph object whose nodes carry a 'color' attribute;
         used to measure the influence of the chosen seeds
    budget -- number of seeds needed
    gamma, beta -- parameters of the log / root variants
    type_algo -- 1: greedy, 2: log, 3: root, 4: root majority
    G_greedy -- graph the candidates are evaluated on; defaults to G
    Output:
    (influenced, influenced_grouped, seeds) -- one entry per selected
    seed; each entry of seeds maps a colour to the seeds of that colour
    Raises:
    Exception -- when results already exist under the derived file name
    '''
    if G_greedy is None:
        G_greedy = G

    S = []  # set of selected nodes
    influenced = []
    influenced_grouped = []
    seeds = []

    if type_algo == 1:
        filename = filename + '_greedy_'
    elif type_algo == 2:
        filename = filename + f'_log_gamma_{gamma}_'
    elif type_algo == 3:
        filename = filename + f'_root_gamma_{gamma}_beta_{beta}_'
    elif type_algo == 4:
        filename = filename + f'_root_majority_gamma_{gamma}_beta_{beta}_'

    # Resuming from stored results is deliberately disabled: finding them
    # is treated as an error. The resume code that used to follow the
    # raise could never run and has been removed.
    try:
        ut.read_files(filename)
    except FileNotFoundError:
        print(f'{filename} not Found ')
    else:
        raise Exception('It was supposed not to be reached.')

    # the set of colours does not change between rounds
    colors = np.unique([G.nodes[v]['color'] for v in G.nodes])

    # add the node achieving maximum propagation together with the nodes
    # chosen so far; the rounds depend on each other and run sequentially
    for i in range(budget):
        print('--------', i)
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        try:
            if type_algo == 1:
                results = pool.map(map_select_next_seed_greedy,
                                   ((G_greedy, S, v)
                                    for v in G_greedy.nodes()))
            elif type_algo == 2:
                results = pool.map(map_select_next_seed_log_greedy,
                                   ((G_greedy, S, v, gamma)
                                    for v in G_greedy.nodes()))
            elif type_algo == 3:
                results = pool.map(map_select_next_seed_root_greedy,
                                   ((G_greedy, S, v, gamma, beta)
                                    for v in G_greedy.nodes()))
            elif type_algo == 4:
                results = pool.map(map_select_next_seed_root_majority_greedy,
                                   ((G_greedy, S, v, gamma)
                                    for v in G_greedy.nodes()))
            else:
                raise ValueError(
                    f'type_algo must be 1, 2, 3 or 4, got {type_algo!r}')
        finally:
            # release the workers even when an evaluation fails
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        # NOTE(review): the priorities are queued as returned; presumably
        # the map functions already negate the gain -- confirm.
        for v, priority in results:
            s.add_task(v, priority)

        node, priority = s.pop_item()
        S.append(node)
        I, I_grouped = map_fair_IC((G, S))
        influenced.append(I)
        influenced_grouped.append(I_grouped)
        group = G.nodes[node]['color']
        print(
            f'{i + 1} Selected Node is {node} group {group} I_grouped = {I_grouped}'
        )

        # current seed set split by colour, so the influence can be
        # recreated
        S_g = {c: [] for c in colors}
        for n in S:
            S_g[G.nodes[n]['color']].append(n)
        seeds.append(S_g)

    ut.write_files(filename, influenced, influenced_grouped, seeds)

    return (influenced, influenced_grouped, seeds)
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Greedy set cover seed selection for two node groups: seeds are
    added until the capped reach, summed over both groups, is at least
    2 * budget.

    Results stored under the derived file name are reused: when they
    already meet the target they are plotted and returned, otherwise the
    selection continues from them.

    Input:
    filename -- prefix of the result files; an algorithm specific suffix
                is appended
    G -- networkx Graph object whose nodes carry a 'color' attribute
    budget -- fraction of each group that needs to be influenced
    gamma_a, gamma_b -- forwarded to map_IC_timing (type_algo 2 and 3)
    type_algo -- 1: set cover, 2: timings with gamma_a and gamma_b,
                 3: timings with gamma_a used for both groups
    Output:
    (influenced, influenced_a, influenced_b, seeds_a, seeds_b) -- one
    entry per selected seed
    '''
    stats = ut.graph_stats(G, print_stats=False)

    if type_algo == 1:
        filename = filename + f'_set_cover_reach_{budget}_'
    elif type_algo == 2:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    elif type_algo == 3:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'

    target = 2 * budget  # both groups have to reach `budget`
    reach = 0.0
    S = []  # set of selected nodes
    influenced = []
    influenced_a = []
    influenced_b = []
    seeds_a = []
    seeds_b = []

    try:
        influenced, influenced_a, influenced_b, seeds_a, seeds_b = ut.read_files(
            filename)
        reach = min(influenced_a[-1] / stats['group_a'], budget) + min(
            influenced_b[-1] / stats['group_b'], budget)
        S = seeds_a[-1] + seeds_b[-1]
        # Stored results are final only when they meet the same target as
        # the selection loop below; the check used to compare against
        # `budget` and could return a set covering a single group.
        if reach >= target:
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print(f" reach: {reach}")
            ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                              stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])
            return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)
    except FileNotFoundError:
        print(f'{filename} not Found ')

    i = 0
    while reach < target:
        # evaluate every node as the next seed in parallel; at least one
        # worker is requested even on a single core machine
        pool = multiprocessing.Pool(max(1, multiprocessing.cpu_count() - 1))
        try:
            if type_algo == 1:
                results = pool.map(map_select_next_seed_set_cover,
                                   ((G, S, v) for v in G.nodes()))
            elif type_algo == 2:
                results = pool.map(map_IC_timing,
                                   ((G, S, v, gamma_a, gamma_b)
                                    for v in G.nodes()))
            elif type_algo == 3:
                results = pool.map(map_IC_timing,
                                   ((G, S, v, gamma_a, gamma_a)
                                    for v in G.nodes()))
            else:
                raise ValueError(
                    f'type_algo must be 1, 2 or 3, got {type_algo!r}')
        finally:
            # release the workers even when an evaluation fails
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        for v, p, p_a, p_b in results:
            # capped reach summed over both groups, negated so the best
            # candidate pops first
            s.add_task(
                v, -(min(p_a / stats['group_a'], budget) +
                     min(p_b / stats['group_b'], budget)))

        node, priority = s.pop_item()
        S.append(node)

        I, I_a, I_b = map_fair_IC((G, S))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)

        # split the current seed set by node colour
        S_red = []
        S_blue = []
        group = G.nodes[node]['color']
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            else:
                S_blue.append(n)

        # ids of the seeds so the influence can be recreated
        seeds_a.append(S_red)
        seeds_b.append(S_blue)

        reach_a = I_a / stats['group_a']
        reach_b = I_b / stats['group_b']
        reach = (min(reach_a, budget) + min(reach_b, budget))

        print(
            f'{i+1} Node ID {node} group {group} Ia = {I_a} Ib {I_b} reach: {reach} reach_a {reach_a} reach_b {reach_b}'
        )
        i += 1

    ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                      stats['group_a'], stats['group_b'],
                      [len(S_a) for S_a in seeds_a],
                      [len(S_b) for S_b in seeds_b])

    ut.write_files(filename, influenced, influenced_a, influenced_b, seeds_a,
                   seeds_b)

    return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)
def LDAG_heuristic(G, Ew, k, t):
    ''' LDAG algorithm for seed selection.

    Reference: [1] Algorithm 5

    Input:
    G -- directed graph (nx.DiGraph)
    Ew -- influence weights of edges (eg. uniform, random) (dict)
    k -- size of seed set (int)
    t -- parameter theta for finding LDAG (0 <= t <= 1; typical value:
         1/320)
    Output:
    S -- seed set (list, in selection order)
    '''
    # NOTE(review): computeAlpha and computeActProb are defined elsewhere;
    # this code relies on them returning dicts keyed by (node, u), where
    # node is the vertex passed as their fourth argument -- confirm.
    S = []
    IncInf = PQ()  # incremental influence of every node (stored negated)
    for node in G:
        IncInf.add_task(node, 0)
    LDAGs = dict()  # local DAG of every node
    InfSet = dict()  # for every node u, the roots whose LDAG contains u
    ap = dict()  # activation probabilities, keyed (root, u)
    A = dict()  # linear coefficients alpha, keyed (root, u)

    print('Initialization phase')
    for v in G:
        LDAGs[v] = FIND_LDAG(G, v, t, Ew)
        # update influence set for each node in LDAGs[v] with its root
        for u in LDAGs[v]:
            InfSet.setdefault(u, []).append(v)
        alpha = computeAlpha(LDAGs[v], Ew, S, v)
        A.update(alpha)  # add new linear coefficients to A
        # update incremental influence of all nodes in LDAGs[v] with alphas
        for u in LDAGs[v]:
            ap[(v, u)] = 0  # initial activation probability (line 7)
            priority, _, _ = IncInf.entry_finder[u]
            IncInf.add_task(u, priority - A[(v, u)])

    print('Main loop')
    for it in range(k):
        # choose the node with the biggest incremental influence
        s, priority = IncInf.pop_item()
        print(it + 1, s, -priority)
        # A node that appears in no LDAG influences nobody; it used to
        # raise KeyError here.
        for v in InfSet.get(s, []):
            if v in S:
                continue
            D = LDAGs[v]
            # update alpha_v_u for all u that can reach s in D
            # (lines 17-22)
            alpha_v_s = A[(v, s)]
            dA = computeAlpha(D, Ew, S, s, val=-alpha_v_s)
            # The loops unpack only the target u of each key, so the
            # selected node s is never rebound.
            for (_, u), delta in dA.items():
                if u == s or u in S:
                    continue  # u is already a seed
                A[(v, u)] += delta
                priority, _, _ = IncInf.entry_finder[u]
                IncInf.add_task(u, priority - delta * (1 - ap[(v, u)]))
            # update ap_v_u for all u reachable from s in D (lines 23-28)
            dap = computeActProb(D, Ew, S + [s], s, val=1 - ap[(v, s)])
            for (_, u), delta in dap.items():
                if u == s or u in S:
                    continue
                ap[(v, u)] += delta
                priority, _, _ = IncInf.entry_finder[u]
                IncInf.add_task(u, priority + A[(v, u)] * delta)
        S.append(s)
    return S
def representativeNodes(G, k, metric=1):
    ''' Finds the most distinguishable (representative) nodes in graph G
    greedily. Takes the node furthest from the already chosen nodes at
    each step.

    Input:
    G -- networkx object graph with weighted edges
    k -- number of nodes needed
    metric -- parameter for differentiating representative qualities
        metric == 1 trying to maximize total distance in the chosen set
                    of k nodes
        metric == 2 trying to maximize minimal distance between a pair of
                    k nodes
    Output:
    S -- chosen nodes; the two endpoints of the heaviest edge always come
         first
    objv -- objective value according to the chosen metric and set of
            nodes
    Raises:
    ValueError -- metric is neither 1 nor 2
    KeyError -- an edge has no 'weight' attribute
    '''
    if metric not in (1, 2):
        # objv would otherwise be undefined at the end of the function
        raise ValueError('metric must be 1 or 2, got %r' % (metric, ))

    S = []  # set of chosen nodes
    S_dist = PQ()  # distances from each node in G to set S (negated)

    # initialize S with the endpoints of the heaviest edge
    try:
        u, v, d = max(G.edges(data=True), key=lambda edge: edge[2]['weight'])
    except KeyError:
        raise KeyError('Most likely you have no weight attribute')
    S.extend([u, v])

    # Compute distances from each remaining node in G to S; the minus sign
    # makes the priority queue pop the maximum value.
    for v in G.nodes():
        if v in S:
            continue
        if metric == 1:
            S_dist.add_task(v, -_sumDist(G, S, v))
        else:
            S_dist.add_task(v, -_minDist(G, S, v))

    # add new nodes to the set greedily
    while len(S) < k:
        u, priority = S_dist.pop_item()  # maximum distance to set S
        S.append(u)
        # only nodes connected to u change their distance
        for v in G[u]:
            if v in S:
                continue
            priority, _count, _task = S_dist.entry_finder[v]
            try:
                weight = G[u][v]['weight']
            except KeyError:
                # Raise a real exception naming the offending edge; the
                # old code tried to raise the vertices themselves.
                raise KeyError(
                    'These are vertices that caused the problem: %r, %r' %
                    (u, v))
            if metric == 1:
                # add the distance to the new member of S
                S_dist.add_task(v, priority - weight)
            else:
                # update the minimal distance to the set S
                S_dist.add_task(v, max(priority, -weight))

    # extract objective value of the chosen set
    if metric == 1:
        objv = 0
        for u in S:
            objv += _sumDist(G, S, u)
    else:
        objv = float('Inf')
        for u in S:
            objv = min(objv, _minDist(G, S, u))
    return S, objv