def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations):
    ''' Refines, by binary search, the seed-set size bracketed by the caller.

    Starting half a step below k, the seed-set size moves down while the
    averaged spread Tsize[k] is at least targeted_size and up otherwise; the
    move length is halved (rounded up) on every move. The search ends right
    after a move of exactly +1.

    Input:
    G -- graph object handed to newGreedyIC and runIC
    k -- seed-set size at which the caller stopped
    Tsize -- dict: seed-set size -> averaged spread; sizes already present are
    reused, every size evaluated here is added in place
    targeted_size -- desired size of targeted set
    step -- step the caller used between evaluated seed-set sizes
    p -- propagation probability
    iterations -- number of runIC runs averaged per seed-set size
    Output:
    S -- seed set built by newGreedyIC for the size the search ended at
    Tsize -- the dict that was passed in, extended
    '''
    R = iterations
    seeds = dict()  # seed sets built during this search, keyed by their size

    def average_spread(seed_set):
        # mean number of activated nodes over R runs of runIC
        avg = 0
        for _ in range(R):
            T = runIC(G, seed_set, p)
            avg += float(len(T)) / R
        return avg

    def evaluate(size):
        # fill Tsize[size] unless the caller or an earlier move already did
        if size not in Tsize:
            seeds[size] = newGreedyIC(G, size, p)
            Tsize[size] = average_spread(seeds[size])

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk
    evaluate(k)

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        # same output as the Python 2 statement `print k, stepk, Tsize[k]`
        print('%s %s %s' % (k, stepk, Tsize[k]))
        half = int(math.ceil(float(abs(stepk)) / 2))
        if Tsize[k] >= targeted_size:
            stepk = -half
        else:
            stepk = half
        k += stepk
        evaluate(k)

    # The search frequently ends on a size whose spread was already in Tsize
    # (always so when step == 1), in which case no seed set was built for it
    # above. Build it now so that S is always bound and always belongs to the
    # final k.
    # NOTE(review): if newGreedyIC is randomized, this seed set may differ
    # from the one whose spread the caller stored in Tsize[k] -- confirm.
    if k not in seeds:
        seeds[k] = newGreedyIC(G, k, p)
    return seeds[k], Tsize
def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations):
    ''' Refines, by binary search, the seed-set size bracketed by the caller.

    Starting half a step below k, the seed-set size moves down while the
    averaged spread Tsize[k] is at least targeted_size and up otherwise; the
    move length is halved (rounded up) on every move. The search ends right
    after a move of exactly +1.

    Input:
    G -- graph object handed to degreeDiscountIC and runIC
    k -- seed-set size at which the caller stopped
    Tsize -- dict: seed-set size -> averaged spread; sizes already present are
    reused, every size evaluated here is added in place
    targeted_size -- desired size of targeted set
    step -- step the caller used between evaluated seed-set sizes
    p -- propagation probability
    iterations -- number of runIC runs averaged per seed-set size
    Output:
    S -- seed set built by degreeDiscountIC for the size the search ended at
    Tsize -- the dict that was passed in, extended
    '''
    R = iterations
    built = dict()  # seed sets built during this search, keyed by their size

    def measure(size):
        # build the seed set of this size and store its averaged spread
        S = degreeDiscountIC(G, size, p)
        avg = 0
        for _ in range(R):
            T = runIC(G, S, p)
            avg += float(len(T)) / R
        built[size] = S
        Tsize[size] = avg

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk
    if k not in Tsize:
        measure(k)

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        # same output as the Python 2 statement `print k, stepk, Tsize[k]`
        print('%s %s %s' % (k, stepk, Tsize[k]))
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tsize[k] >= targeted_size else half
        k += stepk
        if k not in Tsize:
            measure(k)

    # The final k is often a size the caller had already measured (always so
    # when step == 1), so no seed set exists for it yet. Returning the last
    # seed set built would hand back one of a different size, or fail with an
    # unbound S when none was built at all; build the right one instead.
    if k not in built:
        built[k] = degreeDiscountIC(G, k, p)
    return built[k], Tsize
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200):
    ''' Enlarges a degreeDiscountIC seed set, step nodes at a time, until its
    averaged runIC spread exceeds targeted_size, then delegates the fine
    search to binarySearchBoundary.
    Input:
    G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- number of seeds added between two spread measurements
    p -- propagation probability
    iterations -- number of runIC runs averaged per measurement
    Output:
    whatever binarySearchBoundary returns for the bracketing size found here
    (S -- seed set that achieves targeted_size,
    Tsize -- averaged targeted size for different sizes of seed set)
    '''
    runs = iterations
    size = 0
    spread_by_size = {size: 0}  # an empty seed set activates nothing

    while spread_by_size[size] <= targeted_size:
        size += step
        seeds = degreeDiscountIC(G, size, p)
        mean_spread = 0
        for _ in range(runs):
            activated = runIC(G, seeds, p)
            mean_spread += float(len(activated)) / runs
        spread_by_size[size] = mean_spread
        # same output as the Python 2 statement `print k, Tsize[k]`
        print('%s %s' % (size, spread_by_size[size]))

    # binary search for optimal solution
    return binarySearchBoundary(G, size, spread_by_size, targeted_size,
                                step, p, iterations)
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200):
    ''' Coarse search for a seed-set size whose averaged spread passes
    targeted_size, followed by a binary search around that size.
    Input:
    G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- increment of the seed-set size between measurements
    p -- propagation probability
    iterations -- how many runIC runs are averaged for each size
    Output:
    the return value of binarySearchBoundary
    (S -- seed set that achieves targeted_size,
    Tsize -- averaged targeted size for different sizes of seed set)
    '''
    n_runs = iterations
    measured = dict()  # seed-set size -> averaged number of activated nodes
    n_seeds = 0
    measured[n_seeds] = 0

    # walk upwards in strides of `step` until the target is exceeded
    while measured[n_seeds] <= targeted_size:
        n_seeds += step
        seed_set = degreeDiscountIC(G, n_seeds, p)
        running_mean = 0
        for _run in range(n_runs):
            running_mean += float(len(runIC(G, seed_set, p))) / n_runs
        measured[n_seeds] = running_mean
        # same output as the Python 2 statement `print k, Tsize[k]`
        print('%s %s' % (n_seeds, running_mean))

    # binary search for optimal solution
    return binarySearchBoundary(
        G, n_seeds, measured, targeted_size, step, p, iterations)
def generalGreedy(G, k, p=0.01):
    """ Builds a seed set of k nodes with the general greedy heuristic.

    In each of k rounds every node not yet chosen is scored by running runIC
    20 times on (chosen nodes + that node); the node popped from the priority
    queue after scoring joins the seed set.
    Input:
    G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output:
    S -- initial set of k nodes to propagate (list, in order of selection)
    """
    import time
    started = time.time()
    runs = 20  # number of times to run Random Cascade
    chosen = []  # selected nodes

    for round_idx in range(k):
        queue = PQ()  # priority queue
        for node in G.nodes():
            if node in chosen:
                continue
            queue.add_task(node, 0)  # initialize spread value
            for _ in range(runs):
                current, _count, _task = queue.entry_finder[node]
                spread = float(len(runIC(G, chosen + [node], p)))
                # spreads are subtracted, so a larger spread gives a smaller
                # priority (pop_item is presumably min-first -- confirm in PQ)
                queue.add_task(node, current - spread / runs)
        best, _priority = queue.pop_item()
        chosen.append(best)
        # same output as the Python 2 statement `print i, k, time.time() - start`
        print('%s %s %s' % (round_idx, k, time.time() - started))
    return chosen
def generalGreedy(G, k, p=.01):
    ''' Selects k seed nodes one at a time with the general greedy heuristic.

    Every round scores each unselected node by 20 runIC simulations started
    from the current selection plus that node, then takes the node that the
    priority queue pops.
    Input:
    G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output:
    S -- initial set of k nodes to propagate (list, in order of selection)
    '''
    import time
    t0 = time.time()
    n_sims = 20  # number of times to run Random Cascade
    selected = []  # nodes picked so far

    for pick in range(k):
        pq = PQ()  # priority queue, rebuilt for every pick
        for cand in G.nodes():
            if cand not in selected:
                pq.add_task(cand, 0)  # initialize spread value
                for _sim in range(n_sims):
                    entry = pq.entry_finder[cand]
                    [prio, _cnt, _tsk] = entry
                    reached = runIC(G, selected + [cand], p)
                    # add normalized spread value (negated)
                    pq.add_task(cand, prio - float(len(reached)) / n_sims)
        winner, _prio = pq.pop_item()
        selected.append(winner)
        # same output as the Python 2 statement `print i, k, time.time() - start`
        print('%s %s %s' % (pick, k, time.time() - t0))
    return selected
start = time.time()

# read in graph; repeated (u, v) lines accumulate in the edge's 'weight'
G = nx.Graph()
with open('graphdata/../graphdata/hep.txt') as f:
    # first line is a two-field header; its values are not used below
    n, m = f.readline().split()
    for line in f:
        u, v = map(int, line.split())
        try:
            G[u][v]['weight'] += 1
        except KeyError:
            # first occurrence of this edge. Only the missing-key case is
            # handled; a bare except would also swallow KeyboardInterrupt
            # and genuine errors.
            G.add_edge(u, v, weight=1)
print('Built graph G')
print(time.time() - start)

# # read in T
# with open('lemma1.txt') as f:
#     T = []
#     k = int(f.readline())
#     for line in f:
#         T.append(int(line))
# print 'Read %s activated nodes' %k
# print time.time() - start

# fixed seed set; T is what runIC returns for it with runIC's default settings
S = [131, 639, 287, 267, 608, 100, 559, 124, 359, 66]
k = len(S)
T = runIC(G, S)
highdegreeS = highdegreeSet(G, T, k)
console = []
def spreadNewGreedyIC(G, targeted_size, step=1, p=.01, S0=[], iterations=200):
    ''' Grows a seed set greedily until its averaged Independent Cascade
    spread exceeds targeted_size.

    Each round samples `iterations` pruned copies of G, scores every node not
    yet covered by the number of nodes it reaches in each copy, and adds the
    node popped from the priority queue to the seed set.
    Input:
    G -- networkx graph object; every edge must carry a 'weight' attribute
    targeted_size -- averaged spread the seed set has to exceed
    step -- unused (TODO: add step functionality)
    p -- propagation probability
    S0 -- list of nodes to start from; it is copied, never modified
    iterations -- number of sampled graphs per round and of runIC runs per
    spread estimate
    Output:
    S -- list of chosen nodes: the nodes of S0 followed by the added ones
    '''
    import time
    start = time.time()

    assert isinstance(S0, list), "S0 must be a list. %s provided instead" % type(S0)
    # Work on a copy: appending to S0 itself would alter the caller's list
    # and, for the default argument, leak seeds from one call into the next.
    S = list(S0)  # selected nodes
    R = iterations

    def average_spread(seed_set):
        # mean number of activated nodes over R runs of runIC
        total = 0
        for _ in range(R):
            T = runIC(G, seed_set, p)
            total += float(len(T)) / R
        return total

    tsize = average_spread(S)
    round_idx = 0  # greedy rounds completed so far, for the progress print
    while tsize <= targeted_size:
        s = PQ()  # negated additional spread of each remaining node over R sampled graphs
        # initialize values of s
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)
        # calculate potential additional spread for each vertex not in S
        prg_idx = 1
        idx = 1
        prcnt = .1  # for progress to print
        for j in range(R):
            # create new pruned graph E
            E = deepcopy(G)
            edge_rem = []  # edges to remove
            for (u, v) in E.edges():
                w = G[u][v]['weight']
                # An edge of weight w transmits with probability
                # 1 - (1 - p)**w and must stay in E exactly in that case;
                # the earlier `<` test removed the transmitting edges instead.
                if random() >= 1 - (1 - p)**w:
                    edge_rem.append((u, v))
            E.remove_edges_from(edge_rem)
            # find reachable vertices from S
            Rs = bfs(E, S)
            # membership set built once per sample instead of S + Rs per node
            covered = set(S)
            covered.update(Rs)
            # find additional nodes each vertex would bring to the set S
            for v in G.nodes():
                if v not in covered:  # neither chosen nor reached from S
                    [priority, c, task] = s.entry_finder[v]
                    s.add_task(v, priority - float(len(bfs(E, [v]))) / R)
            if idx == int(prg_idx * prcnt * R):
                print('%s%%...' % (int(prg_idx * prcnt * 100)))
                prg_idx += 1
            idx += 1
        # add vertex with maximum potential spread
        task, priority = s.pop_item()
        S.append(task)
        # first field is the round number; it used to be the stale index of
        # the initial averaging loop (always R - 1, unbound when R == 0)
        print('%s %s %s %s %s' % (round_idx, len(S), task, -priority,
                                  time.time() - start))
        round_idx += 1
        tsize = average_spread(S)
    return S
start = time.time()

# read in graph; a repeated (u, v) line increments that edge's 'weight'
G = nx.Graph()
with open('graphdata/../graphdata/hep.txt') as f:
    # consume the two-field header line; n and m are not used afterwards
    n, m = f.readline().split()
    for line in f:
        u, v = map(int, line.split())
        try:
            G[u][v]['weight'] += 1
        except KeyError:
            # edge not present yet. Catching only KeyError keeps unrelated
            # failures (and Ctrl-C) from being silently turned into a new
            # edge, as the former bare except did.
            G.add_edge(u, v, weight=1)
print('Built graph G')
print(time.time() - start)

# # read in T
# with open('lemma1.txt') as f:
#     T = []
#     k = int(f.readline())
#     for line in f:
#         T.append(int(line))
# print 'Read %s activated nodes' %k
# print time.time() - start

# hard-coded seed set; runIC is called without p, i.e. with its own default
S = [131, 639, 287, 267, 608, 100, 559, 124, 359, 66]
k = len(S)
T = runIC(G, S)
highdegreeS = highdegreeSet(G, T, k)
console = []
def spreadNewGreedyIC(G, targeted_size, step=1, p=.01, S0=[], iterations = 200):
    ''' Adds seeds greedily until the averaged Independent Cascade spread of
    the seed set exceeds targeted_size.

    Per round: `iterations` pruned copies of G are sampled, each node that is
    neither a seed nor reached from the seeds is credited with the number of
    nodes it reaches in the copy, and the node popped from the priority queue
    becomes the next seed.
    Input:
    G -- networkx graph object; every edge must carry a 'weight' attribute
    targeted_size -- averaged spread the seed set has to exceed
    step -- unused (TODO: add step functionality)
    p -- propagation probability
    S0 -- list of nodes to start from; it is copied, never modified
    iterations -- number of sampled graphs per round and of runIC runs per
    spread estimate
    Output:
    S -- list of chosen nodes: the nodes of S0 followed by the added ones
    '''
    import time
    start = time.time()

    assert isinstance(S0, list), "S0 must be a list. %s provided instead" % type(S0)
    # Copy S0: the seed list is appended to below, and writing into S0 would
    # change the caller's list or the default list shared by all calls.
    S = list(S0)  # selected nodes
    R = iterations

    def estimate_spread():
        # averaged number of nodes runIC activates from the current S
        acc = 0
        for _ in range(R):
            acc += float(len(runIC(G, S, p))) / R
        return acc

    tsize = estimate_spread()
    rounds_done = 0  # reported in the per-round progress line
    while tsize <= targeted_size:
        s = PQ()  # negated additional spread of each remaining node over R sampled graphs
        # initialize values of s
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)
        # calculate potential additional spread for each vertex not in S
        prg_idx = 1
        idx = 1
        prcnt = .1  # for progress to print
        for j in range(R):
            # create new pruned graph E
            E = deepcopy(G)
            edge_rem = []  # edges to remove
            for (u, v) in E.edges():
                w = G[u][v]['weight']
                # 1 - (1 - p)**w is the chance that the edge transmits, so
                # the edge is dropped on the complementary event; the former
                # `<` comparison dropped the transmitting edges instead.
                if random() >= 1 - (1 - p)**w:
                    edge_rem.append((u, v))
            E.remove_edges_from(edge_rem)
            # find reachable vertices from S
            Rs = bfs(E, S)
            # one set per sample replaces the list S + Rs rebuilt per node
            taken = set(S)
            taken.update(Rs)
            # find additional nodes each vertex would bring to the set S
            for v in G.nodes():
                if v in taken:
                    continue
                [priority, c, task] = s.entry_finder[v]
                s.add_task(v, priority - float(len(bfs(E, [v]))) / R)
            if idx == int(prg_idx * prcnt * R):
                print('%s%%...' % (int(prg_idx * prcnt * 100)))
                prg_idx += 1
            idx += 1
        # add vertex with maximum potential spread
        task, priority = s.pop_item()
        S.append(task)
        # first field is the round number; the stale index `i` of the initial
        # averaging loop was printed before (constant R - 1, unbound if R == 0)
        print('%s %s %s %s %s' % (rounds_done, len(S), task, -priority,
                                  time.time() - start))
        rounds_done += 1
        tsize = estimate_spread()
    return S