def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations):
    """Refine the seed-set size around the point where the spread reaches targeted_size.

    The search first moves half a step (rounded up) below ``k``. It then keeps
    halving the step, moving down while the recorded spread at the current size
    is at least ``targeted_size`` and up otherwise, and stops once the last
    move was exactly +1.

    Input:
    G -- graph object, passed through to degreeDiscountIC and runIC
    k -- seed-set size the refinement starts from
    Tsize -- dict mapping seed-set size to averaged spread; sizes already
        present are not simulated again; the dict is updated in place
    targeted_size -- spread the seed set should reach
    step -- spacing that was used by the coarse search
    p -- propagation probability, passed through to the helpers
    iterations -- number of runIC runs averaged per seed-set size
    Output:
    S -- seed set selected by degreeDiscountIC for the final k
    Tsize -- the same dict that was passed in
    """
    # Seed sets built during this call, keyed by size, so the set returned
    # always belongs to the final k (the original returned whichever set was
    # built last, or failed when every visited k was already in Tsize).
    seed_sets = {}

    def evaluate(size):
        # Select `size` seeds and record their average cascade size.
        seeds = degreeDiscountIC(G, size, p)
        avg = 0
        for _ in range(iterations):
            avg += float(len(runIC(G, seeds, p))) / iterations
        seed_sets[size] = seeds
        Tsize[size] = avg

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk
    if k not in Tsize:
        evaluate(k)

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        # Single-argument form prints the same text on Python 2 and 3.
        print('%s %s %s' % (k, stepk, Tsize[k]))
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tsize[k] >= targeted_size else half
        k += stepk
        if k not in Tsize:
            evaluate(k)

    if k not in seed_sets:
        # Spread for k was supplied by the caller; only the seeds are missing.
        seed_sets[k] = degreeDiscountIC(G, k, p)
    return seed_sets[k], Tsize
def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations):
    """Refine the seed-set size around the point where the spread reaches targeted_size.

    The search first moves half a step (rounded up) below ``k``. It then keeps
    halving the step, moving down while the recorded spread at the current size
    is at least ``targeted_size`` and up otherwise, and stops once the last
    move was exactly +1.

    Input:
    G -- graph object, passed through to degreeDiscountIC and runIC
    k -- seed-set size the refinement starts from
    Tsize -- dict mapping seed-set size to averaged spread; sizes already
        present are not simulated again; the dict is updated in place
    targeted_size -- spread the seed set should reach
    step -- spacing that was used by the coarse search
    p -- propagation probability, passed through to the helpers
    iterations -- number of runIC runs averaged per seed-set size
    Output:
    S -- seed set selected by degreeDiscountIC for the final k
    Tsize -- the same dict that was passed in
    """
    # Seed sets built during this call, keyed by size, so the set returned
    # always belongs to the final k (the original returned whichever set was
    # built last, or failed when every visited k was already in Tsize).
    seed_sets = {}

    def evaluate(size):
        # Select `size` seeds and record their average cascade size.
        seeds = degreeDiscountIC(G, size, p)
        avg = 0
        for _ in range(iterations):
            avg += float(len(runIC(G, seeds, p))) / iterations
        seed_sets[size] = seeds
        Tsize[size] = avg

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk
    if k not in Tsize:
        evaluate(k)

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        # Single-argument form prints the same text on Python 2 and 3.
        print('%s %s %s' % (k, stepk, Tsize[k]))
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tsize[k] >= targeted_size else half
        k += stepk
        if k not in Tsize:
            evaluate(k)

    if k not in seed_sets:
        # Spread for k was supplied by the caller; only the seeds are missing.
        seed_sets[k] = degreeDiscountIC(G, k, p)
    return seed_sets[k], Tsize
def getDDData(G, maxk, p, iterations=200):
    """Tabulate avgSize results for degree-discount seed sets of size 1..maxk.

    Input:
    G -- graph object, passed through to degreeDiscountIC and avgSize
    maxk -- largest seed-set size to evaluate (inclusive); values below 1
        give an empty result
    p -- propagation probability, passed through to the helpers
    iterations -- fourth argument handed to avgSize (presumably the number of
        simulation runs to average -- confirm against avgSize); defaults to
        the previously hard-coded 200
    Output:
    data -- dict mapping each seed-set size to the value returned by avgSize
    """
    data = {}
    for size in range(1, maxk + 1):
        seeds = degreeDiscountIC(G, size, p)
        data[size] = avgSize(G, seeds, p, iterations)
    return data
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200):
    ''' Finds a seed set whose averaged spread reaches targeted_size.

    Seed sets of size step, 2*step, ... are selected with degreeDiscountIC and
    simulated with runIC until the averaged spread exceeds targeted_size; the
    boundary is then refined by binarySearchBoundary.

    Input:
    G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- increment of the seed-set size between evaluations (must be >= 1)
    p -- propagation probability
    iterations -- number of runIC runs averaged per seed-set size
    Output:
    whatever binarySearchBoundary returns for the bracket that was found
    Raises:
    ValueError -- if step < 1, or if seeding every node of G still does not
        exceed targeted_size
    '''
    if step < 1:
        # A non-positive step never enlarges the seed set, so the loop below
        # would not terminate.
        raise ValueError('step must be a positive integer, got %r' % (step,))

    Tsize = {0: 0}
    k = 0
    while Tsize[k] <= targeted_size:
        if k >= len(G):
            # Every node is already a seed; a larger seed set cannot exist.
            raise ValueError(
                'targeted_size %r cannot be exceeded on a graph with %d nodes'
                % (targeted_size, len(G)))
        k += step
        S = degreeDiscountIC(G, k, p)
        avg = 0
        for _ in range(iterations):
            avg += float(len(runIC(G, S, p))) / iterations
        Tsize[k] = avg
        # Single-argument form prints the same text on Python 2 and 3.
        print('%s %s' % (k, Tsize[k]))

    # binary search for optimal solution
    return binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations)
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200):
    ''' Finds a seed set whose averaged spread reaches targeted_size.

    Seed sets of size step, 2*step, ... are selected with degreeDiscountIC and
    simulated with runIC until the averaged spread exceeds targeted_size; the
    boundary is then refined by binarySearchBoundary.

    Input:
    G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- increment of the seed-set size between evaluations (must be >= 1)
    p -- propagation probability
    iterations -- number of runIC runs averaged per seed-set size
    Output:
    whatever binarySearchBoundary returns for the bracket that was found
    Raises:
    ValueError -- if step < 1, or if seeding every node of G still does not
        exceed targeted_size
    '''
    if step < 1:
        # A non-positive step never enlarges the seed set, so the loop below
        # would not terminate.
        raise ValueError('step must be a positive integer, got %r' % (step,))

    Tsize = {0: 0}
    k = 0
    while Tsize[k] <= targeted_size:
        if k >= len(G):
            # Every node is already a seed; a larger seed set cannot exist.
            raise ValueError(
                'targeted_size %r cannot be exceeded on a graph with %d nodes'
                % (targeted_size, len(G)))
        k += step
        S = degreeDiscountIC(G, k, p)
        avg = 0
        for _ in range(iterations):
            avg += float(len(runIC(G, S, p))) / iterations
        Tsize[k] = avg
        # Single-argument form prints the same text on Python 2 and 3.
        print('%s %s' % (k, Tsize[k]))

    # binary search for optimal solution
    return binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations)
def spreadDegreeDiscount(G, targeted_size, step=1, p=.01, iterations=200,
                         degreeDiscount_Heuristic='degreeDiscount'):
    ''' Finds a seed set whose averaged spread reaches targeted_size.

    Seed sets of size step, 2*step, ... are selected and simulated with runIC
    until the averaged spread exceeds targeted_size; the boundary is then
    refined by binarySearchBoundary using the same selection algorithm.

    Input:
    G -- networkx graph object
    targeted_size -- desired size of targeted set
    step -- increment of the seed-set size between evaluations (must be >= 1)
    p -- propagation probability
    iterations -- number of runIC runs averaged per seed-set size
    degreeDiscount_Heuristic -- 'degreeDiscount' selects seeds with
        degreeDiscountIC; any other value selects them with degreeHeuristic
    Output:
    whatever binarySearchBoundary returns; it is called with the time spent
    in this function as initial_time
    Raises:
    ValueError -- if step < 1, or if seeding every node of G still does not
        exceed targeted_size
    '''
    if step < 1:
        # A non-positive step never enlarges the seed set, so the loop below
        # would not terminate.
        raise ValueError('step must be a positive integer, got %r' % (step,))

    # calculate the time of selecting the first initial nodes (afterwards we will select some of them)
    start_time = time.time()
    Tsize = {0: 0}
    k = 0
    while Tsize[k] <= targeted_size:
        if k >= len(G):
            # Every node is already a seed; a larger seed set cannot exist.
            raise ValueError(
                'targeted_size %r cannot be exceeded on a graph with %d nodes'
                % (targeted_size, len(G)))
        k += step
        if degreeDiscount_Heuristic == 'degreeDiscount':
            S = degreeDiscountIC(G, k, p)
        else:
            S = degreeHeuristic(G, k, p)
        avg = 0
        for _ in range(iterations):
            avg += float(len(runIC(G, S, p))) / iterations
        Tsize[k] = avg
        print(k, Tsize[k])

    # binary search for optimal solution; the selection algorithm is forwarded
    # so the refinement does not silently fall back to degree discount
    return binarySearchBoundary(
        G, k, Tsize, targeted_size, step, p, iterations,
        degreeDiscount_Heuristic=degreeDiscount_Heuristic,
        initial_time=time.time() - start_time)
# Build a weighted undirected graph from the edge list: an edge that appears
# several times in the file ends up with 'weight' equal to its multiplicity.
G = nx.Graph()
with open('graphdata/../graphdata/hep.txt') as f:
    # The first line carries two header fields; it is consumed here so the
    # loop below only sees edge lines.
    n, m = f.readline().split()
    for line in f:
        u, v = map(int, line.split())
        try:
            G[u][v]['weight'] += 1
        except KeyError:
            # Edge not present yet. Only the missing-key case is handled so
            # that unrelated errors (and Ctrl-C) are no longer swallowed.
            G.add_edge(u, v, weight=1)
# Single-argument print calls produce the same text on Python 2 and 3.
print('Built graph G')
print(time.time() - start)

# calculate initial set
seed_size = 10
S = degreeDiscountIC(G, seed_size)
print('Initial set of %s nodes chosen' % (seed_size,))
print(time.time() - start)

# write results S to file
with open('visualisation.txt', 'w') as f:
    for node in S:
        # The file is opened in text mode, which already translates '\n' to
        # the platform line ending; os.linesep would be doubled on Windows.
        f.write(str(node) + '\n')

# calculate average activated set size
iterations = 200  # number of iterations
avg = 0
for i in range(iterations):
    T = runIC(G, S)
    avg += float(len(T)) / iterations
def binarySearchBoundary(G, k, Tsize, targeted_size, step, p, iterations,
                         degreeDiscount_Heuristic='degreeDiscount', initial_time=0):
    """Refine the seed-set size around targeted_size while recording timings.

    The search first moves half a step (rounded up) below ``k``. It then keeps
    halving the step, moving down while the recorded spread at the current size
    is at least ``targeted_size`` and up otherwise, and stops once the last
    move was exactly +1.

    Input:
    G -- graph object, passed through to the selection helper and runIC
    k -- seed-set size the refinement starts from
    Tsize -- dict mapping seed-set size to averaged spread; sizes already
        present are not simulated again; the dict is updated in place
    targeted_size -- spread the seed set should reach
    step -- spacing that was used by the coarse search
    p -- propagation probability, passed through to the helpers
    iterations -- number of runIC runs averaged per seed-set size
    degreeDiscount_Heuristic -- 'degreeDiscount' selects seeds with
        degreeDiscountIC; any other value selects them with degreeHeuristic
    initial_time -- seconds already spent by the caller; stored as the first
        timer entry and added to every later entry
    Output:
    S -- seed set selected for the final k
    influence_spread -- one-element list holding the averaged spread of the
        final k
    timer_each_node -- elapsed seconds recorded after each newly simulated
        seed-set size, preceded by initial_time
    """
    # Calculate the time it takes to select each node, initial_time is added in order keep track of the time needed for
    # the selection of initial nodes
    start_time = time.time()
    # keep a list for time needed to select each nodes
    timer_each_node = [initial_time]
    # Seed sets built during this call, keyed by size, so the values returned
    # always belong to the final k (the original returned whatever was built
    # last, or failed when every visited k was already in Tsize).
    seed_sets = {}

    def select_seeds(size):
        # Pick `size` seeds with the configured algorithm.
        if degreeDiscount_Heuristic == 'degreeDiscount':
            return degreeDiscountIC(G, size, p)
        return degreeHeuristic(G, size, p)

    def evaluate(size):
        # Select seeds, average their cascade size and time-stamp the result.
        seeds = select_seeds(size)
        avg = 0
        for _ in range(iterations):
            avg += float(len(runIC(G, seeds, p))) / iterations
        # keep time for each NEW node selected
        timer_each_node.append(time.time() + initial_time - start_time)
        seed_sets[size] = seeds
        Tsize[size] = avg

    # initialization for binary search
    stepk = -int(math.ceil(float(step) / 2))
    k += stepk
    if k not in Tsize:
        evaluate(k)

    # check values of Tsize in between last 2 calculated steps
    while stepk != 1:
        print(k, stepk, Tsize[k])
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tsize[k] >= targeted_size else half
        k += stepk
        if k not in Tsize:
            evaluate(k)

    if k not in seed_sets:
        # Spread for k was supplied by the caller; only the seeds are missing.
        seed_sets[k] = select_seeds(k)
    S = seed_sets[k]
    # NOTE(review): the source was collapsed onto one line, so it is unclear
    # whether the original appended the average once per evaluated size or on
    # every simulation run; the once-per-size reading is implemented here.
    influence_spread = [Tsize[k]]

    print("datafaq: ", Tsize)
    print("datafaq[k]: ", Tsize[k])
    print("leeeen: ", len(timer_each_node))
    return S, influence_spread, timer_each_node
def binaryDegreeDiscount(G, tsize, p=.01, a=0.38, step=5, iterations=200):
    ''' Finds minimal number of nodes necessary to reach tsize number of nodes
    using degreeDiscount algorithms and binary search.

    The search starts at int(a * tsize) seeds, walks in increments of step
    until the measured spread crosses tsize, and then halves the increment
    (rounding up) until the last move has magnitude 1.

    Input:
    G -- networkx graph object
    tsize -- number of nodes necessary to reach
    p -- propagation probability
    a -- fraction of tsize to use as initial seed set size
    step -- step between iterations of binary search (must be >= 1)
    iterations -- number of iterations to average independent cascade
    Output:
    S -- seed set selected for the final seed-set size of the search
    Tspread -- spread values for different sizes of seed set
    Raises:
    ValueError -- if step < 1
    '''
    if step < 1:
        # A non-positive step never moves towards the boundary, so the
        # bracketing loops below would not terminate.
        raise ValueError('step must be a positive integer, got %r' % (step,))

    Tspread = dict()
    # Seed sets keyed by size, so the set returned always belongs to the
    # final k (the original returned whichever set was built last, e.g. the
    # too-small one after stepping back up in the descending case).
    seed_sets = {}

    def evaluate(size):
        # Select `size` seeds, record their spread and return it.
        seeds = degreeDiscountIC(G, size, p)
        seed_sets[size] = seeds
        Tspread[size] = avgSize(G, seeds, p, iterations)
        return Tspread[size]

    # find initial total spread
    k0 = int(a * tsize)
    t = evaluate(k0)

    # find bound (lower or upper) of total spread
    k = k0
    # Single-argument form prints the same text on Python 2 and 3.
    print('%s %s %s' % (k, step, Tspread[k]))
    if t >= tsize:
        # find the value of k that doesn't spread influence up to tsize nodes
        step *= -1
        while t >= tsize:
            # reduce step if necessary so that k never becomes negative
            while k + step < 0:
                step = int(math.ceil(float(step) / 2))
            k += step
            t = evaluate(k)
            print('%s %s %s' % (k, step, Tspread[k]))
    else:
        # find the value of k that spreads influence up to tsize nodes
        while t < tsize:
            k += step
            t = evaluate(k)
            print('%s %s %s' % (k, step, Tspread[k]))

    # After a descending walk, go back to the larger of the last two sizes.
    if Tspread[k] < Tspread[k - step]:
        k -= step
        step = abs(step)

    # search precise boundary
    stepk = step
    while abs(stepk) != 1:
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tspread[k] >= tsize else half
        k += stepk
        if k not in Tspread:
            evaluate(k)
        print('%s %s %s' % (k, stepk, Tspread[k]))

    # Every size in Tspread was produced by evaluate(), so its seeds are known.
    return seed_sets[k], Tspread
# Alternative seed selectors that were tried for the first measurement:
#S = randomHeuristic(G, seed_size, p=.05)
#S = newGreedyICRev(G, seed_size, I, pi, pa, p=.05)
#S = newGreedyIC(G, seed_size, p=.05)
# NOTE(review): time.clock() was removed in Python 3.8; it is kept because
# `start` is taken outside this fragment and must use the same clock.
time1 = time.clock() - start

iterations = 200  # number of iterations

# First measurement: average the two result sizes of runIC for the current S
# and turn them into a revenue figure with per-seed cost c1.
avg1 = 0
avg2 = 0
for i in range(iterations):
    T, In = runIC(G, S, I)
    avg1 += float(len(T)) / iterations
    avg2 += float(len(In)) / iterations
l1 = len(S)
rev1 = int(round(avg1)) + pi * int(round(avg2)) - c1 * l1

# Second measurement: degree-discount seeds with per-seed cost c2.
c2 = 1.2  # seed node cost=1.2
S = degreeDiscountIC(G, seed_size, p=.05)
#S = Rev(G, seed_size, I, pi, pa, c2)
avg1 = 0
avg2 = 0
for i in range(iterations):
    T, In = runIC(G, S, I)
    avg1 += float(len(T)) / iterations
    avg2 += float(len(In)) / iterations
l2 = len(S)
rev2 = int(round(avg1)) + pi * int(round(avg2)) - c2 * l2

# One tab-separated result row: seed size, both revenues, both seed counts
# and the time measured above.
pr_res = '\t'.join([
    str(seed_size),
    str(round(rev1, 3)),
    str(round(rev2, 3)),
    str(l1),
    str(l2),
    str(time1),
]) + '\n'
print(pr_res)
def binaryDegreeDiscount(G, tsize, p=0.01, a=0.38, step=5, iterations=200):
    """ Finds minimal number of nodes necessary to reach tsize number of nodes
    using degreeDiscount algorithms and binary search.

    The search starts at int(a * tsize) seeds, walks in increments of step
    until the measured spread crosses tsize, and then halves the increment
    (rounding up) until the last move has magnitude 1.

    Input:
    G -- networkx graph object
    tsize -- number of nodes necessary to reach
    p -- propagation probability
    a -- fraction of tsize to use as initial seed set size
    step -- step between iterations of binary search (must be >= 1)
    iterations -- number of iterations to average independent cascade
    Output:
    S -- seed set selected for the final seed-set size of the search
    Tspread -- spread values for different sizes of seed set
    Raises:
    ValueError -- if step < 1
    """
    if step < 1:
        # A non-positive step never moves towards the boundary, so the
        # bracketing loops below would not terminate.
        raise ValueError('step must be a positive integer, got %r' % (step,))

    Tspread = dict()
    # Seed sets keyed by size, so the set returned always belongs to the
    # final k (the original returned whichever set was built last, e.g. the
    # too-small one after stepping back up in the descending case).
    seed_sets = {}

    def evaluate(size):
        # Select `size` seeds, record their spread and return it.
        seeds = degreeDiscountIC(G, size, p)
        seed_sets[size] = seeds
        Tspread[size] = avgSize(G, seeds, p, iterations)
        return Tspread[size]

    # find initial total spread
    k0 = int(a * tsize)
    t = evaluate(k0)

    # find bound (lower or upper) of total spread
    k = k0
    # Single-argument form prints the same text on Python 2 and 3.
    print('%s %s %s' % (k, step, Tspread[k]))
    if t >= tsize:
        # find the value of k that doesn't spread influence up to tsize nodes
        step *= -1
        while t >= tsize:
            # reduce step if necessary so that k never becomes negative
            while k + step < 0:
                step = int(math.ceil(float(step) / 2))
            k += step
            t = evaluate(k)
            print('%s %s %s' % (k, step, Tspread[k]))
    else:
        # find the value of k that spreads influence up to tsize nodes
        while t < tsize:
            k += step
            t = evaluate(k)
            print('%s %s %s' % (k, step, Tspread[k]))

    # After a descending walk, go back to the larger of the last two sizes.
    if Tspread[k] < Tspread[k - step]:
        k -= step
        step = abs(step)

    # search precise boundary
    stepk = step
    while abs(stepk) != 1:
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tspread[k] >= tsize else half
        k += stepk
        if k not in Tspread:
            evaluate(k)
        print('%s %s %s' % (k, stepk, Tspread[k]))

    # Every size in Tspread was produced by evaluate(), so its seeds are known.
    return seed_sets[k], Tspread
# Build a weighted undirected graph from the edge list: an edge that appears
# several times in the file ends up with 'weight' equal to its multiplicity.
G = nx.Graph()
with open('graphdata/../graphdata/hep.txt') as f:
    # The first line carries two header fields; it is consumed here so the
    # loop below only sees edge lines.
    n, m = f.readline().split()
    for line in f:
        u, v = map(int, line.split())
        try:
            G[u][v]['weight'] += 1
        except KeyError:
            # Edge not present yet. Only the missing-key case is handled so
            # that unrelated errors (and Ctrl-C) are no longer swallowed.
            G.add_edge(u, v, weight=1)
# Single-argument print calls produce the same text on Python 2 and 3.
print('Built graph G')
print(time.time() - start)

# calculate initial set
seed_size = 10
S = degreeDiscountIC(G, seed_size)
print('Initial set of %s nodes chosen' % (seed_size,))
print(time.time() - start)

# write results S to file
with open('visualisation.txt', 'w') as f:
    for node in S:
        # The file is opened in text mode, which already translates '\n' to
        # the platform line ending; os.linesep would be doubled on Windows.
        f.write(str(node) + '\n')

# calculate average activated set size
iterations = 200  # number of iterations
avg = 0
for i in range(iterations):
    T = runIC(G, S)
    avg += float(len(T)) / iterations
def binaryDegreeDiscount(G, tsize, p=.01, a=0.38, step=5, iterations=200,
                         degreeDiscount_Heuristic='degreeDiscount'):
    ''' Finds minimal number of nodes necessary to reach tsize number of nodes
    using degreeDiscount algorithms and binary search.

    The search starts at int(a * tsize) seeds, walks in increments of step
    until the measured spread crosses tsize, and then halves the increment
    (rounding up) until the last move has magnitude 1. The elapsed time is
    recorded after every newly evaluated seed-set size.

    Input:
    G -- networkx graph object
    tsize -- number of nodes necessary to reach
    p -- propagation probability
    a -- fraction of tsize to use as initial seed set size
    step -- step between iterations of binary search (must be >= 1)
    iterations -- number of iterations to average independent cascade
    degreeDiscount_Heuristic -- 'degreeDiscount' selects seeds with
        degreeDiscountIC; any other value selects them with degreeHeuristic
    Output:
    S -- seed set selected for the final seed-set size of the search
    influence_spread -- one-element list with the spread of S averaged over
        `iterations` runIC runs
    timer_each_node -- seconds elapsed since the start of the call, one entry
        per newly evaluated seed-set size
    Raises:
    ValueError -- if step < 1
    '''
    if step < 1:
        # A non-positive step never moves towards the boundary, so the
        # bracketing loops below would not terminate.
        raise ValueError('step must be a positive integer, got %r' % (step,))

    # Calculate the time it takes to select each node
    start_time = time.time()
    # keep a list for time needed to select each nodes
    timer_each_node = []
    Tspread = dict()
    # Seed sets keyed by size, so the set returned (and simulated at the end)
    # always belongs to the final k rather than to whichever size happened to
    # be evaluated last.
    seed_sets = {}

    def evaluate(size):
        # Select `size` seeds with the configured algorithm, record their
        # spread and return it.
        if degreeDiscount_Heuristic == 'degreeDiscount':
            seeds = degreeDiscountIC(G, size, p)
        else:
            seeds = degreeHeuristic(G, size, p)
        seed_sets[size] = seeds
        Tspread[size] = avgSize(G, seeds, p, iterations)
        return Tspread[size]

    # find initial total spread
    k0 = int(a * tsize)
    t = evaluate(k0)

    # find bound (lower or upper) of total spread
    k = k0
    print(k, step, Tspread[k])
    # keep time for each NEW node selected
    timer_each_node.append(time.time() - start_time)
    if t >= tsize:
        # find the value of k that doesn't spread influence up to tsize nodes
        step *= -1
        while t >= tsize:
            # reduce step if necessary so that k never becomes negative
            while k + step < 0:
                step = int(math.ceil(float(step) / 2))
            k += step
            t = evaluate(k)
            print(k, step, Tspread[k])
            timer_each_node.append(time.time() - start_time)
    else:
        # find the value of k that spreads influence up to tsize nodes
        while t < tsize:
            k += step
            t = evaluate(k)
            print(k, step, Tspread[k])
            timer_each_node.append(time.time() - start_time)

    # After a descending walk, go back to the larger of the last two sizes.
    if Tspread[k] < Tspread[k - step]:
        k -= step
        step = abs(step)

    # search precise boundary
    stepk = step
    while abs(stepk) != 1:
        half = int(math.ceil(float(abs(stepk)) / 2))
        stepk = -half if Tspread[k] >= tsize else half
        k += stepk
        if k not in Tspread:
            evaluate(k)
            timer_each_node.append(time.time() - start_time)
        print(k, stepk, Tspread[k])

    print("(number of nodes) - (spread) : ", Tspread)

    # Every size in Tspread was produced by evaluate(), so its seeds are known.
    S = seed_sets[k]

    # Simulate the returned seed set once more to report its spread.
    # NOTE(review): the source was collapsed onto one line, so it is unclear
    # whether the original appended the average once or on every simulation
    # run; the append-once reading is implemented here.
    influence_spread = []
    avg = 0
    for _ in range(iterations):
        avg += float(len(runIC(G, S, p))) / iterations
    influence_spread.append(avg)
    print("(spread) : ", influence_spread)

    return S, influence_spread, timer_each_node
try: G[u][v]['weight'] += 1 except: G.add_edge(u, v, weight=1) # G.add_edge(u, v, weight=1) print 'Built graph G' print time.time() - start seed_size = 5 p = .01 nodes = G.nodes() C = combinations(nodes, seed_size) spread = dict() for candidate in C: print candidate, time2spread = time.time() spread[candidate] = avgSize(G, list(candidate), p, 1000) print spread[candidate], time.time() - time2spread S, val = max(spread.iteritems(), key=lambda (dk, dv): dv) print 'S (by brute-force):', S, ' -->', val S2 = degreeDiscountIC(G, seed_size, p) print 'S (by degree discount):', tuple(S2), ' -->', avgSize(G, S2, p, 1000) print 'S (by degree discount) spreads to %s nodes (according to brute-force)' % ( spread[tuple(sorted(S2))]) print 'Total time:', time.time() - start console = []
u, v = map(int, line.split()) try: G[u][v]['weight'] += 1 except: G.add_edge(u,v, weight=1) # G.add_edge(u, v, weight=1) print 'Built graph G' print time.time() - start seed_size = 5 p = .01 nodes = G.nodes() C = combinations(nodes, seed_size) spread = dict() for candidate in C: print candidate, time2spread = time.time() spread[candidate] = avgSize(G, list(candidate), p, 1000) print spread[candidate], time.time() - time2spread S, val = max(spread.iteritems(), key = lambda (dk, dv): dv) print 'S (by brute-force):', S, ' -->', val S2 = degreeDiscountIC(G, seed_size, p) print 'S (by degree discount):', tuple(S2), ' -->', avgSize(G, S2, p, 1000) print 'S (by degree discount) spreads to %s nodes (according to brute-force)' %(spread[tuple(sorted(S2))]) print 'Total time:', time.time() - start console = []