def generalGreedy(G, k, p=0.01):
    """ Finds initial seed set S using general greedy heuristic
    Input: G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    """
    import time

    start = time.time()
    R = 20  # number of times to run Random Cascade
    S = []  # set of selected nodes
    # add node to S if achieves maximum propagation for current chosen + this node
    for i in range(k):
        s = PQ()  # priority queue
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)  # initialize spread value
                for j in range(R):  # run R times Random Cascade
                    [priority, count, task] = s.entry_finder[v]
                    s.add_task(v, priority - float(len(runIC(G, S + [v], p))) / R)  # add normalized spread value
        task, priority = s.pop_item()
        S.append(task)
        print(i, k, time.time() - start)
    return S
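Every example below leans on the same PQ helper: a min-heap priority queue with lazy deletion, in the style of the priority-queue recipe from Python's heapq documentation, whose entry_finder dict the greedy loops index directly. The original repositories ship their own implementation; a minimal sketch consistent with that usage:

import heapq
import itertools

class PQ:
    ''' Min-heap priority queue with lazy deletion (heapq docs recipe).
    add_task inserts a task or updates its priority; entry_finder maps each
    task to its live [priority, count, task] entry, which is what the greedy
    loops read back; pop_item raises KeyError when the queue is empty.
    '''
    REMOVED = '<removed-task>'

    def __init__(self):
        self.pq = []  # heap of [priority, count, task] entries
        self.entry_finder = {}  # task -> entry
        self.counter = itertools.count()  # tie-breaker for equal priorities

    def add_task(self, task, priority=0):
        if task in self.entry_finder:
            self.remove_task(task)
        entry = [priority, next(self.counter), task]
        self.entry_finder[task] = entry
        heapq.heappush(self.pq, entry)

    def remove_task(self, task):
        entry = self.entry_finder.pop(task)  # raises KeyError if absent
        entry[-1] = self.REMOVED

    def pop_item(self):
        while self.pq:
            priority, count, task = heapq.heappop(self.pq)
            if task is not self.REMOVED:
                del self.entry_finder[task]
                return task, priority
        raise KeyError('pop from an empty priority queue')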
Example No. 2
def generalGreedy(G, k, p=.01):
    ''' Finds initial seed set S using general greedy heuristic
    Input: G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    '''
    import time
    start = time.time()
    R = 20  # number of times to run Random Cascade
    S = []  # set of selected nodes
    # add node to S if achieves maximum propagation for current chosen + this node
    for i in range(k):
        s = PQ()  # priority queue
        for v in G.nodes():  # iterate over all nodes in G
            if v not in S:
                s.add_task(v, 0)  # initialize spread value (priority 0)
                for j in range(R):  # run Random Cascade R times
                    [priority, count, task] = s.entry_finder[v]  # fetch the current priority of v
                    # runIC(G, S + [v], p) treats S + [v] as the seed set with propagation
                    # probability p and returns the influence spread (the set of activated nodes)
                    # priority - float(len(runIC(G, S + [v], p))) / R is the updated priority
                    # since v was already added to the queue above, add_task first calls remove_task,
                    # marking the old entry_finder entry as <removed-task>, then reinserts v
                    # so this call updates v's priority: the larger the spread of S + [v]
                    # under the IC model, the lower (more negative) v's priority becomes
                    s.add_task(v, priority - float(len(runIC(G, S + [v], p))) / R)  # add normalized spread value
        task, priority = s.pop_item()  # pop and return the node with the lowest priority
        S.append(task)  # add it to S: the lowest priority corresponds to the largest IC spread
        print(i, k, time.time() - start)
    return S
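The greedy examples also assume a runIC helper that performs one stochastic run of the Independent Cascade model and returns the activated nodes (signatures vary; Example No. 19 below passes an edge-probability structure and a flag instead of a scalar p). A minimal sketch of the scalar-p form used here, where an edge of weight w fires with probability 1 - (1 - p)**w:

from random import random

def runIC(G, S, p=.01):
    ''' One stochastic run of the Independent Cascade model.
    Input: G -- networkx graph with 'weight' edge attributes
    S -- seed set
    p -- propagation probability per unit of edge weight
    Output: T -- list of all activated nodes (including the seeds)
    '''
    T = list(S)  # activated nodes
    i = 0
    while i < len(T):
        for v in G[T[i]]:  # try to activate neighbors of the current node
            if v not in T:
                w = G[T[i]][v]['weight']
                if random() < 1 - (1 - p)**w:  # one chance per activated neighbor
                    T.append(v)
        i += 1
    return T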
def degreeDiscountIC(G, k, p=.01):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    k -- number of nodes needed
    p -- propagation probability
    Output:
    S -- chosen k nodes
    '''
    S = []
    dd = PQ()  # degree discount
    t = dict()  # number of adjacent vertices that are in S
    d = dict()  # degree of each vertex

    # initialize degree discount
    for u in G.nodes():
        d[u] = sum([G[u][v]['weight'] for v in G[u]])  # each edge adds degree 1
        # d[u] = len(G[u]) # each neighbor adds degree 1
        dd.add_task(u, -d[u])  # add degree of each node
        t[u] = 0

    # add vertices to S greedily
    for i in range(k):
        u, priority = dd.pop_item()  # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight']  # increase number of selected neighbors
                priority = d[v] - 2 * t[v] - (d[v] - t[v]) * t[v] * p  # discount of degree
                dd.add_task(v, -priority)
    return S
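A hypothetical usage sketch (the graph and parameters are invented for illustration): degreeDiscountIC expects a 'weight' attribute on every edge, so unit weights reproduce the plain degree-discount heuristic. With this input the run is deterministic: after node 3 is picked, its neighbors are discounted below the untouched node 5.

import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 4), (4, 5)], weight=1)
seeds = degreeDiscountIC(G, k=2, p=.01)
print(seeds)  # [3, 5]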
Example No. 4
def generalGreedy(G, k, p=.01):
    ''' Finds initial seed set S using general greedy heuristic
    Input: G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    '''
    # import time
    # start = time.time()
    R = 200  # number of times to run Random Cascade
    S = []  # set of selected nodes
    # add node to S if achieves maximum propagation for current chosen + this node
    for i in range(k):  # cannot parallelize
        s = PQ()  # priority queue

        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)  # initialize spread value
                # [priority, count, task] = s.entry_finder[v]
                # run Random Cascade R times; the gain from parallelizing is small,
                # since a single runIC is cheap -- perhaps worthwhile for huge graphs
                for j in range(R):
                    [priority, count, task] = s.entry_finder[v]
                    s.add_task(v, priority - float(len(runIC(G, S + [v], p))) / R)  # add normalized spread value

        task, priority = s.pop_item()
        print(task, priority)
        S.append(task)
        # print(i, k, time.time() - start)
    return S
def degreeDiscountIC(G, k, p=.01):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    k -- number of nodes needed
    p -- propagation probability
    Output:
    S -- chosen k nodes
    '''
    S = []
    dd = PQ() # degree discount
    t = dict() # number of adjacent vertices that are in S
    d = dict() # degree of each vertex

    # initialize degree discount
    for u in G.nodes():
        d[u] = sum([G[u][v]['weight'] for v in G[u]]) # each edge adds degree 1
        # d[u] = len(G[u]) # each neighbor adds degree 1
        dd.add_task(u, -d[u]) # add degree of each node
        t[u] = 0

    # add vertices to S greedily
    for i in range(k):
        u, priority = dd.pop_item() # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight'] # increase number of selected neighbors
                priority = d[v] - 2*t[v] - (d[v] - t[v])*t[v]*p # discount of degree
                dd.add_task(v, -priority)
    return S
Example No. 6
def NewDiscount(G, k, p):

    S = []
    dd = PQ()  # degree discount
    t = dict()  # number of adjacent vertices that are in S
    d = dict()  # degree of each vertex

    # initialize degree discount
    for u in G.nodes():
        # d[u] = sum([G[u][v]['weight'] for v in G[u]])  # each edge adds degree 1
        d[u] = len(G[u])  # each neighbor adds degree 1
        dd.add_task(u, -d[u])  # add degree of each node
        t[u] = 0

    # add vertices to S greedily
    for i in range(k):
        u, priority = dd.pop_item()  # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight']  # increase number of selected neighbors
                priority = d[v] - 2 * t[v] - (d[v] - t[v]) * t[v] * p[u, v]  # discount of degree
                dd.add_task(v, -priority)
    return S
Example No. 7
def GDD(G, k, Ep):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    k -- number of nodes needed
    Ep -- propagation probabilities
    Output:
    S -- chosen k nodes
    '''
    S = []
    dd = PQ()  # degree discount
    active = dict()
    inactive = dict()

    # initialize degree discount
    for u in G:
        active[u] = 1
        # inactive[u] = sum([Ep[(u,v)]*G[u][v]['weight'] for v in G[u]])
        inactive[u] = sum(
            [1 - (1 - Ep[(u, v)])**G[u][v]["weight"] for v in G[u]])
        priority = active[u] * (1 + inactive[u])
        dd.add_task(u, -priority)  # add degree of each node

    # add vertices to S greedily
    for i in range(k):
        u, priority = dd.pop_item()  # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                active[v] *= (1 - Ep[(u, v)])**G[u][v]['weight']
                inactive[v] -= 1 - (1 - Ep[(u, v)])**G[u][v]['weight']
                priority = active[v] * (1 + inactive[v])
                dd.add_task(v, -priority)
    return S
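Here active[u] tracks the probability that u is still inactive given the seeds already chosen, and inactive[u] the expected number of neighbors u would newly activate, so the priority active[u] * (1 + inactive[u]) approximates the one-hop gain of seeding u. A hypothetical usage sketch with a uniform edge probability; note that Ep must be keyed by both orientations of each undirected edge, since GDD looks up Ep[(u, v)] for every node u and neighbor v:

import networkx as nx

G = nx.Graph()
G.add_edges_from([(1, 2), (2, 3), (2, 4), (4, 5)], weight=1)
Ep = {}
for u, v in G.edges():
    Ep[(u, v)] = Ep[(v, u)] = 0.1  # uniform propagation probability, both directions
seeds = GDD(G, k=2, Ep=Ep)
print(seeds)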
Example No. 8
def GDD(G, k, Ep):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    k -- number of nodes needed
    Ep -- propagation probabilities
    Output:
    S -- chosen k nodes
    '''
    S = []
    dd = PQ() # degree discount
    active = dict()
    inactive = dict()

    # initialize degree discount
    for u in G:
        active[u] = 1
        # inactive[u] = sum([Ep[(u,v)]*G[u][v]['weight'] for v in G[u]])
        inactive[u] = sum([1 - (1 - Ep[(u,v)])**G[u][v]["weight"] for v in G[u]])
        priority = active[u]*(1 + inactive[u])
        dd.add_task(u, -priority) # add degree of each node

    # add vertices to S greedily
    for i in range(k):
        u, priority = dd.pop_item() # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                active[v] *= (1-Ep[(u,v)])**G[u][v]['weight']
                inactive[v] -= 1 - (1 - Ep[(u,v)])**G[u][v]['weight']
                priority = active[v]*(1 + inactive[v])
                dd.add_task(v, -priority)
    return S
Example No. 9
def bipartite(w, discrepancy):
    Ebp_edges = []
    Q = PriorityQueue()
    incident_edges = dict()
    for e in w:
        Q.add_task(e, -w[e])
        incident_edges.setdefault(e[0], []).append((e[1], w[e]))
        incident_edges.setdefault(e[1], []).append((e[0], w[e]))
    processed_edges = []
    while len(processed_edges) < len(w):
        (e, weight) = Q.pop_item()
        processed_edges.append(e)
        try:
            incident_edges[e[0]].remove((e[1], -weight))
            incident_edges[e[1]].remove((e[0], -weight))
        except ValueError:
            pass
        Ebp_edges.append(e)

        # discard all edges in Q incident to b (i.e. e[1])
        for (a, weight) in incident_edges[e[1]]:
            try:
                Q.remove_task((a, e[1]))
                processed_edges.append((a, e[1]))
            except KeyError:
                pass
            try:
                incident_edges[a].remove((e[1], weight))
                incident_edges[e[1]].remove((a, weight))
            except ValueError:
                pass
        discrepancy[e[0]] += 1
        discrepancy[e[1]] += 1

        if -1 < discrepancy[e[0]] < .5:
            for (x, _) in incident_edges[e[0]]:
                try:
                    Q.remove_task((e[0], x))
                except KeyError:
                    pass
                new_weight = abs(discrepancy[e[0]]) + 2 * abs(discrepancy[x]) - abs(discrepancy[e[0]]) - 1
                if new_weight > 0:
                    Q.add_task((e[0], x), -new_weight)
                else:
                    processed_edges.append((e[0], x))
        elif discrepancy[e[0]] > .5:
            for (x, _) in incident_edges[e[0]]:
                try:
                    Q.remove_task((e[0], x))
                    processed_edges.append((e[0], x))
                except KeyError:
                    pass
    return Ebp_edges
def degreeHeuristicSeed(G, k):
    S = []
    d = PQ()
    for u in G:
        degree = sum([1 for v in G[u] if G[u][v]['weight']])
        # degree = len(G[u])
        d.add_task(u, -degree)
    for i in range(k):
        u, priority = d.pop_item()
        S.append(u)
    return S
Example No. 11
def stopDegreeDiscount(G, tsize, ic_step=1, p=.01, iterations=200):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    tsize -- number of nodes necessary to reach
    ic_step -- step of change in k between 2 iterations of IC
    p -- propagation probability
    Output:
    S -- seed set
    Tspread -- spread values for different sizes of seed set
    '''
    S = []
    dd = PQ()  # degree discount
    t = dict()  # number of adjacent vertices that are in S
    d = dict()  # degree of each vertex

    # initialize degree discount
    for u in G.nodes():
        d[u] = sum([G[u][v]['weight'] for v in G[u]])  # each edge adds degree 1
        # d[u] = len(G[u]) # each neighbor adds degree 1
        dd.add_task(u, -d[u])  # add degree of each node
        t[u] = 0

    # add vertices to S greedily
    # until necessary number of nodes can be reached
    Tspread = dict()  # spread for different k
    k = 0
    Tspread[k] = 0
    stepk = 1
    while Tspread[k] < tsize:
        u, priority = dd.pop_item()  # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight']  # increase number of selected neighbors
                priority = d[v] - 2 * t[v] - (d[v] - t[v]) * t[v] * p  # discount of degree
                dd.add_task(v, -priority)
        # calculate IC spread with ic_step
        if stepk == ic_step:
            k = len(S)
            Tspread[k] = avgSize(G, S, p, iterations)
            print(k, Tspread[k])
            stepk = 0
        stepk += 1

    # search precise boundary
    if abs(int(math.ceil(float(ic_step) / 2))) == 1:
        return S, Tspread
    else:
        return binarySearchBoundary(G, k, Tspread, tsize, ic_step, p,
                                    iterations)
def bipartite(w, discrepancy):
    Ebp_edges = []
    Q = PriorityQueue()
    incident_edges = dict()
    for e in w:
        Q.add_task(e, -w[e])
        incident_edges.setdefault(e[0], []).append((e[1], w[e]))
        incident_edges.setdefault(e[1], []).append((e[0], w[e]))
    processed_edges = []
    while len(processed_edges) < len(w):
        (e, weight) = Q.pop_item()
        processed_edges.append(e)
        try:
            incident_edges[e[0]].remove((e[1], -weight))
            incident_edges[e[1]].remove((e[0], -weight))
        except ValueError:
            pass
        Ebp_edges.append(e)

        # discard all edges in Q incident to b (i.e. e[1])
        for (a, weight) in incident_edges[e[1]]:
            try:
                Q.remove_task((a,e[1]))
                processed_edges.append((a,e[1]))
            except KeyError:
                pass
            try:
                incident_edges[a].remove((e[1], weight))
                incident_edges[e[1]].remove((a, weight))
            except ValueError:
                pass
        discrepancy[e[0]] += 1
        discrepancy[e[1]] += 1

        if -1 < discrepancy[e[0]] < .5:
            for (x, _) in incident_edges[e[0]]:
                try:
                    Q.remove_task((e[0], x))
                except KeyError:
                    pass
                new_weight = abs(discrepancy[e[0]]) + 2*abs(discrepancy[x]) - abs(discrepancy[e[0]]) - 1
                if new_weight > 0:
                    Q.add_task((e[0], x), -new_weight)
                else:
                    processed_edges.append((e[0], x))
        elif discrepancy[e[0]] > .5:
            for (x, _) in incident_edges[e[0]]:
                try:
                    Q.remove_task((e[0], x))
                    processed_edges.append((e[0], x))
                except KeyError:
                    pass
    return Ebp_edges
def representativeNodes(G, k, metric=1):
    ''' Finds the most distinguishable (representative) nodes in graph G greedily.
    Takes the node furthest from the already chosen nodes at each step.

    Input: G -- networkx object graph with weighted edges
    k -- number of nodes needed
    metric -- parameter for differentiating representative qualities
    metric == 1 trying to maximize total distance in the chosen set of k nodes
    metric == 2 trying to maximize minimal distance between a pair of k nodes
    Output:
    S -- chosen k nodes
    objv -- objective value according to the chosen metric and set of nodes
    '''

    S = [] # set of chosen nodes
    S_dist = PQ() # distances from each node in G to set S according to metric

    # initialize S with furthest vertices
    try:
        u, v, d = max(G.edges(data=True), key=lambda edge: edge[2]['weight'])
    except KeyError:
        raise KeyError('Most likely you have no weight attribute')
    S.extend([u, v])

    # compute distances from each node in G to S
    for v in G.nodes():
        if v not in S: # calculate only for nodes in G
            if metric == 1:
                S_dist.add_task(v, - _sumDist(G, S, v)) # take minus to pop the maximum value from priority queue
            elif metric == 2:
                S_dist.add_task(v, - _minDist(G, S, v)) # take minus to pop the maximum value from priority queue

    # add new nodes to the set greedily
    while len(S) < k:
        u, priority = S_dist.pop_item() # find maximum value of distance to set S
        S.append(u) # append that node to S

        # only increase distance for nodes that are connected to u
        for v in G[u].keys():
            if v not in S: # add only remained nodes
                [priority, count, task] = S_dist.entry_finder[v] # distance found at the previous step
                try:
                    if metric == 1:
                        S_dist.add_task(v, priority - G[u][v]['weight']) # adds distance to the new member of S
                    elif metric == 2:
                        S_dist.add_task(v, max(priority, -G[u][v]['weight'])) # update min distance to the set S
                except KeyError:
                    raise ValueError('Vertices %s and %s caused the problem' % (u, v))

    # extract objective value of the chosen set
    if metric == 1:
        objv = 0
        for u in S:
            objv += _sumDist(G, S, u)
    elif metric == 2:
        objv = float('Inf')
        for u in S:
            objv = min(objv, _minDist(G, S, u))

    return S, objv
def stopDegreeDiscount(G, tsize, ic_step=1, p=.01, iterations=200):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    tsize -- number of nodes necessary to reach
    ic_step -- step of change in k between 2 iterations of IC
    p -- propagation probability
    Output:
    S -- seed set
    Tspread -- spread values for different sizes of seed set
    '''
    S = []
    dd = PQ() # degree discount
    t = dict() # number of adjacent vertices that are in S
    d = dict() # degree of each vertex

    # initialize degree discount
    for u in G.nodes():
        d[u] = sum([G[u][v]['weight'] for v in G[u]]) # each edge adds degree 1
        # d[u] = len(G[u]) # each neighbor adds degree 1
        dd.add_task(u, -d[u]) # add degree of each node
        t[u] = 0

    # add vertices to S greedily
    # until necessary number of nodes can be reached
    Tspread = dict() # spread for different k
    k = 0
    Tspread[k] = 0
    stepk = 1
    while Tspread[k] < tsize:
        u, priority = dd.pop_item() # extract node with maximal degree discount
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight'] # increase number of selected neighbors
                priority = d[v] - 2*t[v] - (d[v] - t[v])*t[v]*p # discount of degree
                dd.add_task(v, -priority)
        # calculate IC spread with ic_step
        if stepk == ic_step:
            k = len(S)
            Tspread[k] = avgSize(G, S, p, iterations)
            print(k, Tspread[k])
            stepk = 0
        stepk += 1

    # search precise boundary
    if abs(int(math.ceil(float(ic_step)/2))) == 1:
        return S, Tspread
    else:
        return binarySearchBoundary(G, k, Tspread, tsize, ic_step, p, iterations)
Example No. 15
def read(G):

    # id to name & name to id dictionary
    id_to_name = dict()
    name_to_id = dict()
    # distance dictionary
    distance = dict()
    # predecessor dictionary
    predecessor = dict()
    # priority queue
    pq = PQ()

    # read nodes from movie_nodes.txt
    with open('movie_nodes.txt') as fn:
        # read data rows from file
        rows = fn.readlines()
        # for each row
        for row in rows:
            # string token
            tokens = row.strip('\n').split('\t')
            # dictionary for id -> name
            id_to_name[tokens[0]] = tokens[1]
            # dictionary for name -> id
            name_to_id[tokens[1]] = tokens[0]
            # graph add node
            G.add_node(tokens[0])
            # initialize all nodes distance
            distance[tokens[0]] = MAXINT
            # initialize priority queue
            pq.add_task(tokens[0], MAXINT)
            # initialize all nodes predecessor
            predecessor[tokens[0]] = None

    # read edges from movie_edgesw.txt
    with open('movie_edgesw.txt') as fn:
        # read data rows from file
        rows = fn.readlines()
        # for each row
        for row in rows:
            # string token
            tokens = row.strip('\n').split('\t')
            # graph add edges
            G.add_edge(tokens[0], tokens[1], weight=float(tokens[2]))

    return id_to_name, name_to_id, distance, predecessor, pq, G
def farthestNodes(k, G, m=1):
    S = []
    S_dist = PQ()
    for v in G.nodes():
        if v not in S:
            if m == 1:
                S_dist.add_task(v, cumulativeSum(G, S, v))
    while len(S) < k:
        u, priority = S_dist.pop_item()
        S.append(u)
        for v in G[u].keys():
            if v not in S:
                [priority, count, task] = S_dist.entry_finder[v]
                if m == 1:
                    S_dist.add_task(v, priority - 1)
    return S
Example No. 17
def getScores(G, Ep):
    '''Finds scores for GDD.
    Scores are degree for each node.
    '''

    scores = PQ() # degree discount
    active = dict()
    inactive = dict()

    # initialize degree discount
    for u in G:
        active[u] = 1
        # inactive[u] = sum([Ep[(u,v)]*G[u][v]['weight'] for v in G[u]])
        inactive[u] = sum([1 - (1 - Ep[(u,v)])**G[u][v]["weight"] for v in G[u]])
        priority = active[u]*(1 + inactive[u])
        scores.add_task(u, -priority) # add degree of each node

    return scores, active, inactive
Example No. 18
def degreeHeuristic(G, k, p=.01):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    k -- number of nodes needed
    p -- propagation probability
    Output:
    S -- chosen k nodes
    '''
    S = []
    d = PQ()
    for u in G:
        degree = sum([G[u][v]['weight'] for v in G[u]])
        # degree = len(G[u])
        d.add_task(u, -degree)
    for i in range(k):
        u, priority = d.pop_item()
        S.append(u)
    return S
Example No. 19
def generalGreedy(G, k, edgeProb, flag='N'):
    import time
    start = time.time()
    R = 5
    S = []
    for i in range(k):
        s = PQ()
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)
                for j in range(R):
                    [priority, count, task] = s.entry_finder[v]
                    s.add_task(v, priority - float(len(runIC(G, S + [v], edgeProb, flag))) / R)  # add normalized spread value
        task, priority = s.pop_item()
        S.append(task)
    return S
Example No. 20
def FIND_LDAG(G, v, t, Ew):
    '''
    Compute local DAG for vertex v.
    Reference: W. Chen "Scalable Influence Maximization in Social Networks under LT model" Algorithm 3
    INPUT:
        G -- networkx DiGraph object
        v -- vertex of G
        t -- parameter theta
        Ew -- influence weights of G
        NOTE: Since graph G can have multiple edges between u and v,
        total influence weight between u and v will be
        number of edges times influence weight of one edge.
    OUTPUT:
        D -- networkx DiGraph object that is also LDAG
    '''
    # initialize influence of nodes
    Inf = PQ()
    Inf.add_task(v, -1)
    x, priority = Inf.pop_item()
    M = -priority
    X = [x]

    D = nx.DiGraph()
    while M >= t:
        out_edges = G.out_edges([x], data=True)
        for (v1,v2,edata) in out_edges:
            if v2 in X:
                D.add_edge(v1, v2, **edata)
        in_edges = G.in_edges([x])
        for (u,_) in in_edges:
            if u not in X:
                try:
                    [pr, _, _] = Inf.entry_finder[u]
                except KeyError:
                    pr = 0
                Inf.add_task(u, pr - G[u][x]['weight']*Ew[(u,x)]*M)
        try:
            x, priority = Inf.pop_item()
        except KeyError:
            return D
        M = -priority
        X.append(x)

    return D
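A hypothetical usage sketch, assuming the uniform weighting where each in-edge of a node carries weight 1/indeg: FIND_LDAG grows the DAG only while the influence estimate M stays above theta, so the small cycle below is traversed once and then the queue empties.

import networkx as nx

G = nx.DiGraph()
G.add_edges_from([(1, 2), (2, 3), (3, 1), (2, 4)], weight=1)
Ew = {(u, v): 1.0 / G.in_degree(v) for u, v in G.edges()}  # uniform influence weights
D = FIND_LDAG(G, v=1, t=1.0 / 320, Ew=Ew)
print(sorted(D.edges()))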
Example No. 21
def FIND_LDAG(G, v, t, Ew):
    '''
    Compute local DAG for vertex v.
    Reference: W. Chen "Scalable Influence Maximization in Social Networks under LT model" Algorithm 3
    INPUT:
        G -- networkx DiGraph object
        v -- vertex of G
        t -- parameter theta
        Ew -- influence weights of G
        NOTE: Since graph G can have multiple edges between u and v,
        total influence weight between u and v will be
        number of edges times influence weight of one edge.
    OUTPUT:
        D -- networkx DiGraph object that is also LDAG
    '''
    # initialize influence of nodes
    Inf = PQ()
    Inf.add_task(v, -1)
    x, priority = Inf.pop_item()
    M = -priority
    X = [x]

    D = nx.DiGraph()
    while M >= t:
        out_edges = G.out_edges([x], data=True)
        for (v1, v2, edata) in out_edges:
            if v2 in X:
                D.add_edge(v1, v2, **edata)
        in_edges = G.in_edges([x])
        for (u, _) in in_edges:
            if u not in X:
                try:
                    [pr, _, _] = Inf.entry_finder[u]
                except KeyError:
                    pr = 0
                Inf.add_task(u, pr - G[u][x]['weight'] * Ew[(u, x)] * M)
        try:
            x, priority = Inf.pop_item()
        except KeyError:
            return D
        M = -priority
        X.append(x)

    return D
Example No. 22
def getScores(G, Ep):
    '''Finds scores for GDD.
    Scores are degree for each node.
    '''

    scores = PQ()  # degree discount
    active = dict()
    inactive = dict()

    # initialize degree discount
    for u in G:
        active[u] = 1
        # inactive[u] = sum([Ep[(u,v)]*G[u][v]['weight'] for v in G[u]])
        inactive[u] = sum(
            [1 - (1 - Ep[(u, v)])**G[u][v]["weight"] for v in G[u]])
        priority = active[u] * (1 + inactive[u])
        scores.add_task(u, -priority)  # add degree of each node

    return scores, active, inactive
Example No. 23
def degreeDiscountStar(G, k, p=.01):
    S = []
    scores = PQ()
    d = dict()
    t = dict()
    for u in G:
        d[u] = sum([G[u][v]['weight'] for v in G[u]])
        t[u] = 0
        score = -((1 - p)**t[u]) * (1 + (d[u] - t[u]) * p)
        scores.add_task(u, score)
    for iteration in range(k):
        u, priority = scores.pop_item()
        print(iteration, -priority)
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight']
                score = -((1 - p)**t[v]) * (1 + (d[v] - t[v]) * p)
                scores.add_task(v, score)
    return S
def singleDiscount(G, k, p=.1):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    k -- number of nodes needed
    p -- propagation probability
    Output:
    S -- chosen k nodes
    '''
    S = [] # set of activated nodes
    d = PQ() # degrees
    for u in G:
        degree = sum([G[u][v]['weight'] for v in G[u]])
        d.add_task(u, -degree)
    for i in range(k):
        u, priority = d.pop_item()
        S.append(u)
        for v in G[u]:
            if v not in S:
                [priority, count, task] = d.entry_finder[v]
                d.add_task(v, priority + G[u][v]['weight']) # discount degree by the weight of the edge
    return S
def degreeDiscountStar(G,k,p=.01):
    
    S = []
    scores = PQ()
    d = dict()
    t = dict()
    for u in G:
        d[u] = sum([G[u][v]['weight'] for v in G[u]])
        t[u] = 0
        score = -((1-p)**t[u])*(1+(d[u]-t[u])*p)
        scores.add_task(u, score)
    for iteration in range(k):
        u, priority = scores.pop_item()
        print(iteration, -priority)
        S.append(u)
        for v in G[u]:
            if v not in S:
                t[v] += G[u][v]['weight']
                score = -((1-p)**t[v])*(1+(d[v]-t[v])*p)
                scores.add_task(v, score)
    return S
Example No. 26
def singleDiscount(G, k, p=.1):
    ''' Finds initial set of nodes to propagate in Independent Cascade model (with priority queue)
    Input: G -- networkx graph object
    k -- number of nodes needed
    p -- propagation probability
    Output:
    S -- chosen k nodes
    '''
    S = []  # set of activated nodes
    d = PQ()  # degrees
    for u in G:
        degree = sum([G[u][v]['weight'] for v in G[u]])
        d.add_task(u, -degree)
    for i in range(k):
        u, priority = d.pop_item()
        S.append(u)
        for v in G[u]:
            if v not in S:
                [priority, count, task] = d.entry_finder[v]
                d.add_task(v, priority + G[u][v]['weight'])  # discount degree by the weight of the edge
    return S
Example No. 27
def generalGreedy_parallel_inf(G, k, p=.01):
    ''' Finds initial seed set S using general greedy heuristic
    Input: G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    The influence of each candidate node is computed in parallel, though the gain is modest since a single cascade simulation is cheap.
    '''
    # import time
    # start = time.time()
    # define map function
    # CC_parallel(G, seed_size, .01)

    # results = []#np.asarray([])
    R = 500  # number of times to run Random Cascade
    S = []  # set of selected nodes
    # add node to S if achieves maximum propagation for current chosen + this node
    for i in range(k):
        s = PQ()  # priority queue

        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)  # initialize spread value
                [priority, count, task] = s.entry_finder[v]
                pool = multiprocessing.Pool(multiprocessing.cpu_count() // 2)
                results = pool.map(map_IC, [(G, S + [v], p)] * R)
                pool.close()
                pool.join()
                s.add_task(v, priority - float(np.sum(results)) / R)
                # for j in range(R): # run R times Random Cascade
                # [priority, count, task] = s.entry_finder[v]
                #  s.add_task(v, priority - float(len(runIC(G, S + [v], p)))/R) # add normalized spread value
        task, priority = s.pop_item()
        S.append(task)
        # print(i, k, time.time() - start)
    return S
def spreadNewGreedyIC(G, targeted_size, step=1, p=.01, S0=[], iterations=200):
    ''' Finds initial set of nodes to propagate in Independent Cascade.
    Input: G -- networkx graph object
    targeted_size -- required spread size to reach
    p -- propagation probability
    Output: S -- set of k nodes chosen

    TODO: add step functionality
    '''

    import time
    start = time.time()

    assert type(S0) == list, "S0 must be a list. %s provided instead" % type(S0)
    S = list(S0)  # copy so the mutable default argument S0 is not mutated across calls
    tsize = 0
    R = iterations
    for i in range(R):
        T = runIC(G, S, p)
        tsize += float(len(T)) / R

    while tsize <= targeted_size:
        # number of additional nodes each remaining node would bring to S in R iterations
        s = PQ()
        Rv = dict()  # number of reachable nodes for node v
        # initialize values of s
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)
        # calculate potential additional spread for each vertex not in S
        prg_idx = 1
        idx = 1
        prcnt = .1  # for progress to print
        R = iterations  # number of iterations to run RanCas
        for j in range(R):
            # create new pruned graph E
            E = deepcopy(G)
            edge_rem = []  # edges to remove
            for (u, v) in E.edges():
                w = G[u][v]['weight']
                if random() < 1 - (1 - p)**w:
                    edge_rem.append((u, v))
            E.remove_edges_from(edge_rem)
            # find reachable vertices from S
            Rs = bfs(E, S)
            # find additional nodes each vertex would bring to the set S
            for v in G.nodes():
                if v not in S + Rs:  # node is neither chosen in S nor reached by the spread from S
                    [priority, c, task] = s.entry_finder[v]
                    s.add_task(v, priority - float(len(bfs(E, [v]))) / R)

            if idx == int(prg_idx * prcnt * R):
                print('%s%%...' % int(prg_idx * prcnt * 100))
                prg_idx += 1
            idx += 1
        # add vertex with maximum potential spread
        task, priority = s.pop_item()
        S.append(task)
        print(i, len(S), task, -priority, time.time() - start)

        tsize = 0
        for j in range(R):
            T = runIC(G, S, p)
            tsize += float(len(T)) / R
    return S
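spreadNewGreedyIC relies on a bfs helper that returns every vertex reachable in the pruned graph E from a list of seeds; the name and call signature are taken from the function above, and this sketch is one plausible implementation:

from collections import deque

def bfs(E, S):
    ''' Return the list of vertices reachable in graph E from any node in S. '''
    visited = set(S)
    queue = deque(S)
    while queue:
        u = queue.popleft()
        for v in E[u]:
            if v not in visited:
                visited.add(v)
                queue.append(v)
    return list(visited)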
Example No. 29
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 h_l=0,
                                 color='all',
                                 seed_size_budget=14,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Finds initial seed set S using general greedy heuristic
    Input: G -- networkx Graph object
    k -- fraction of the population that needs to be influenced in all three groups
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    '''
    # import time
    # start = time.time()
    # R = 200 # number of times to run Random Cascade

    stats = ut.graph_stats(G, print_stats=False)

    if type_algo == 1:
        filename = filename + '_set_cover_reach_' + str(budget)
    elif type_algo == 2:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    elif type_algo == 3:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'

    reach = 0.0
    S = []  # set of selected nodes
    # add node to S if achieves maximum propagation for current chosen + this node
    influenced = []
    influenced_r = []
    influenced_b = []
    influenced_n = []
    seeds_r = []
    seeds_b = []
    seeds_n = []

    # try:
    #
    #     influenced, influenced_r, influenced_b, influenced_n, seeds_r, seeds_b, seeds_n = ut.read_files(filename)
    #     reach = min(influenced_r[-1] / stats['group_r'], budget) + min(influenced_b[-1] / stats['group_b'])+ min(influenced_n[-1] / stats['group_r'], budget)
    #     S = seeds_r[-1] + seeds_b[-1]+ seeds_n[-1]
    #     if reach >= budget:
    #         # ut.write_files(filename,influenced, influenced_a, influenced_b, seeds_a, seeds_b)
    #         print(influenced_r)
    #         print("\n\n")
    #         print(influenced_b)
    #         print("\n\n")
    #         print(influenced_n)
    #         print(f" reach: {reach}")
    #         ut.plot_influence(influenced_r, influenced_b, influenced_n, len(S), filename, stats['group_a'], stats['group_b'], stats['group_c'],
    #                           [len(S_a) for S_a in seeds_r], [len(S_b) for S_b in seeds_b], [len(S_c) for S_c in seeds_n])
    #         return (influenced, influenced_r, influenced_b, influenced_n, seeds_r, seeds_b, seeds_n)
    #
    # except FileNotFoundError:
    #     print(f'{filename} not Found ')

    i = 0
    S = []
    while reach < 3 * budget:
        # while len(S) < seed_size_budget:  # cannot parallellize

        pool = multiprocessing.Pool(multiprocessing.cpu_count() - 1)
        # pool = multiprocessing.Pool(1)

        # for v in G.nodes():
        #     results = pool.map(map_select_next_seed_set_cover, (G, S, v))

        if type_algo == 1:
            # results = pool.map(map_select_next_seed_set_cover, ((G, S, v) for v in G.nodes()))
            # results = pool.starmap(map_select_next_seed_set_cover, zip(repeat(G), repeat(S), list(G.nodes()),repeat(h_l), repeat(color)))
            results = pool.map(map_select_next_seed_set_cover,
                               ((G, S, v, h_l, color) for v in G.nodes()))
        elif type_algo == 2:
            results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_b)
                                               for v in G.nodes()))
        elif type_algo == 3:
            results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_a)
                                               for v in G.nodes()))

        pool.close()
        pool.join()

        s = PQ()  # priority queue
        for v, p, p_a, p_b, p_c in results:
            # s.add_task(v, -(min(p_a / stats['group_r'], budget) + min(p_b / stats['group_b'], budget)))
            s.add_task(
                v, -(min(p_a / stats['group_r'], budget) +
                     min(p_b / stats['group_b'], budget) +
                     min(p_c / stats['group_n'], budget)))

        node, priority = s.pop_item()
        # priority = -priority # as the current priority is negative fraction
        S.append(node)

        # results = map_select_next_seed_set_cover, ((G, S, v) for v in G.nodes())

        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_r.append(I_a)
        influenced_b.append(I_b)
        influenced_n.append(I_c)
        S_red = []
        S_blue = []
        S_purple = []
        group = G.nodes[node]['color']

        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        seeds_r.append(S_red)  # ids of the seeds so the influence can be recreated
        seeds_b.append(S_blue)
        seeds_n.append(S_purple)

        # reach += -priority both are fine
        reach_a = I_a / stats['group_r']
        reach_b = I_b / stats['group_b']
        reach_c = I_c / stats['group_n']
        reach = (min(reach_a, budget) + min(reach_b, budget) +
                 min(reach_c, budget))

        print(
            f'{i + 1} Node ID {node} group {group} Ia = {I_a} Ib {I_b} Ic {I_c}'
            f' reach: {reach} reach_a {reach_a} reach_b {reach_b} reach_c {reach_c}')
        # print(i, k, time.time() - start)
        i += 1

    # ut.plot_influence(influenced_r, influenced_b, influenced_n, len(S), filename, stats['group_r'], stats['group_b'], stats['group_n'],
    #                   [len(S_r) for S_r in seeds_r], [len(S_b) for S_b in seeds_b], [len(S_n) for S_n in seeds_n])

    # ut.plot_influence_diff(influenced_r, influenced_b, influenced_n, len(S), ['Rep','Dem','Neut'], filename,
    #                     stats['group_r'], stats['group_b'], stats['group_n'])

    ut.write_files(filename, influenced, influenced_r, influenced_b,
                   influenced_n, seeds_r, seeds_b, seeds_n)

    return (influenced, influenced_r, influenced_b, influenced_n, seeds_r,
            seeds_b, seeds_n)
Example No. 30
def generalGreedy_node_parallel(filename,
                                G,
                                budget,
                                h_l,
                                gamma1,
                                gamma2,
                                beta1=1.0,
                                beta2=1.0,
                                type_algo=1):
    ''' Finds initial seed set S using general greedy heuristic
    Input: G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    '''
    # import time
    # start = time.time()
    # R = 200 # number of times to run Random Cascade
    S = []  # set of selected nodes
    influenced = []
    influenced_a = []
    influenced_b = []
    influenced_c = []
    seeds_a = []
    seeds_b = []
    seeds_c = []
    seed_range = []
    if type_algo == 1:
        filename = filename + '_greedy_'

    elif type_algo == 2:
        filename = filename + f'_log_gamma_{gamma1,gamma2}_'

    elif type_algo == 3:
        filename = filename + f'_root_gamma_{gamma1}_beta_{beta1,beta2}_'

    elif type_algo == 4:
        filename = filename + f'_root_majority_gamma_{gamma1}_beta_{beta1,beta2}_'

    stats = ut.graph_stats(G, print_stats=False)

    try:

        influenced, influenced_a, influenced_b, influenced_c, seeds_a, seeds_b, seeds_c = ut.read_files(
            filename)
        S = seeds_a[-1] + seeds_b[-1] + seeds_c[-1]

        if len(S) >= budget:
            # ut.write_files(filename,influenced, influenced_a, influenced_b, seeds_a, seeds_b)
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print("\n\n")
            print(influenced_c)
            print(" Seed length ", len(S))

            ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                              filename, stats['group_a'], stats['group_b'],
                              stats['group_c'], [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b],
                              [len(S_c) for S_c in seeds_c])

            return (influenced, influenced_a, influenced_b, influenced_c,
                    seeds_a, seeds_b, seeds_c)
        else:
            seed_range = range(budget - len(S))

    except FileNotFoundError:
        print(f'{filename} not found')

        seed_range = range(budget)

    # add node to S if achieves maximum propagation for current chosen + this node
    for i in seed_range:  # cannot parallelize

        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        # results = None
        if type_algo == 1:
            results = pool.starmap(
                map_select_next_seed_set_cover,
                zip(repeat(G), repeat(S), list(G.nodes()), repeat(h_l)))
            # results = pool.map(map_select_next_seed_greedy, ((G, S, v,h_l) for v in G.nodes()))
        elif type_algo == 2:
            results = pool.map(map_select_next_seed_log_greedy,
                               ((G, S, v, gamma1, gamma2) for v in G.nodes()))
        elif type_algo == 3:
            results = pool.map(map_select_next_seed_root_greedy,
                               ((G, S, v, gamma1, beta1, beta2)
                                for v in G.nodes()))
        elif type_algo == 4:
            results = pool.map(map_select_next_seed_root_majority_greedy,
                               ((G, S, v, gamma1) for v in G.nodes()))

        pool.close()
        pool.join()

        s = PQ()  # priority queue
        # if results == None:

        for v, priority, p_a, p_b, p_c in results:
            s.add_task(v, -priority)

        node, priority = s.pop_item()
        S.append(node)
        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        influenced_c.append(I_c)
        S_red = []
        S_blue = []
        S_purple = []
        group = G.nodes[node]['color']
        print(
            str(i + 1) + ' Selected Node is ' + str(node) + ' group ' +
            str(group) + ' Ia = ' + str(I_a) + ' Ib = ' + str(I_b) + ' Ic = ' +
            str(I_c))
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        seeds_a.append(S_red)  # ids of the seeds so the influence can be recreated
        seeds_b.append(S_blue)
        seeds_c.append(S_purple)
        # print(i, k, time.time() - start)
    # print ( "\n \n  I shouldn't be here.   ********* \n \n ")
    ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                      filename, stats['group_r'], stats['group_b'],
                      stats['group_n'], [len(S_a) for S_a in seeds_a],
                      [len(S_b)
                       for S_b in seeds_b], [len(S_c) for S_c in seeds_c])

    ut.write_files(filename, influenced, influenced_a, influenced_b,
                   influenced_c, seeds_a, seeds_b, seeds_c)

    return (influenced, influenced_a, influenced_b, influenced_c, seeds_a,
            seeds_b, seeds_c)
Example No. 31
def generalGreedy_node_parallel(filename,
                                G,
                                budget,
                                gamma,
                                beta=1.0,
                                type_algo=1,
                                G_greedy=None):
    ''' Finds initial seed set S using general greedy heuristic
    Input: G -- networkx Graph object
    k -- number of initial nodes needed
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    '''

    if G_greedy is None:
        G_greedy = G

    # import time
    # start = time.time()
    # R = 200 # number of times to run Random Cascade
    S = []  # set of selected nodes
    influenced = []
    influenced_grouped = []
    seeds = []
    seed_range = []
    if type_algo == 1:
        filename = filename + f'_greedy_'

    elif type_algo == 2:
        filename = filename + f'_log_gamma_{gamma}_'

    elif type_algo == 3:
        filename = filename + f'_root_gamma_{gamma}_beta_{beta}_'

    elif type_algo == 4:
        filename = filename + f'_root_majority_gamma_{gamma}_beta_{beta}_'

    # stats = ut.graph_stats(G, print_stats=False)

    try:

        influenced, influenced_a, influenced_b, seeds_a, seeds_b = ut.read_files(
            filename)

        raise Exception('It was supposed not to be reached.')

        S = seeds_a[-1] + seeds_b[-1]

        if len(S) >= budget:
            # ut.write_files(filename,influenced, influenced_a, influenced_b, seeds_a, seeds_b)
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print(" Seed length ", len(S))

            ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                              stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])

            return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)
        else:
            seed_range = range(budget - len(S))

    except FileNotFoundError:
        print(f'{filename} not Found ')

        seed_range = range(budget)

    # add node to S if achieves maximum propagation for current chosen + this node
    for i in seed_range:  # cannot parallelize
        print('--------', i)
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        # results = None
        if type_algo == 1:
            results = pool.map(map_select_next_seed_greedy,
                               ((G_greedy, S, v) for v in G_greedy.nodes()))
        elif type_algo == 2:
            results = pool.map(map_select_next_seed_log_greedy,
                               ((G_greedy, S, v, gamma)
                                for v in G_greedy.nodes()))
        elif type_algo == 3:
            results = pool.map(map_select_next_seed_root_greedy,
                               ((G_greedy, S, v, gamma, beta)
                                for v in G_greedy.nodes()))
        elif type_algo == 4:
            results = pool.map(map_select_next_seed_root_majority_greedy,
                               ((G_greedy, S, v, gamma)
                                for v in G_greedy.nodes()))

        pool.close()
        pool.join()

        s = PQ()  # priority queue
        # if results == None:

        for v, priority in results:
            s.add_task(v, priority)

        node, priority = s.pop_item()
        S.append(node)
        I, I_grouped = map_fair_IC((G, S))
        influenced.append(I)
        influenced_grouped.append(I_grouped)
        group = G.nodes[node]['color']
        print(
            f'{i + 1} Selected Node is {node} group {group} I_grouped = {I_grouped}'
        )

        S_g = {
            c: []
            for c in np.unique([G.nodes[v]['color'] for v in G.nodes])
        }
        for n in S:
            c = G.nodes[n]['color']
            S_g[c].append(n)

        seeds.append(S_g)  # ids of the seeds so the influence can be recreated
        # print(i, k, time.time() - start)
    # print ( "\n \n  I shouldn't be here.   ********* \n \n ")
    # ut.plot_influence(influenced_a, influenced_b, len(S), filename, stats['group_a'], stats['group_b'],
    #                   [len(S_a) for S_a in seeds_a], [len(S_b) for S_b in seeds_b])

    ut.write_files(filename, influenced, influenced_grouped, seeds)

    return (influenced, influenced_grouped, seeds)
Example No. 32
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Finds initial seed set S using general greedy heuristic
    Input: G -- networkx Graph object
    k -- fraction of the population that needs to be influenced in both groups
    p -- propagation probability
    Output: S -- initial set of k nodes to propagate
    '''
    #import time
    #start = time.time()
    #R = 200 # number of times to run Random Cascade

    stats = ut.graph_stats(G, print_stats=False)

    if type_algo == 1:
        filename = filename + f'_set_cover_reach_{budget}_'
    elif type_algo == 2:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    elif type_algo == 3:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'

    reach = 0.0
    S = []  # set of selected nodes
    # add node to S if achieves maximum propagation for current chosen + this node
    influenced = []
    influenced_a = []
    influenced_b = []
    seeds_a = []
    seeds_b = []

    try:

        influenced, influenced_a, influenced_b, seeds_a, seeds_b = ut.read_files(
            filename)
        reach = min(influenced_a[-1] / stats['group_a'], budget) + min(
            influenced_b[-1] / stats['group_b'], budget)
        S = seeds_a[-1] + seeds_b[-1]
        if reach >= budget:
            #ut.write_files(filename,influenced, influenced_a, influenced_b, seeds_a, seeds_b)
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print(f" reach: {reach}")
            ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                              stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])
            return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)

    except FileNotFoundError:
        print(f'{filename} not Found ')

    i = 0
    while reach < 2 * budget:  # cannot parallelize

        pool = multiprocessing.Pool(multiprocessing.cpu_count() - 1)

        if type_algo == 1:
            results = pool.map(map_select_next_seed_set_cover,
                               ((G, S, v) for v in G.nodes()))
        elif type_algo == 2:
            results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_b)
                                               for v in G.nodes()))
        elif type_algo == 3:
            results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_a)
                                               for v in G.nodes()))

        pool.close()
        pool.join()

        s = PQ()  # priority queue
        for v, p, p_a, p_b in results:  #
            s.add_task(
                v, -(min(p_a / stats['group_a'], budget) +
                     min(p_b / stats['group_b'], budget)))

        node, priority = s.pop_item()
        #priority = -priority # as the current priority is negative fraction
        S.append(node)

        I, I_a, I_b = map_fair_IC((G, S))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        S_red = []
        S_blue = []
        group = G.nodes[node]['color']

        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            else:
                S_blue.append(n)

        seeds_a.append(S_red)  # ids of the seeds so the influence can be recreated
        seeds_b.append(S_blue)

        #reach += -priority both are fine
        reach_a = I_a / stats['group_a']
        reach_b = I_b / stats['group_b']
        reach = (min(reach_a, budget) + min(reach_b, budget))

        print(
            f'{i+1} Node ID {node} group {group} Ia = {I_a} Ib {I_b} reach: {reach} reach_a {reach_a} reach_b {reach_b}'
        )
        #print(i, k, time.time() - start)
        i += 1

    ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                      stats['group_a'], stats['group_b'],
                      [len(S_a)
                       for S_a in seeds_a], [len(S_b) for S_b in seeds_b])

    ut.write_files(filename, influenced, influenced_a, influenced_b, seeds_a,
                   seeds_b)

    return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)
Example No. 33
def LDAG_heuristic(G, Ew, k, t):
    ''' LDAG algorithm for seed selection.
    Reference: [1] Algorithm 5
    Input:
    G -- directed graph (nx.DiGraph)
    Ew -- influence weights of edges (eg. uniform, random) (dict)
    k -- size of seed set (int)
    t -- parameter theta for finding LDAG (0 <= t <= 1; typical value: 1/320) (float)
    Output:
    S -- seed set (list)
    '''
    # define variables
    S = []
    IncInf = PQ()
    for node in G:
        IncInf.add_task(node, 0)
    # IncInf = dict(zip(G.nodes(), [0]*len(G))) # in case of usage dict instead of PQ
    LDAGs = dict()
    InfSet = dict()
    ap = dict()
    A = dict()

    print('Initialization phase')
    for v in G:
        LDAGs[v] = FIND_LDAG(G, v, t, Ew)
        # update influence set for each node in LDAGs[v] with its root
        for u in LDAGs[v]:
            InfSet.setdefault(u, []).append(v)
        alpha = computeAlpha(LDAGs[v], Ew, S, v)
        A.update(alpha)  # add new linear coefficients to A
        # update incremental influence of all nodes in LDAGs[v] with alphas
        for u in LDAGs[v]:
            ap[(v, u)] = 0  # additionally set initial activation probability (line 7)
            priority, _, _ = IncInf.entry_finder[u]  # find previous value of IncInf
            IncInf.add_task(u, priority - A[(v, u)])  # and add alpha
            # IncInf[u] += A[(v, u)] # in case of using dict instead of PQ

    print('Main loop')
    for it in range(k):
        s, priority = IncInf.pop_item()  # choose node with biggest incremental influence
        print(it + 1, s, -priority)
        for v in InfSet[s]:  # for all nodes that s can influence
            if v not in S:
                D = LDAGs[v]
                # update alpha_v_u for all u that can reach s in D (lines 17-22)
                alpha_v_s = A[(v, s)]
                dA = computeAlpha(D, Ew, S, s, val=-alpha_v_s)
                for (s, u) in dA:
                    if u not in S + [s]:  # don't update IncInf if it's already in S
                        A[(v, u)] += dA[(s, u)]
                        priority, _, _ = IncInf.entry_finder[u]  # previous incremental influence of u
                        IncInf.add_task(u, priority - dA[(s, u)] * (1 - ap[(v, u)]))  # and update it accordingly
                # update ap_v_u for all u reachable from s in D (liens 23-28)
                dap = computeActProb(D, Ew, S + [s], s, val=1 - ap[(v, s)])
                for (s, u) in dap:
                    if u not in S + [s]:
                        ap[(v, u)] += dap[(s, u)]
                        priority, _, _ = IncInf.entry_finder[
                            u]  # find previous value of incremental influence of u
                        IncInf.add_task(
                            u, priority + A[(v, u)] *
                            dap[(s, u)])  # and update it accordingly
        S.append(s)
    return S
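
A minimal usage sketch for LDAG_heuristic, assuming PQ, FIND_LDAG, computeAlpha and computeActProb from this module are in scope; the uniform weighting used to build Ew below is one common choice, not the only one:

import networkx as nx

G = nx.DiGraph()
G.add_edges_from([(1, 2), (2, 3), (3, 1), (2, 4), (4, 5)])

# e.g. uniform weighting: each edge (u, v) carries weight 1/in-degree(v)
Ew = {(u, v): 1.0 / G.in_degree(v) for (u, v) in G.edges()}

S = LDAG_heuristic(G, Ew, k=2, t=1.0 / 320)
print(S)  # two seeds with the largest incremental influence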
Example no. 34
def representativeNodes(G, k, metric=1):
    ''' Finds the most distinguishable (representative) nodes in graph G greedily.
    At each step, takes the node furthest from the already chosen nodes.

    Input: G -- networkx object graph with weighted edges
    k -- number of nodes needed
    metric -- parameter for differentiating representative qualities
    metric == 1 trying to maximize total distance in the chosen set of k nodes
    metric == 2 trying to maximize minimal distance between a pair of k nodes
    Output:
    S -- chosen k nodes
    objv -- objective value according to the chosen metric and set of nodes
    '''

    S = []  # set of chosen nodes
    S_dist = PQ()  # distances from each node in G to set S according to metric

    # initialize S with furthest vertices
    try:
        u, v, d = max(G.edges(data=True), key=lambda edge: edge[2]['weight'])
    except KeyError:
        raise KeyError('Most likely you have no weight attribute')
    S.extend([u, v])

    # compute distances from each node in G to S
    for v in G.nodes():
        if v not in S:  # calculate only for nodes not yet in S
            if metric == 1:
                S_dist.add_task(v, -_sumDist(G, S, v))  # take minus to pop the maximum value from priority queue
            elif metric == 2:
                S_dist.add_task(v, -_minDist(G, S, v))  # take minus to pop the maximum value from priority queue

    # add new nodes to the set greedily
    while len(S) < k:
        u, priority = S_dist.pop_item()  # find maximum value of distance to set S
        S.append(u)  # append that node to S

        # only increase distance for nodes that are connected to u
        for v in G[u]:
            if v not in S:  # update only remaining nodes
                [priority, count, task] = S_dist.entry_finder[v]  # distance from the previous step
                try:
                    if metric == 1:
                        S_dist.add_task(v, priority - G[u][v]['weight'])  # add distance to the new member of S
                    elif metric == 2:
                        S_dist.add_task(v, max(priority, -G[u][v]['weight']))  # update min distance to the set S
                except KeyError:
                    raise KeyError('Vertices %s and %s caused the problem' % (u, v))

    # extract objective value of the chosen set
    if metric == 1:
        objv = 0
        for u in S:
            objv += _sumDist(G, S, u)
    elif metric == 2:
        objv = float('Inf')
        for u in S:
            objv = min(objv, _minDist(G, S, u))

    return S, objv
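
representativeNodes relies on the helpers _sumDist and _minDist, which are not shown here. A plausible sketch, consistent with the incremental updates in the loop above (only direct edges into S contribute, with edge weight used as distance) -- an assumption, not the original implementation:

def _sumDist(G, S, v):
    # Assumed helper: total edge weight from v to the chosen set S;
    # members of S not adjacent to v contribute nothing.
    return sum(G[v][u]['weight'] for u in S if u in G[v])

def _minDist(G, S, v):
    # Assumed helper: smallest edge weight from v to any node of S,
    # infinite if v has no edge into S.
    dists = [G[v][u]['weight'] for u in S if u in G[v]]
    return min(dists) if dists else float('Inf')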
def spreadNewGreedyIC(G, targeted_size, step=1, p=.01, S0=[], iterations=200):
    ''' Finds initial set of nodes to propagate in Independent Cascade.
    Input: G -- networkx graph object
    targeted_size -- desired size of the influenced set
    step -- step size (currently unused; see TODO)
    p -- propagation probability
    S0 -- initial seed set
    iterations -- number of Monte-Carlo runs of Random Cascade
    Output: S -- set of nodes chosen

    TODO: add step functionality
    '''

    import time
    start = time.time()

    assert type(S0) == list, "S0 must be a list. %s provided instead" % type(S0)
    S = S0 # set of selected nodes
    tsize = 0
    R = iterations
    for i in range(R):
        T = runIC(G, S, p)
        tsize += float(len(T))/R

    while tsize <= targeted_size:
        s = PQ() # number of additional nodes each remaining node would bring to S over R iterations
        Rv = dict() # number of reachable nodes for node v
        # initialize values of s
        for v in G.nodes():
            if v not in S:
                s.add_task(v, 0)
        # calculate potential additional spread for each vertex not in S
        prg_idx = 1
        idx = 1
        prcnt = .1 # for progress to print
        R = iterations # number of iterations to run RanCas
        for j in range(R):
            # create new pruned graph E
            E = deepcopy(G)
            edge_rem = [] # edges to remove
            for (u,v) in E.edges():
                w = G[u][v]['weight']
                if random() < 1 - (1 - p)**w:
                    edge_rem.append((u,v))
            E.remove_edges_from(edge_rem)
            # find reachable vertices from S
            Rs = bfs(E, S)
            # find additional nodes each vertex would bring to the set S
            for v in G.nodes():
                if v not in S + Rs: # if node is not in S and was not reached by spread from S
                    [priority, c, task] = s.entry_finder[v]
                    s.add_task(v, priority - float(len(bfs(E, [v])))/R)

            if idx == int(prg_idx*prcnt*R):
                print('%s%%...' % int(prg_idx*prcnt*100))
                prg_idx += 1
            idx += 1
        # add vertex with maximum potential spread
        task, priority = s.pop_item()
        S.append(task)
        print(i, len(S), task, -priority, time.time() - start)

        tsize = 0
        for j in range(R):
            T = runIC(G, S, p)
            tsize += float(len(T))/R
    return S
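
spreadNewGreedyIC calls a bfs helper that is not shown here. A minimal sketch consistent with its use above (return all vertices of E reachable from the seed list S, seeds included) -- an assumed implementation, not the original:

from collections import deque

def bfs(E, S):
    # Assumed helper: breadth-first search from every seed in S,
    # returning the list of vertices reachable in graph E.
    visited = set(S)
    queue = deque(S)
    while queue:
        u = queue.popleft()
        for v in E[u]:
            if v not in visited:
                visited.add(v)
                queue.append(v)
    return list(visited)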