def effect_of_group_sizes(self):
        '''
        Collect the evaluation data for

        ii) varying p_g_a (the group ratio)

        For every ratio in ``self.group_ratios`` the matching graph is loaded
        with ``ut.load_graph`` and ``self.calculate_greedy`` is run on it.

        Returns a tuple of four lists
        ``(influenced_a_list, influenced_b_list, seeds_a_list, seeds_b_list)``
        holding one entry per group ratio, in iteration order.
        '''
        influenced_a_list = []
        influenced_b_list = []
        seeds_a_list = []
        seeds_b_list = []
        for group_ratio in self.group_ratios:
            # Must be an f-string: without the prefix the braces were kept
            # literally and every ratio shared one placeholder file name.
            filename = f'{self.filename}_{self.num_nodes}_{self.p_with}_{self.p_across}_{group_ratio}'

            # read in graph
            G = ut.load_graph(filename, self.p_with, self.p_across,
                              group_ratio, self.num_nodes)

            _, influenced_a, influenced_b, seeds_a, seeds_b = self.calculate_greedy(
                filename, G)

            # Return value is not needed here; presumably called for the
            # summary it prints when print_stats=True.
            ut.graph_stats(G, print_stats=True)

            influenced_a_list.append(influenced_a)
            influenced_b_list.append(influenced_b)
            seeds_a_list.append(seeds_a)
            seeds_b_list.append(seeds_b)

        print(" ******* Finished group size analysis *******")

        return (influenced_a_list, influenced_b_list, seeds_a_list,
                seeds_b_list)
    def compare_with_greedy(self):
        '''
        Compare plain greedy with the log variant [with different gammas]
        and the root variant with different gammas.

        For every algorithm type in ``self.types`` (1 = greedy, 2 = log,
        3 = root) and every gamma configured for that type, the seed
        selection is run on ``self.G``, the per-run influence is plotted and
        finally all runs are plotted together via ``ut.plot_influence_diff``.

        Raises ValueError for an algorithm type other than 1, 2 or 3.
        '''
        influenced_a_list = []
        influenced_b_list = []
        labels = []
        # Must be an f-string, otherwise the placeholders are used verbatim.
        filename = f'{self.filename}_{self.num_nodes}_{self.p_with}_{self.p_across}_{self.group_ratio}'

        stats = ut.graph_stats(self.G, print_stats=False)
        for t in self.types:
            if t == 1:
                gammas = [1.0]
            elif t == 2:
                gammas = self.gammas_log
            elif t == 3:
                gammas = self.gammas_root
            else:
                # Previously an unknown type silently reused the gammas and
                # label of the preceding type (or hit an unbound local).
                raise ValueError(f'unsupported algorithm type: {t!r}')

            for gamma in gammas:

                # NOTE(review): assumes a generalGreedy_node_parallel variant
                # taking (filename, G, seed_size, gamma, type_algo) and
                # returning a 5-tuple -- confirm against the imported one.
                influenced, influenced_a, influenced_b, seeds_a, seeds_b = generalGreedy_node_parallel(
                    filename, self.G, self.seed_size, gamma, type_algo=t)

                ut.plot_influence(influenced_a, influenced_b, self.seed_size,
                                  filename, stats['group_a'], stats['group_b'],
                                  [len(S_a) for S_a in seeds_a],
                                  [len(S_b) for S_b in seeds_b])
                influenced_a_list.append(influenced_a)
                influenced_b_list.append(influenced_b)

                if t == 1:
                    label = "Greedy"
                elif t == 2:
                    label = f'Log_gamma{gamma}'
                else:
                    label = f'Root_gamma{gamma}'

                labels.append(label)

        filename = "results/greedy_log_root_"
        ut.plot_influence_diff(influenced_a_list, influenced_b_list,
                               self.seed_size, labels, filename,
                               stats['group_a'], stats['group_b'])
    def effect_of_across_group_connectivity(self):
        '''
        Collect the evaluation data for

        i) varying p_across with p_g_a = 0.5

        For every value in ``self.p_acrosses`` the matching graph is loaded,
        ``self.calculate_greedy`` is run and the resulting influence is
        plotted.

        Returns a tuple of four lists
        ``(influenced_a_list, influenced_b_list, seeds_a_list, seeds_b_list)``
        holding one entry per p_across value, in iteration order.
        '''
        # Have to do this for multiple runs, and or multiple graphs
        influenced_a_list = []
        influenced_b_list = []
        seeds_a_list = []
        seeds_b_list = []
        group_ratio = 0.5  # just to bring out the effect of p_across

        for p_across in self.p_acrosses:
            # Must be an f-string: without the prefix every p_across value
            # resolved to the same literal placeholder file name.
            filename = f'{self.filename}_{self.num_nodes}_{self.p_with}_{p_across}_{group_ratio}'

            # read in graph
            G = ut.load_graph(filename, self.p_with, p_across, group_ratio,
                              self.num_nodes)

            # NOTE(review): the graph loaded above is not passed on here;
            # confirm calculate_greedy does not need it as an argument.
            _, influenced_a, influenced_b, seeds_a, seeds_b = self.calculate_greedy(
                filename)

            stats = ut.graph_stats(G, print_stats=True)

            ut.plot_influence(influenced_a, influenced_b, self.seed_size,
                              filename, stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])

            influenced_a_list.append(influenced_a)
            influenced_b_list.append(influenced_b)
            seeds_a_list.append(seeds_a)
            seeds_b_list.append(seeds_b)

        print(" ******* Finished connectivity analysis *******")

        return (influenced_a_list, influenced_b_list, seeds_a_list,
                seeds_b_list)
Exemplo n.º 4
0
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 h_l=0,
                                 color='all',
                                 seed_size_budget=14,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Finds initial seed set S using general greedy heuristic (three groups)

    Seeds are added one at a time until the summed, per-group capped reach
    (influenced fraction of each group, capped at `budget`) is at least
    3 * budget.

    Input: filename -- prefix of the result files; a suffix depending on
                       type_algo is appended before writing
    G -- networkx Graph object; nodes carry a 'color' attribute
         ('red', 'blue', anything else is treated as the third group)
    budget -- fraction of population needs to be influenced in all three groups
    h_l -- forwarded unchanged to the candidate evaluation and map_fair_IC
    color -- forwarded unchanged to the candidate evaluation (type_algo 1)
    seed_size_budget -- currently unused, kept for interface compatibility
    gamma_a, gamma_b -- forwarded to map_IC_timing (type_algo 2); type_algo 3
                        passes gamma_a for both
    type_algo -- 1: set cover, 2 / 3: timing based variants
    Output: (influenced, influenced_r, influenced_b, influenced_n,
             seeds_r, seeds_b, seeds_n) -- one entry per selected seed
    Raises: ValueError if type_algo is not 1, 2 or 3 once a selection round
            is started
    '''
    stats = ut.graph_stats(G, print_stats=False)

    if type_algo == 1:
        filename = filename + '_set_cover_reach_' + str(budget)
    elif type_algo == 2:
        # f-strings: without the prefix the placeholders ended up verbatim
        # in the file name.
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    elif type_algo == 3:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'

    reach = 0.0
    S = []  # set of selected nodes
    influenced = []
    influenced_r = []
    influenced_b = []
    influenced_n = []
    seeds_r = []
    seeds_b = []
    seeds_n = []

    # cpu_count() - 1 is 0 on a single-core host, which Pool rejects.
    n_workers = max(1, multiprocessing.cpu_count() - 1)

    i = 0
    # add node to S if achieves maximum propagation for current chosen + this node
    while reach < 3 * budget:  # the rounds themselves cannot be parallelized

        pool = multiprocessing.Pool(n_workers)
        try:
            if type_algo == 1:
                results = pool.map(map_select_next_seed_set_cover,
                                   ((G, S, v, h_l, color) for v in G.nodes()))
            elif type_algo == 2:
                results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_b)
                                                   for v in G.nodes()))
            elif type_algo == 3:
                results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_a)
                                                   for v in G.nodes()))
            else:
                raise ValueError('unsupported type_algo: ' + repr(type_algo))
        finally:
            # Release the worker processes even if a candidate evaluation fails.
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        for v, p, p_a, p_b, p_c in results:
            # Scores are negated; presumably PQ pops the smallest priority.
            # The third term uses p_c (it previously reused p_b).
            s.add_task(
                v, -(min(p_a / stats['group_r'], budget) +
                     min(p_b / stats['group_b'], budget) +
                     min(p_c / stats['group_n'], budget)))

        node, priority = s.pop_item()
        S.append(node)

        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_r.append(I_a)
        influenced_b.append(I_b)
        influenced_n.append(I_c)
        S_red = []
        S_blue = []
        S_purple = []
        group = G.nodes[node]['color']

        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        seeds_r.append(
            S_red)  # id's of the seeds so the influence can be recreated
        seeds_b.append(S_blue)
        seeds_n.append(S_purple)

        reach_a = I_a / stats['group_r']
        reach_b = I_b / stats['group_b']
        reach_c = I_c / stats['group_n']
        reach = (min(reach_a, budget) + min(reach_b, budget) +
                 min(reach_c, budget))

        print(
            str(i + 1) + ' Node ID ' + str(node) + ' group ' + str(group) +
            ' Ia  = ' + str(I_a) + ' Ib ' + str(I_b) + ' Ic ' + str(I_c) +
            ' each: ' + str(reach) + ' reach_a ' + str(reach_a) + ' reach_b ' +
            str(reach_b) + ' reach_c ' + str(reach_c))
        i += 1

    ut.write_files(filename, influenced, influenced_r, influenced_b,
                   influenced_n, seeds_r, seeds_b, seeds_n)

    return (influenced, influenced_r, influenced_b, influenced_n, seeds_r,
            seeds_b, seeds_n)
Exemplo n.º 5
0
def generalGreedy_node_parallel(filename,
                                G,
                                budget,
                                h_l,
                                gamma1,
                                gamma2,
                                beta1=1.0,
                                beta2=1.0,
                                type_algo=1):
    ''' Finds initial seed set S using general greedy heuristic

    Results stored under the derived file name are reused: if they already
    hold at least `budget` seeds they are plotted and returned, otherwise
    the selection continues from the stored seeds.

    Input: filename -- prefix of the result files; a suffix depending on
                       type_algo is appended
    G -- networkx Graph object; nodes carry a 'color' attribute
         ('red', 'blue', anything else is treated as the third group)
    budget -- number of initial nodes needed
    h_l -- forwarded unchanged to the candidate evaluation and map_fair_IC
    gamma1, gamma2, beta1, beta2 -- forwarded to the per-type candidate
                                    evaluation and used in the file name
    type_algo -- 1: greedy, 2: log, 3: root, 4: root majority
    Output: (influenced, influenced_a, influenced_b, influenced_c,
             seeds_a, seeds_b, seeds_c) -- one entry per selected seed
    Raises: ValueError if type_algo is not 1..4 once a selection round is
            started
    '''
    S = []  # set of selected nodes
    influenced = []
    influenced_a = []
    influenced_b = []
    influenced_c = []
    seeds_a = []
    seeds_b = []
    seeds_c = []

    # f-strings: without the prefix the placeholders ended up verbatim in
    # the file name, so all parameter settings shared one result file.
    if type_algo == 1:
        filename = filename + '_greedy_'

    elif type_algo == 2:
        filename = filename + f'_log_gamma_{gamma1,gamma2}_'

    elif type_algo == 3:
        filename = filename + f'_root_gamma_{gamma1}_beta_{beta1,beta2}_'

    elif type_algo == 4:
        filename = filename + f'_root_majority_gamma_{gamma1}_beta_{beta1,beta2}_'

    stats = ut.graph_stats(G, print_stats=False)

    try:

        influenced, influenced_a, influenced_b, influenced_c, seeds_a, seeds_b, seeds_c = ut.read_files(
            filename)
        S = seeds_a[-1] + seeds_b[-1] + seeds_c[-1]

        if len(S) >= budget:
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print("\n\n")
            print(influenced_c)
            print(" Seed length ", len(S))

            # Same stats keys as the plot after a fresh run below; this
            # path previously asked for 'group_a' / 'group_c' instead.
            ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                              filename, stats['group_r'], stats['group_b'],
                              stats['group_n'], [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b],
                              [len(S_c) for S_c in seeds_c])

            return (influenced, influenced_a, influenced_b, influenced_c,
                    seeds_a, seeds_b, seeds_c)
        else:
            seed_range = range(budget - len(S))

    except FileNotFoundError:
        print(f'{filename} not Found ')

        seed_range = range(budget)

    # add node to S if achieves maximum propagation for current chosen + this node
    for i in seed_range:  # cannot parallellize

        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        try:
            if type_algo == 1:
                results = pool.starmap(
                    map_select_next_seed_set_cover,
                    zip(repeat(G), repeat(S), list(G.nodes()), repeat(h_l)))
            elif type_algo == 2:
                results = pool.map(map_select_next_seed_log_greedy,
                                   ((G, S, v, gamma1, gamma2)
                                    for v in G.nodes()))
            elif type_algo == 3:
                results = pool.map(map_select_next_seed_root_greedy,
                                   ((G, S, v, gamma1, beta1, beta2)
                                    for v in G.nodes()))
            elif type_algo == 4:
                results = pool.map(map_select_next_seed_root_majority_greedy,
                                   ((G, S, v, gamma1) for v in G.nodes()))
            else:
                raise ValueError(f'unsupported type_algo: {type_algo!r}')
        finally:
            # Release the worker processes even if a candidate evaluation fails.
            pool.close()
            pool.join()

        s = PQ()  # priority queue

        for v, priority, p_a, p_b, p_c in results:
            # Negated score; presumably PQ pops the smallest priority first.
            s.add_task(v, -priority)

        node, priority = s.pop_item()
        S.append(node)
        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        influenced_c.append(I_c)
        S_red = []
        S_blue = []
        S_purple = []
        group = G.nodes[node]['color']
        print(
            str(i + 1) + ' Selected Node is ' + str(node) + ' group ' +
            str(group) + ' Ia = ' + str(I_a) + ' Ib = ' + str(I_b) + ' Ic = ' +
            str(I_c))
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            # elif, not if: red seeds used to fall through to the else
            # branch and were counted in the third group as well.
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        seeds_a.append(
            S_red)  # id's of the seeds so the influence can be recreated
        seeds_b.append(S_blue)
        seeds_c.append(S_purple)

    ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                      filename, stats['group_r'], stats['group_b'],
                      stats['group_n'], [len(S_a) for S_a in seeds_a],
                      [len(S_b)
                       for S_b in seeds_b], [len(S_c) for S_c in seeds_c])

    ut.write_files(filename, influenced, influenced_a, influenced_b,
                   influenced_c, seeds_a, seeds_b, seeds_c)

    return (influenced, influenced_a, influenced_b, influenced_c, seeds_a,
            seeds_b, seeds_c)
Exemplo n.º 6
0
    influenced_a_list = []
    influenced_b_list = []
    labels = []
    seed_size = 30
    for p_across in p_acrosses:
        group_ratios = [0.7]  #,0.5,0.55, 0.6, 0.65]
        for group_ratio in group_ratios:
            #group_ratio = 0.5 #0.7

            filename = f'results/synthetic_data_{num_nodes}_{p_with}_{p_across}_{group_ratio}'

            # read in graph
            G = ut.load_graph(filename, p_with, p_across, group_ratio,
                              num_nodes)

            ut.graph_stats(G)

            gammas = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 2.0, 2.5]
            #gamma = 2.5
            types = [1, 2]
            for t in types:
                if t == 1:
                    gammas = [1.0]  #, 1.1, 1.2, 1.3, 1.4, 1.5, 2.0, 2.5]
                elif t == 2:
                    gammas = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 2.0, 2.5]

                for gamma in gammas:
                    influenced, influenced_a, influenced_b = generalGreedy_node_parallel(
                        filename, G, seed_size, gamma, type_algo=t)
                    influenced_a_list.append(influenced_a)
                    influenced_b_list.append(influenced_b)
Exemplo n.º 7
0
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Finds initial seed set S using general greedy heuristic (two groups)

    Seeds are added one at a time until the summed, per-group capped reach
    (influenced fraction of each group, capped at `budget`) is at least
    2 * budget. Results stored under the derived file name are reused: if
    they already satisfy that criterion they are plotted and returned,
    otherwise the selection continues from the stored seeds.

    Input: filename -- prefix of the result files; a suffix depending on
                       type_algo is appended
    G -- networkx Graph object; nodes carry a 'color' attribute
         ('red' is group a, anything else group b)
    budget -- fraction of population needs to be influenced in both groups
    gamma_a, gamma_b -- forwarded to map_IC_timing (type_algo 2); type_algo 3
                        passes gamma_a for both
    type_algo -- 1: set cover, 2 / 3: timing based variants
    Output: (influenced, influenced_a, influenced_b, seeds_a, seeds_b)
            -- one entry per selected seed
    Raises: ValueError if type_algo is not 1, 2 or 3 once a selection round
            is started
    '''
    stats = ut.graph_stats(G, print_stats=False)

    if type_algo == 1:
        filename = filename + f'_set_cover_reach_{budget}_'
    elif type_algo == 2:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    elif type_algo == 3:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'

    reach = 0.0
    S = []  # set of selected nodes
    influenced = []
    influenced_a = []
    influenced_b = []
    seeds_a = []
    seeds_b = []

    try:

        influenced, influenced_a, influenced_b, seeds_a, seeds_b = ut.read_files(
            filename)
        reach = min(influenced_a[-1] / stats['group_a'], budget) + min(
            influenced_b[-1] / stats['group_b'], budget)
        S = seeds_a[-1] + seeds_b[-1]
        # Same completion criterion as the selection loop below; comparing
        # against a single `budget` would accept half-finished results.
        if reach >= 2 * budget:
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print(f" reach: {reach}")
            ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                              stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])
            return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)

    except FileNotFoundError:
        print(f'{filename} not Found ')

    # cpu_count() - 1 is 0 on a single-core host, which Pool rejects.
    n_workers = max(1, multiprocessing.cpu_count() - 1)

    i = 0
    # add node to S if achieves maximum propagation for current chosen + this node
    while reach < 2 * budget:  # cannot parallellize

        pool = multiprocessing.Pool(n_workers)
        try:
            if type_algo == 1:
                results = pool.map(map_select_next_seed_set_cover,
                                   ((G, S, v) for v in G.nodes()))
            elif type_algo == 2:
                results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_b)
                                                   for v in G.nodes()))
            elif type_algo == 3:
                results = pool.map(map_IC_timing, ((G, S, v, gamma_a, gamma_a)
                                                   for v in G.nodes()))
            else:
                raise ValueError(f'unsupported type_algo: {type_algo!r}')
        finally:
            # Release the worker processes even if a candidate evaluation fails.
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        for v, p, p_a, p_b in results:
            # Negated score; presumably PQ pops the smallest priority first.
            s.add_task(
                v, -(min(p_a / stats['group_a'], budget) +
                     min(p_b / stats['group_b'], budget)))

        node, priority = s.pop_item()
        S.append(node)

        I, I_a, I_b = map_fair_IC((G, S))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        S_red = []
        S_blue = []
        group = G.nodes[node]['color']

        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            else:
                S_blue.append(n)

        seeds_a.append(
            S_red)  # id's of the seeds so the influence can be recreated
        seeds_b.append(S_blue)

        reach_a = I_a / stats['group_a']
        reach_b = I_b / stats['group_b']
        reach = (min(reach_a, budget) + min(reach_b, budget))

        print(
            f'{i+1} Node ID {node} group {group} Ia = {I_a} Ib {I_b} reach: {reach} reach_a {reach_a} reach_b {reach_b}'
        )
        i += 1

    ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                      stats['group_a'], stats['group_b'],
                      [len(S_a)
                       for S_a in seeds_a], [len(S_b) for S_b in seeds_b])

    ut.write_files(filename, influenced, influenced_a, influenced_b, seeds_a,
                   seeds_b)

    return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)