示例#1
0
    def setUpClass(cls):
        """Create the shared fixture: sources, expected errors, saved state.

        The saved state is generated from a scratch copy of the sources
        (``init_dir``) rather than from ``cls.repo_dir``, to verify that the
        saved state does not depend on any absolute paths.  The scratch copy
        is deleted before returning, including when a setup step raises.
        """
        cls.maxDiff = 2000

        # Fixture sources keyed by file name.  They are plain data, so they
        # are defined before any directory is created.
        cls.files = {}

        cls.files['foo_1.php'] = """
        <?hh
        function f() {
            return g() + 1;
        }
        """

        cls.files['foo_2.php'] = """
        <?hh
        function g(): string {
            return "a";
        }
        """

        cls.files['foo_3.php'] = """
        <?hh
        function h(): string {
            return 1;
        }

        class Foo {}

        function some_long_function_name() {
            new Foo();
            h();
        }
        """

        # Error lines expected for the sources above.  '{root}' is a
        # placeholder -- presumably filled in by the tests; confirm against
        # the code that consumes this list.
        cls.initial_errors = [
            '{root}foo_1.php:4:20,22: Typing error (Typing[4110])',
            '  {root}foo_1.php:4:20,22: This is a num (int/float) because this is used in an arithmetic operation',
            '  {root}foo_2.php:3:23,28: It is incompatible with a string',
            '{root}foo_3.php:4:20,20: Invalid return type (Typing[4110])',
            '  {root}foo_3.php:3:23,28: This is a string',
            '  {root}foo_3.php:4:20,20: It is incompatible with an int',
        ]

        init_dir = tempfile.mkdtemp()
        try:
            cls.repo_dir = tempfile.mkdtemp()
            cls.saved_state_dir = tempfile.mkdtemp()

            touch(os.path.join(init_dir, '.hhconfig'))

            write_files(cls.files, init_dir)
            write_files(cls.files, cls.repo_dir)

            # The exit status of the server is not inspected here.
            subprocess.call([
                cls.hh_server,
                '--check', init_dir,
                '--save', os.path.join(cls.saved_state_dir, 'foo'),
            ])
        finally:
            # mkdtemp() leaves deletion to the caller; without the finally a
            # failure above would leave init_dir behind.
            shutil.rmtree(init_dir)
示例#2
0
    def setUpClass(cls):
        """Create the shared fixture: sources, expected errors, saved state.

        The saved state is generated from a scratch copy of the sources
        (``init_dir``) rather than from ``cls.repo_dir``, to verify that the
        saved state does not depend on any absolute paths.  The scratch copy
        is deleted before returning, including when a setup step raises.
        """
        cls.maxDiff = 2000

        # Fixture sources keyed by file name.  They are plain data, so they
        # are defined before any directory is created.
        cls.files = {}

        cls.files['foo_1.php'] = """
        <?hh
        function f() {
            return g() + 1;
        }
        """

        cls.files['foo_2.php'] = """
        <?hh
        function g(): string {
            return "a";
        }
        """

        cls.files['foo_3.php'] = """
        <?hh
        function h(): string {
            return 1;
        }

        class Foo {}

        function some_long_function_name() {
            new Foo();
            h();
        }
        """

        # Error lines expected for the sources above.  '{root}' is a
        # placeholder -- presumably filled in by the tests; confirm against
        # the code that consumes this list.
        cls.initial_errors = [
            '{root}foo_1.php:4:20,22: Typing error (Typing[4110])',
            '  {root}foo_1.php:4:20,22: This is a num (int/float) because this is used in an arithmetic operation',
            '  {root}foo_2.php:3:23,28: It is incompatible with a string',
            '{root}foo_3.php:4:20,20: Invalid return type (Typing[4110])',
            '  {root}foo_3.php:3:23,28: This is a string',
            '  {root}foo_3.php:4:20,20: It is incompatible with an int',
        ]

        init_dir = tempfile.mkdtemp()
        try:
            cls.repo_dir = tempfile.mkdtemp()
            cls.saved_state_dir = tempfile.mkdtemp()

            touch(os.path.join(init_dir, '.hhconfig'))

            write_files(cls.files, init_dir)
            write_files(cls.files, cls.repo_dir)

            # The exit status of the server is not inspected here.
            subprocess.call([
                cls.hh_server,
                '--check', init_dir,
                '--save', os.path.join(cls.saved_state_dir, 'foo'),
            ])
        finally:
            # mkdtemp() leaves deletion to the caller; without the finally a
            # failure above would leave init_dir behind.
            shutil.rmtree(init_dir)
    def test_kmedoids(self, emb_filename, res_filename, budget):
        """Evaluate k-medoids seed sets for every size from 1 to *budget*.

        For each size k the centers returned by ``ut.get_kmedoids_centers``
        are used as seeds, evaluated with ``map_fair_IC`` on ``self.G`` and
        grouped by the 'color' attribute of their node.  The per-size
        results are handed to ``ut.write_files`` under *res_filename*.
        """
        print(res_filename)

        v, em = ut.load_embeddings(emb_filename, self.G.nodes())

        influenced = []
        influenced_grouped = []
        seeds = []
        for k in range(1, budget + 1):
            print('--------', k)
            centers = ut.get_kmedoids_centers(em, k, v)

            spread, spread_by_group = map_fair_IC((self.G, centers))
            influenced.append(spread)
            influenced_grouped.append(spread_by_group)

            # One (initially empty) bucket per distinct node color.
            colors = np.unique(
                [self.G.nodes[node]['color'] for node in self.G.nodes])
            seeds_by_color = {color: [] for color in colors}
            for center in centers:
                center_color = self.G.nodes[center]['color']
                seeds_by_color[center_color].append(center)

            # ids of the seeds, so the influence can be recreated
            seeds.append(seeds_by_color)

        ut.write_files(res_filename, influenced, influenced_grouped, seeds)
示例#4
0
    def setUpClass(cls):
        cls.maxDiff = 2000
        # we create the state in a different dir from the one we run our tests
        # on, to verify that the saved state does not depend on any absolute
        # paths
        init_dir = tempfile.mkdtemp()
        cls.repo_dir = tempfile.mkdtemp()
        cls.config_path = os.path.join(cls.repo_dir, '.hhconfig')
        cls.tmp_dir = tempfile.mkdtemp()
        cls.hh_tmp_dir = tempfile.mkdtemp()
        cls.saved_state_name = 'foo'
        cls.test_env = dict(os.environ, **{
            'HH_TEST_MODE': '1',
            'HH_TMPDIR': cls.hh_tmp_dir,
            'PATH': '%s:/bin:/usr/bin:/usr/local/bin' % cls.tmp_dir,
            'OCAMLRUNPARAM': 'b',
            })

        with open(os.path.join(init_dir, '.hhconfig'), 'w') as f:
            f.write(r"""
# some comment
assume_php = false""")

        cls.files = {}

        cls.files['foo_1.php'] = """
        <?hh
        function f() {
            return g() + 1;
        }
        """

        cls.files['foo_2.php'] = """
        <?hh
        function g(): int {
            return 0;
        }
        """

        cls.files['foo_3.php'] = """
        <?hh
        function h(): string {
            return "a";
        }

        class Foo {}

        function some_long_function_name() {
            new Foo();
            h();
        }
        """

        write_files(cls.files, init_dir)
        write_files(cls.files, cls.repo_dir)

        cls.save_command(init_dir)

        shutil.rmtree(init_dir)
示例#5
0
    def setUpClass(cls):
        cls.maxDiff = 2000
        # we create the state in a different dir from the one we run our tests
        # on, to verify that the saved state does not depend on any absolute
        # paths
        init_dir = tempfile.mkdtemp()
        cls.repo_dir = tempfile.mkdtemp()
        cls.config_path = os.path.join(cls.repo_dir, '.hhconfig')
        cls.tmp_dir = tempfile.mkdtemp()
        cls.hh_tmp_dir = tempfile.mkdtemp()
        cls.saved_state_name = 'foo'
        cls.test_env = dict(
            os.environ, **{
                'HH_TEST_MODE': '1',
                'HH_TMPDIR': cls.hh_tmp_dir,
                'PATH': '%s:/bin:/usr/bin' % cls.tmp_dir,
            })

        with open(os.path.join(init_dir, '.hhconfig'), 'w') as f:
            f.write(r"""
# some comment
assume_php = false""")

        cls.files = {}

        cls.files['foo_1.php'] = """
        <?hh
        function f() {
            return g() + 1;
        }
        """

        cls.files['foo_2.php'] = """
        <?hh
        function g(): int {
            return 0;
        }
        """

        cls.files['foo_3.php'] = """
        <?hh
        function h(): string {
            return "a";
        }

        class Foo {}

        function some_long_function_name() {
            new Foo();
            h();
        }
        """

        write_files(cls.files, init_dir)
        write_files(cls.files, cls.repo_dir)

        cls.save_command(init_dir)

        shutil.rmtree(init_dir)
示例#6
0
    def setUpClass(cls):
        cls.maxDiff = 2000
        # we create the state in a different dir from the one we run our tests
        # on, to verify that the saved state does not depend on any absolute
        # paths
        init_dir = tempfile.mkdtemp()
        cls.repo_dir = tempfile.mkdtemp()
        cls.config_path = os.path.join(cls.repo_dir, '.hhconfig')
        cls.saved_state_dir = tempfile.mkdtemp()
        cls.saved_state_name = 'foo'

        with open(os.path.join(init_dir, '.hhconfig'), 'w') as f:
            f.write(r"""
# some comment
assume_php = false""")

        cls.files = {}

        cls.files['foo_1.php'] = """
        <?hh
        function f() {
            return g() + 1;
        }
        """

        cls.files['foo_2.php'] = """
        <?hh
        function g(): int {
            return 0;
        }
        """

        cls.files['foo_3.php'] = """
        <?hh
        function h(): string {
            return "a";
        }

        class Foo {}

        function some_long_function_name() {
            new Foo();
            h();
        }
        """

        write_files(cls.files, init_dir)
        write_files(cls.files, cls.repo_dir)

        cls.save_command(init_dir)

        shutil.rmtree(init_dir)
示例#7
0
def correlate():
    """
    Match every image to a city and record the match.

    Reads the sites and images files through utils.file_reader, asks
    get_closest_match for the city belonging to each image, appends the
    image to <city>_challenge.csv via utils.write_files and prints the
    match.
    """
    cities = utils.file_reader(sites_file)
    images = utils.file_reader(images_file)
    for coord, image_info in images.items():
        # image_info[0] is used as the image name and image_info[1] as its
        # date, as the labels printed below show.
        match = get_closest_match(coord, image_info[1], cities)
        city_label = str(match[0])
        target_file = city_label + "_challenge.csv"
        utils.write_files(target_file, image_info[0], coord, image_info[1])
        print("RESULT: ", coord, "IMAGE_NAME: ", image_info[0], " ",
              "IMAGE_DATE: ", image_info[1], "CITY: ", match)
示例#8
0
    def setUpClass(cls):
        """Create the repo fixture and save a server state for it.

        Writes the sources in ``cls.files`` plus an empty ``.hhconfig`` into
        a fresh ``cls.repo_dir`` and runs ``cls.hh_server`` with ``--check``
        and ``--save`` so that a state named 'foo' is requested under
        ``cls.saved_state_dir``.
        """
        cls.maxDiff = 2000
        cls.repo_dir = tempfile.mkdtemp()
        cls.saved_state_dir = tempfile.mkdtemp()

        hhconfig = os.path.join(cls.repo_dir, '.hhconfig')
        touch(hhconfig)

        # Fixture sources keyed by file name.
        cls.files = {
            'foo_1.php': """
        <?hh
        function f() {
            return g() + 1;
        }
        """,
            'foo_2.php': """
        <?hh
        function g(): string {
            return "a";
        }
        """,
            'foo_3.php': """
        <?hh
        function h(): string {
            return 1;
        }
        """,
        }

        # Error lines expected for the sources above.
        typing_error = [
            'foo_1.php:4:20,22: Typing error (Typing[4110])',
            '  foo_1.php:4:20,22: This is a num (int/float) because this is used in an arithmetic operation',
            '  foo_2.php:3:23,28: It is incompatible with a string',
        ]
        return_type_error = [
            'foo_3.php:4:20,20: Invalid return type (Typing[4110])',
            '  foo_3.php:3:23,28: This is a string',
            '  foo_3.php:4:20,20: It is incompatible with an int',
        ]
        cls.initial_errors = typing_error + return_type_error

        write_files(cls.files, cls.repo_dir)

        # The exit status of the server is not inspected.
        save_cmd = [cls.hh_server, '--check', cls.repo_dir]
        save_cmd += ['--save', os.path.join(cls.saved_state_dir, 'foo')]
        subprocess.call(save_cmd)
示例#9
0
 def setUp(self):
     """Write the fixture sources in ``self.files`` into ``self.repo_dir``."""
     sources, repo = self.files, self.repo_dir
     write_files(sources, repo)
示例#10
0
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 h_l=0,
                                 color='all',
                                 seed_size_budget=14,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Greedily grows a seed set S until the capped reach of the three
    groups adds up to 3 * budget.

    Every round scores each node of G as the next seed in a worker pool,
    appends the best-scoring node to S and re-evaluates S with map_fair_IC.
    The per-round results are written with ut.write_files after the loop.

    Input: filename -- prefix of the result file; a suffix naming the
               algorithm variant and its parameters is appended
           G -- networkx Graph object; nodes carry a 'color' attribute
           budget -- cap applied to each group's reach, where reach is the
               influenced count divided by the matching stats['group_*']
               value (presumably the group size -- confirm in graph_stats)
           h_l -- forwarded to the scoring function (type_algo 1) and to
               map_fair_IC
           color -- forwarded to the scoring function (type_algo 1)
           seed_size_budget -- unused; kept so existing callers still work
           gamma_a, gamma_b -- forwarded to map_IC_timing (type_algo 2);
               type_algo 3 passes gamma_a for both
           type_algo -- 1: map_select_next_seed_set_cover,
               2 or 3: map_IC_timing
    Output: (influenced, influenced_r, influenced_b, influenced_n,
             seeds_r, seeds_b, seeds_n) -- one entry per selected seed
    Raises: ValueError -- if type_algo is not 1, 2 or 3
    '''
    # Fail early; previously an unknown value surfaced as a NameError on
    # `results` after a pool had already been started.
    if type_algo not in (1, 2, 3):
        raise ValueError('unknown type_algo: {!r}'.format(type_algo))

    stats = ut.graph_stats(G, print_stats=False)

    if type_algo == 1:
        filename = filename + '_set_cover_reach_' + str(budget)
    elif type_algo == 2:
        # The placeholders used to be written out literally because the
        # template was never formatted.
        filename = filename + (
            '_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
            .format(budget=budget, gamma_a=gamma_a, gamma_b=gamma_b))
    else:
        # type_algo 3 runs with gamma_a for both parameters, so the name
        # reports gamma_a twice.
        filename = filename + (
            '_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'
            .format(budget=budget, gamma_a=gamma_a))

    # cpu_count() - 1 is 0 on a single-core machine and Pool rejects 0.
    n_workers = max(1, multiprocessing.cpu_count() - 1)

    reach = 0.0
    S = []  # selected seed nodes, in selection order
    influenced = []
    influenced_r = []
    influenced_b = []
    influenced_n = []
    seeds_r = []
    seeds_b = []
    seeds_n = []

    i = 0
    while reach < 3 * budget:
        # Score every node as the next seed; the rounds themselves are
        # sequential because each one depends on the current S.
        pool = multiprocessing.Pool(n_workers)
        try:
            if type_algo == 1:
                results = pool.map(map_select_next_seed_set_cover,
                                   ((G, S, v, h_l, color) for v in G.nodes()))
            elif type_algo == 2:
                results = pool.map(map_IC_timing,
                                   ((G, S, v, gamma_a, gamma_b)
                                    for v in G.nodes()))
            else:
                results = pool.map(map_IC_timing,
                                   ((G, S, v, gamma_a, gamma_a)
                                    for v in G.nodes()))
        finally:
            # Release the workers even when scoring fails.
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        for v, _, p_a, p_b, p_c in results:
            # Negated so that the largest capped reach is popped first.
            # The third term uses p_c; it used to reuse p_b by mistake.
            s.add_task(
                v, -(min(p_a / stats['group_r'], budget) +
                     min(p_b / stats['group_b'], budget) +
                     min(p_c / stats['group_n'], budget)))

        node, priority = s.pop_item()
        S.append(node)

        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_r.append(I_a)
        influenced_b.append(I_b)
        influenced_n.append(I_c)

        # Snapshot of the current seed set, split by node color.
        S_red = []
        S_blue = []
        S_purple = []
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        # ids of the seeds, so the influence can be recreated
        seeds_r.append(S_red)
        seeds_b.append(S_blue)
        seeds_n.append(S_purple)

        reach_a = I_a / stats['group_r']
        reach_b = I_b / stats['group_b']
        reach_c = I_c / stats['group_n']
        reach = (min(reach_a, budget) + min(reach_b, budget) +
                 min(reach_c, budget))

        group = G.nodes[node]['color']
        print(
            str(i + 1) + ' Node ID ' + str(node) + ' group ' + str(group) +
            ' Ia  = ' + str(I_a) + ' Ib ' + str(I_b) + ' Ic ' + str(I_c) +
            ' reach: ' + str(reach) + ' reach_a ' + str(reach_a) +
            ' reach_b ' + str(reach_b) + ' reach_c ' + str(reach_c))
        i += 1

    ut.write_files(filename, influenced, influenced_r, influenced_b,
                   influenced_n, seeds_r, seeds_b, seeds_n)

    return (influenced, influenced_r, influenced_b, influenced_n, seeds_r,
            seeds_b, seeds_n)
示例#11
0
def generalGreedy_node_parallel(filename,
                                G,
                                budget,
                                h_l,
                                gamma1,
                                gamma2,
                                beta1=1.0,
                                beta2=1.0,
                                type_algo=1):
    ''' Greedily selects up to `budget` seed nodes, resuming from a result
    file when one exists.

    Every round scores each node of G as the next seed in a worker pool,
    appends the best-scoring node to S and re-evaluates S with map_fair_IC.
    Results are plotted and written under `filename` plus a suffix naming
    the algorithm variant and its parameters.

    Input: filename -- prefix of the result file
           G -- networkx Graph object; nodes carry a 'color' attribute
           budget -- number of seed nodes wanted
           h_l -- forwarded to the scoring function (type_algo 1) and to
               map_fair_IC
           gamma1, gamma2, beta1, beta2 -- forwarded to the scoring
               function selected by type_algo
           type_algo -- 1: map_select_next_seed_set_cover,
               2: map_select_next_seed_log_greedy,
               3: map_select_next_seed_root_greedy,
               4: map_select_next_seed_root_majority_greedy
    Output: (influenced, influenced_a, influenced_b, influenced_c,
             seeds_a, seeds_b, seeds_c) -- one entry per selected seed
    Raises: ValueError -- if type_algo is not 1, 2, 3 or 4
    '''
    # Fail early; previously an unknown value surfaced as a NameError on
    # `results` after a pool had already been started.
    if type_algo not in (1, 2, 3, 4):
        raise ValueError('unknown type_algo: {!r}'.format(type_algo))

    S = []  # selected seed nodes, in selection order
    influenced = []
    influenced_a = []
    influenced_b = []
    influenced_c = []
    seeds_a = []
    seeds_b = []
    seeds_c = []

    # The suffix templates were never formatted, so the placeholders ended
    # up verbatim in the file name.  Parameter pairs are rendered as a
    # tuple, which is what '{gamma1,gamma2}' yields inside an f-string.
    if type_algo == 1:
        filename = filename + '_greedy_'
    elif type_algo == 2:
        filename = filename + '_log_gamma_{}_'.format((gamma1, gamma2))
    elif type_algo == 3:
        filename = filename + '_root_gamma_{}_beta_{}_'.format(
            gamma1, (beta1, beta2))
    else:
        filename = filename + '_root_majority_gamma_{}_beta_{}_'.format(
            gamma1, (beta1, beta2))

    stats = ut.graph_stats(G, print_stats=False)

    try:
        influenced, influenced_a, influenced_b, influenced_c, seeds_a, seeds_b, seeds_c = ut.read_files(
            filename)
        S = seeds_a[-1] + seeds_b[-1] + seeds_c[-1]

        if len(S) >= budget:
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print("\n\n")
            print(influenced_c)
            print(" Seed length ", len(S))

            # Same stats keys as the plot call at the end of the function;
            # this branch used to ask for 'group_a' / 'group_c'.
            # NOTE(review): confirm the key names against ut.graph_stats.
            ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                              filename, stats['group_r'], stats['group_b'],
                              stats['group_n'], [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b],
                              [len(S_c) for S_c in seeds_c])

            return (influenced, influenced_a, influenced_b, influenced_c,
                    seeds_a, seeds_b, seeds_c)

        # Only the seeds still missing are selected below.
        seed_range = range(budget - len(S))

    except FileNotFoundError:
        print('{} not Found '.format(filename))

        seed_range = range(budget)

    # add node to S if achieves maximum propagation for current chosen + this node
    for i in seed_range:  # rounds are sequential, each depends on S
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        try:
            if type_algo == 1:
                results = pool.starmap(
                    map_select_next_seed_set_cover,
                    zip(repeat(G), repeat(S), list(G.nodes()), repeat(h_l)))
            elif type_algo == 2:
                results = pool.map(map_select_next_seed_log_greedy,
                                   ((G, S, v, gamma1, gamma2)
                                    for v in G.nodes()))
            elif type_algo == 3:
                results = pool.map(map_select_next_seed_root_greedy,
                                   ((G, S, v, gamma1, beta1, beta2)
                                    for v in G.nodes()))
            else:
                results = pool.map(map_select_next_seed_root_majority_greedy,
                                   ((G, S, v, gamma1) for v in G.nodes()))
        finally:
            # Release the workers even when scoring fails.
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        for v, priority, p_a, p_b, p_c in results:
            # Negated so that the highest score is popped first.
            s.add_task(v, -priority)

        node, priority = s.pop_item()
        S.append(node)
        I, I_a, I_b, I_c = map_fair_IC((G, S, h_l))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)
        influenced_c.append(I_c)

        group = G.nodes[node]['color']
        print(
            str(i + 1) + ' Selected Node is ' + str(node) + ' group ' +
            str(group) + ' Ia = ' + str(I_a) + ' Ib = ' + str(I_b) + ' Ic = ' +
            str(I_c))

        # Snapshot of the current seed set, split by node color.  The blue
        # test must be an elif: as a separate `if`, red seeds also fell
        # through to the purple list.
        S_red = []
        S_blue = []
        S_purple = []
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            elif G.nodes[n]['color'] == 'blue':
                S_blue.append(n)
            else:
                S_purple.append(n)

        # ids of the seeds, so the influence can be recreated
        seeds_a.append(S_red)
        seeds_b.append(S_blue)
        seeds_c.append(S_purple)

    ut.plot_influence(influenced_a, influenced_b, influenced_c, len(S),
                      filename, stats['group_r'], stats['group_b'],
                      stats['group_n'], [len(S_a) for S_a in seeds_a],
                      [len(S_b) for S_b in seeds_b],
                      [len(S_c) for S_c in seeds_c])

    ut.write_files(filename, influenced, influenced_a, influenced_b,
                   influenced_c, seeds_a, seeds_b, seeds_c)

    return (influenced, influenced_a, influenced_b, influenced_c, seeds_a,
            seeds_b, seeds_c)
示例#12
0
 def setUp(self):
     """Ensure ``self.repo_dir`` exists, then write ``self.files`` into it."""
     repo = self.repo_dir
     if not os.path.isdir(repo):
         os.mkdir(repo)
     write_files(self.files, repo)
示例#13
0
def generalGreedy_node_parallel(filename,
                                G,
                                budget,
                                gamma,
                                beta=1.0,
                                type_algo=1,
                                G_greedy=None):
    ''' Greedily selects `budget` seed nodes and records the influence after
    each selection.

    Every round scores each node of G_greedy as the next seed in a worker
    pool, appends the node popped from the priority queue to S and evaluates
    S on G with map_fair_IC.  The results are written with ut.write_files
    under `filename` plus a suffix naming the algorithm variant.

    Input: filename -- prefix of the result file
           G -- networkx Graph object used to evaluate the seed set; nodes
               carry a 'color' attribute
           budget -- number of seed nodes to select
           gamma, beta -- forwarded to the scoring function selected by
               type_algo
           type_algo -- 1: map_select_next_seed_greedy,
               2: map_select_next_seed_log_greedy,
               3: map_select_next_seed_root_greedy,
               4: map_select_next_seed_root_majority_greedy
           G_greedy -- graph whose nodes are scored as candidates;
               defaults to G
    Output: (influenced, influenced_grouped, seeds) -- one entry per
            selected seed; each seeds entry maps a color to its seed ids
    Raises: ValueError -- if type_algo is not 1, 2, 3 or 4
            Exception -- if a result file for `filename` can already be read
    '''
    # Fail early; previously an unknown value surfaced as a NameError on
    # `results` after a pool had already been started.
    if type_algo not in (1, 2, 3, 4):
        raise ValueError(f'unknown type_algo: {type_algo!r}')

    if G_greedy is None:
        G_greedy = G

    S = []  # selected seed nodes, in selection order
    influenced = []
    influenced_grouped = []
    seeds = []

    if type_algo == 1:
        filename = filename + '_greedy_'
    elif type_algo == 2:
        filename = filename + f'_log_gamma_{gamma}_'
    elif type_algo == 3:
        filename = filename + f'_root_gamma_{gamma}_beta_{beta}_'
    else:
        filename = filename + f'_root_majority_gamma_{gamma}_beta_{beta}_'

    # Existing results are not resumed: being able to read the file at all
    # is treated as an error.  The resume logic that used to follow the
    # raise was unreachable (and referenced an undefined `stats`), so it
    # has been dropped.
    try:
        ut.read_files(filename)
    except FileNotFoundError:
        print(f'{filename} not Found ')
    else:
        raise Exception('It was supposed not to be reached.')

    # add node to S if achieves maximum propagation for current chosen + this node
    for i in range(budget):  # rounds are sequential, each depends on S
        print('--------', i)
        pool = multiprocessing.Pool(multiprocessing.cpu_count())
        try:
            if type_algo == 1:
                results = pool.map(map_select_next_seed_greedy,
                                   ((G_greedy, S, v)
                                    for v in G_greedy.nodes()))
            elif type_algo == 2:
                results = pool.map(map_select_next_seed_log_greedy,
                                   ((G_greedy, S, v, gamma)
                                    for v in G_greedy.nodes()))
            elif type_algo == 3:
                results = pool.map(map_select_next_seed_root_greedy,
                                   ((G_greedy, S, v, gamma, beta)
                                    for v in G_greedy.nodes()))
            else:
                results = pool.map(map_select_next_seed_root_majority_greedy,
                                   ((G_greedy, S, v, gamma)
                                    for v in G_greedy.nodes()))
        finally:
            # Release the workers even when scoring fails.
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        # Priorities are queued exactly as the scoring function returns
        # them.  NOTE(review): other variants negate the score before
        # queueing -- confirm the sign convention of these scorers.
        for v, priority in results:
            s.add_task(v, priority)

        node, priority = s.pop_item()
        S.append(node)
        I, I_grouped = map_fair_IC((G, S))
        influenced.append(I)
        influenced_grouped.append(I_grouped)
        group = G.nodes[node]['color']
        print(
            f'{i + 1} Selected Node is {node} group {group} I_grouped = {I_grouped}'
        )

        # Snapshot of the current seed set, grouped by node color, with one
        # (possibly empty) list per distinct color in G.
        S_g = {
            c: []
            for c in np.unique([G.nodes[v]['color'] for v in G.nodes])
        }
        for n in S:
            c = G.nodes[n]['color']
            S_g[c].append(n)

        # ids of the seeds, so the influence can be recreated
        seeds.append(S_g)

    ut.write_files(filename, influenced, influenced_grouped, seeds)

    return (influenced, influenced_grouped, seeds)
示例#14
0
            sys.exit(1)

    rsids = sorted(rsids)

    gaps = []
    conseq = 0
    gaps.append(rsids[0])
    for pos in range(1, len(rsids)):
        diff = rsids[pos] - rsids[pos - 1]
        if diff > 1:
            # gap
            if conseq:
                gaps.append(-1 * conseq)
            gaps.append(diff)
            conseq = 0
        elif diff == 1:
            # conseq
            conseq += 1
        else:
            print("Found duplicate rsIDs. You've got bad data.")
            sys.exit(1)

    if conseq:
        gaps.append(-1 * conseq)

    files = {
        'snps.json': snps,
        'minussnpgaps.json': gaps,
    }
    utils.write_files(files)
示例#15
0
 def setUp(self):
     """Ensure ``self.repo_dir`` exists, then write ``self.files`` into it."""
     repo = self.repo_dir
     if not os.path.isdir(repo):
         os.mkdir(repo)
     write_files(self.files, repo)
示例#16
0
def generalGreedy_node_set_cover(filename,
                                 G,
                                 budget,
                                 gamma_a=1e-2,
                                 gamma_b=0,
                                 type_algo=1):
    ''' Greedily grows a seed set S until the capped reach of both groups
    adds up to 2 * budget, resuming from a result file when one exists.

    Every round scores each node of G as the next seed in a worker pool,
    appends the best-scoring node to S and re-evaluates S with map_fair_IC.
    Results are plotted and written under `filename` plus a suffix naming
    the algorithm variant and its parameters.

    Input: filename -- prefix of the result file
           G -- networkx Graph object; nodes carry a 'color' attribute
           budget -- cap applied to each group's reach, where reach is the
               influenced count divided by the matching stats['group_*']
               value (presumably the group size -- confirm in graph_stats)
           gamma_a, gamma_b -- forwarded to map_IC_timing (type_algo 2);
               type_algo 3 passes gamma_a for both
           type_algo -- 1: map_select_next_seed_set_cover,
               2 or 3: map_IC_timing
    Output: (influenced, influenced_a, influenced_b, seeds_a, seeds_b) --
            one entry per selected seed
    Raises: ValueError -- if type_algo is not 1, 2 or 3
    '''
    # Fail early; previously an unknown value surfaced as a NameError on
    # `results` after a pool had already been started.
    if type_algo not in (1, 2, 3):
        raise ValueError(f'unknown type_algo: {type_algo!r}')

    stats = ut.graph_stats(G, print_stats=False)

    if type_algo == 1:
        filename = filename + f'_set_cover_reach_{budget}_'
    elif type_algo == 2:
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_b}_'
    else:
        # type_algo 3 runs with gamma_a for both parameters, so the name
        # reports gamma_a twice.
        filename = filename + f'_set_cover_timings_reach_{budget}_gamma_a_{gamma_a}_gamma_b_{gamma_a}_'

    reach = 0.0
    S = []  # selected seed nodes, in selection order
    influenced = []
    influenced_a = []
    influenced_b = []
    seeds_a = []
    seeds_b = []

    try:
        influenced, influenced_a, influenced_b, seeds_a, seeds_b = ut.read_files(
            filename)
        reach = min(influenced_a[-1] / stats['group_a'], budget) + min(
            influenced_b[-1] / stats['group_b'], budget)
        S = seeds_a[-1] + seeds_b[-1]
        # Same target as the loop below.  This used to compare against
        # `budget` alone and returned stored results that had only covered
        # half of the target.
        if reach >= 2 * budget:
            print(influenced_a)
            print("\n\n")
            print(influenced_b)
            print(f" reach: {reach}")
            ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                              stats['group_a'], stats['group_b'],
                              [len(S_a) for S_a in seeds_a],
                              [len(S_b) for S_b in seeds_b])
            return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)

    except FileNotFoundError:
        print(f'{filename} not Found ')

    # cpu_count() - 1 is 0 on a single-core machine and Pool rejects 0.
    n_workers = max(1, multiprocessing.cpu_count() - 1)

    i = 0
    while reach < 2 * budget:  # rounds are sequential, each depends on S
        pool = multiprocessing.Pool(n_workers)
        try:
            if type_algo == 1:
                results = pool.map(map_select_next_seed_set_cover,
                                   ((G, S, v) for v in G.nodes()))
            elif type_algo == 2:
                results = pool.map(map_IC_timing,
                                   ((G, S, v, gamma_a, gamma_b)
                                    for v in G.nodes()))
            else:
                results = pool.map(map_IC_timing,
                                   ((G, S, v, gamma_a, gamma_a)
                                    for v in G.nodes()))
        finally:
            # Release the workers even when scoring fails.
            pool.close()
            pool.join()

        s = PQ()  # priority queue
        for v, _, p_a, p_b in results:
            # Negated so that the largest capped reach is popped first.
            s.add_task(
                v, -(min(p_a / stats['group_a'], budget) +
                     min(p_b / stats['group_b'], budget)))

        node, priority = s.pop_item()
        S.append(node)

        I, I_a, I_b = map_fair_IC((G, S))
        influenced.append(I)
        influenced_a.append(I_a)
        influenced_b.append(I_b)

        # Snapshot of the current seed set: red seeds versus all others.
        S_red = []
        S_blue = []
        for n in S:
            if G.nodes[n]['color'] == 'red':
                S_red.append(n)
            else:
                S_blue.append(n)

        # ids of the seeds, so the influence can be recreated
        seeds_a.append(S_red)
        seeds_b.append(S_blue)

        reach_a = I_a / stats['group_a']
        reach_b = I_b / stats['group_b']
        reach = (min(reach_a, budget) + min(reach_b, budget))

        group = G.nodes[node]['color']
        print(
            f'{i+1} Node ID {node} group {group} Ia = {I_a} Ib {I_b} reach: {reach} reach_a {reach_a} reach_b {reach_b}'
        )
        i += 1

    ut.plot_influence(influenced_a, influenced_b, len(S), filename,
                      stats['group_a'], stats['group_b'],
                      [len(S_a) for S_a in seeds_a],
                      [len(S_b) for S_b in seeds_b])

    ut.write_files(filename, influenced, influenced_a, influenced_b, seeds_a,
                   seeds_b)

    return (influenced, influenced_a, influenced_b, seeds_a, seeds_b)
示例#17
0
 def setUp(self):
     """Write the fixture sources in ``self.files`` into ``self.repo_dir``."""
     sources, repo = self.files, self.repo_dir
     write_files(sources, repo)
示例#18
0
    def setUpClass(cls):
        cls.maxDiff = 2000
        # we create the state in a different dir from the one we run our tests
        # on, to verify that the saved state does not depend on any absolute
        # paths
        init_dir = tempfile.mkdtemp()
        cls.repo_dir = tempfile.mkdtemp()
        cls.config_path = os.path.join(cls.repo_dir, ".hhconfig")
        cls.tmp_dir = tempfile.mkdtemp()
        cls.hh_tmp_dir = tempfile.mkdtemp()
        cls.saved_state_name = "foo"
        cls.test_env = dict(
            os.environ, **{"HH_TEST_MODE": "1", "HH_TMPDIR": cls.hh_tmp_dir, "PATH": "%s:/bin:/usr/bin" % cls.tmp_dir}
        )

        with open(os.path.join(init_dir, ".hhconfig"), "w") as f:
            f.write(
                r"""
# some comment
assume_php = false"""
            )

        cls.files = {}

        cls.files[
            "foo_1.php"
        ] = """
        <?hh
        function f() {
            return g() + 1;
        }
        """

        cls.files[
            "foo_2.php"
        ] = """
        <?hh
        function g(): int {
            return 0;
        }
        """

        cls.files[
            "foo_3.php"
        ] = """
        <?hh
        function h(): string {
            return "a";
        }

        class Foo {}

        function some_long_function_name() {
            new Foo();
            h();
        }
        """

        write_files(cls.files, init_dir)
        write_files(cls.files, cls.repo_dir)

        cls.save_command(init_dir)

        shutil.rmtree(init_dir)