Example #1
 def test_multiple_executors(self):
     executor1 = futures.MPIPoolExecutor(1).bootup(wait=True)
     executor2 = futures.MPIPoolExecutor(1).bootup(wait=True)
     executor3 = futures.MPIPoolExecutor(1).bootup(wait=True)
     fs1 = [executor1.submit(abs, i) for i in range(100, 200)]
     fs2 = [executor2.submit(abs, i) for i in range(200, 300)]
     fs3 = [executor3.submit(abs, i) for i in range(300, 400)]
     futures.wait(fs3 + fs2 + fs1)
     for i, f in enumerate(fs1):
         self.assertEqual(f.result(), i + 100)
     for i, f in enumerate(fs2):
         self.assertEqual(f.result(), i + 200)
     for i, f in enumerate(fs3):
         self.assertEqual(f.result(), i + 300)
     executor1 = executor2 = executor3 = None
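For comparison, the same submit/result round trip can be written with the standard concurrent.futures context-manager protocol, which MPIPoolExecutor also implements; exiting the block shuts the pool down. A minimal sketch:

    from mpi4py import futures

    with futures.MPIPoolExecutor(max_workers=1) as executor:
        # submit() returns a Future; result() blocks until the worker replies
        assert executor.submit(abs, -42).result() == 42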
Example #2
 def test_mpi_serialized_support(self):
     futures._lib.setup_mpi_threads()
     threading = futures._lib.threading
     serialized = futures._lib.serialized
     lock_save = serialized.lock
     try:
         if lock_save is None:
             serialized.lock = threading.Lock()
             executor = futures.MPIPoolExecutor(1).bootup()
             executor.submit(abs, 0).result()
             executor.shutdown()
             serialized.lock = lock_save
         else:
             serialized.lock = None
             with lock_save:
                 executor = futures.MPIPoolExecutor(1).bootup()
                 executor.submit(abs, 0).result()
                 executor.shutdown()
             serialized.lock = lock_save
     finally:
         serialized.lock = lock_save
Example #3
 def test_shared_executors(self):
     executors = [futures.MPIPoolExecutor() for _ in range(16)]
     fs = []
     for i in range(128):
         fs.extend(
             e.submit(abs, i * 16 + j) for j, e in enumerate(executors))
     assert sorted(f.result() for f in fs) == list(range(16 * 128))
     world_size = MPI.COMM_WORLD.Get_size()
     num_workers = max(1, world_size - 1)
     for e in executors:
         self.assertEqual(e._num_workers, num_workers)
     del e, executors
Example #4
def mpi():
    pprint("yeah...")
    
    mpi_pool = futures.MPIPoolExecutor(globals=[('g_param', 1)], main=True)
    
    ar = []
    for x in range(16):
        ar.append(mpi_pool.submit(a_func, x))
    for res in ar:
        pprint(res.result())
    
    mpi_pool.shutdown(wait=True)
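The globals=[('g_param', 1)] argument injects g_param into each worker's global namespace before tasks run, and main=True ships the __main__ module to the workers. a_func is not shown above; a hypothetical definition consistent with that setup might be:

    # Hypothetical worker function (not part of the original example);
    # g_param exists on the workers because of the `globals=` argument above.
    def a_func(x):
        return x * g_param  # g_param == 1 on every worker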
Example #5
    def test_bad_unpickle(self):
        executor = futures.MPIPoolExecutor(1).bootup(wait=True)

        o = BadUnpickle()
        self.assertFalse(o.pickled)
        f = executor.submit(inout, o)
        self.assertRaises(ZeroDivisionError, f.result)
        self.assertTrue(o.pickled)

        f = executor.submit(BadUnpickle)
        self.assertRaises(ZeroDivisionError, f.result)

        f = executor.submit(abs, 42)
        self.assertEqual(f.result(), 42)
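BadUnpickle and inout are helpers defined elsewhere in the test module. A plausible sketch, assuming an object that pickles cleanly but raises ZeroDivisionError when unpickled, so the error propagates back through the Future:

    # Hypothetical helper definitions (assumed, not from the original test):
    def inout(arg):
        return arg  # ships its argument to the worker and back

    def _raise_zde():
        raise ZeroDivisionError

    class BadUnpickle:
        def __init__(self):
            self.pickled = False
        def __reduce__(self):
            self.pickled = True       # record that pickling happened
            return (_raise_zde, ())   # unpickling calls _raise_zde()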
Example #6
 def orig_test_mpi_serialized_support(self):
     threading = futures._worker.threading
     serialized = futures._worker.serialized
     lock_save = serialized.lock
     try:
         serialized.lock = threading.Lock()
         executor = futures.MPIPoolExecutor(1).bootup()
         executor.submit(abs, 0).result()
         if lock_save is not None:
             serialized.lock = None
             with lock_save:
                 executor.submit(abs, 0).result()
         serialized.lock = lock_save
         executor.submit(abs, 0).result()
         executor.shutdown()
         if lock_save is not None:
             serialized.lock = None
             with lock_save:
                 executor = futures.MPIPoolExecutor(1).bootup()
                 executor.submit(abs, 0).result()
                 executor.shutdown()
             serialized.lock = lock_save
     finally:
         serialized.lock = lock_save
Example #7
 def setUp(self):
     self.executor = futures.MPIPoolExecutor(1)
Example #8
def main():

    t_t_0 = time.monotonic()

    err = "{} -c <simulation.sumocfg (path)> -s <searchspace.json (path)> -p <Population size (int)> -g <Number of generations (int)> [-r <Seed (hex string)> -k <crossover points (int)> -x <mutation rate 0..1 (float)> -o <best net (Path)> -t <timeout sec (int)> -v verbose (specify multiple times for more messages) [-l local multiprocessing | -m mpi]]"

    individual_id_ctr = 1

    simulation_cfg_path = False
    searchspace_path = False
    population_size = False
    number_of_generations = False
    seed = False
    best_net_path = False
    v = 0
    use_local_mt = False
    use_mpi = False
    k_num = False
    mutation_rate = False
    timeout_s = None

    try:
        opts, args = getopt.getopt(sys.argv[1:], "vlmc:s:p:g:r:k:x:o:t:")
    except getopt.GetoptError:
        print(err.format(sys.argv[0]))
        sys.exit(1)

    for o, a in opts:
        if o == "-s":
            searchspace_path = a
        elif o == "-c":
            simulation_cfg_path = a
        elif o == "-p":
            population_size = a
        elif o == "-g":
            number_of_generations = a
        elif o == "-r":
            seed = a
        elif o == "-v":
            v += 1
        elif o == "-l":
            use_local_mt = True
        elif o == "-m":
            use_mpi = True
        elif o == "-k":
            k_num = a
        elif o == "-x":
            mutation_rate = a
        elif o == "-o":
            best_net_path = a
        elif o == "-t":
            timeout_s = a

    if simulation_cfg_path is False or searchspace_path is False or population_size is False or number_of_generations is False:
        print(err.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    if use_local_mt and use_mpi:
        print("Only local multiprocessing xor mpi!")
        print(err.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)
    if use_local_mt:
        op_mode = GenEvoConstants.MODE_LMT
    elif use_mpi:
        op_mode = GenEvoConstants.MODE_MPI
    else:
        op_mode = GenEvoConstants.MODE_LOC

    searchspace_path = Path(searchspace_path).resolve()
    if not searchspace_path.exists() or searchspace_path.is_dir():
        print("No valid searchspace file found!", file=sys.stderr)
        print(err.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    simulation_cfg_path = Path(simulation_cfg_path).resolve()
    if not simulation_cfg_path.exists() or simulation_cfg_path.is_dir():
        print("No valid sumo config file found!", file=sys.stderr)
        print(err.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    if best_net_path is not False:
        best_net_path = Path(best_net_path).resolve()
        if best_net_path.is_dir() or not best_net_path.parent.is_dir():
            print("No valid location for best net specified!", file=sys.stderr)
            print(err.format(sys.argv[0]), file=sys.stderr)
            sys.exit(1)

    conf_tree = ET.parse(simulation_cfg_path)
    conf_root = conf_tree.getroot()
    net_path = Path(
        simulation_cfg_path.parent,
        conf_root.find("input").find("net-file").attrib["value"]).resolve()
    trips_path = Path(
        simulation_cfg_path.parent,
        conf_root.find("input").find("route-files").attrib["value"]).resolve()
    vtypes_path = Path(
        simulation_cfg_path.parent,
        conf_root.find("input").find(
            "additional-files").attrib["value"]).resolve()
    del conf_root
    del conf_tree
    if v >= GenEvoConstants.V_INF:
        print("Using net file: <{}>.".format(str(net_path)))
        print("Using trips file: <{}>.".format(str(trips_path)))

    try:
        population_size = int(population_size)
        number_of_generations = int(number_of_generations)
        if population_size < 3 or population_size % 4 != 0:
            print(
                "Please specify only numbers greater than 2 and divisible by 4 for population size!",
                file=sys.stderr)
            raise ValueError()
        if number_of_generations < 2:
            print("Please specify a number of generations greater than 1!",
                  file=sys.stderr)
            raise ValueError()
    except ValueError:
        print(
            "Population size and number of generations must be integers!",
            file=sys.stderr)
        print(err.format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    if v >= GenEvoConstants.V_INF:
        print("Population size is {}; Number of generations is {}".format(
            population_size, number_of_generations))

    if k_num is False:
        k_num = 20
    else:
        try:
            k_num = int(k_num)
            if k_num < 1:
                raise ValueError()
        except ValueError:
            print("Number of crossover points must be a positive integer!",
                  file=sys.stderr)
            print(err.format(sys.argv[0]), file=sys.stderr)
            sys.exit(1)

    if mutation_rate is False:
        mutation_rate = 0.05
    else:
        try:
            mutation_rate = float(mutation_rate)
            if mutation_rate < 0. or mutation_rate > 1.:
                raise ValueError
        except ValueError:
            print(
                "Mutation rate must be a floating point value between 0 and 1!",
                file=sys.stderr)
            print(err.format(sys.argv[0]), file=sys.stderr)
            sys.exit(1)

    if v >= GenEvoConstants.V_INF:
        print("Using {}-point crossover and mutation rate of {}.".format(
            k_num, mutation_rate))

    if seed is False:
        seed = int(binascii.hexlify(os.urandom(16)), 16)
    else:
        try:
            seed = int(seed, 16)
        except ValueError:
            print("Seed has to be a hexadecimal string!", file=sys.stderr)
            print(err.format(sys.argv[0]), file=sys.stderr)
            sys.exit(1)
    stable_random = random.Random()
    stable_random.seed(seed)

    if v >= GenEvoConstants.V_INF:
        print("Using seed: {0:x}".format(seed))

    if timeout_s is not None:
        try:
            timeout_s = int(timeout_s)
            if timeout_s < 1:
                raise ValueError()
        except ValueError:
            print(
                "Please specify the timeout as an integer of at least 1 second!",
                file=sys.stderr)
            print(err.format(sys.argv[0]), file=sys.stderr)
            sys.exit(1)
        if v >= GenEvoConstants.V_INF:
            print("Setting timeout for subprocesses to {}s.".format(timeout_s))

    with open(searchspace_path, "r") as f:
        searchspace = json.loads(f.read())

    if v >= GenEvoConstants.V_INF:
        print("Searchspace: {} intersections and {} roundabouts found.".format(
            len(searchspace["intersections"]),
            len(searchspace["roundabouts"])))

    ## Prepare workers
    if v >= GenEvoConstants.V_DBG:
        print("Start preparing workers...", end="", flush=True)

    with open(simulation_cfg_path, "r") as f:
        simulation_cfg_str = f.read()

    with open(trips_path, "r") as f:
        trips_str = f.read()

    with open(vtypes_path, "r") as f:
        vtypes_str = f.read()

    netcnvt = load_netconvert_binary()
    tmpd, plain_files = cnvt_net_to_plain(net_path, netcnvt, "prepare", False)

    with open(plain_files["con"], "r") as f:
        plain_con_str = f.read()
    with open(plain_files["edg"], "r") as f:
        plain_edg_str = f.read()
    with open(plain_files["nod"], "r") as f:
        plain_nod_str = f.read()
    plain_tll_str = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<tlLogics version=\"1.1\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"http://sumo.dlr.de/xsd/tllogic_file.xsd\">\n</tlLogics>"
    with open(plain_files["typ"], "r") as f:
        plain_typ_str = f.read()

    rm_tmpd_and_files(tmpd)

    if v >= GenEvoConstants.V_DBG:
        print(" initializing workers ...", end="", flush=True)
    if use_local_mt:
        if v >= GenEvoConstants.V_DBG:
            print(" for local multiprocessing.")
        pool_size = cpu_count()
        pool = Pool(pool_size, GenEvoEvaluate.initialize_worker, [
            simulation_cfg_str, trips_str, vtypes_str, plain_con_str,
            plain_edg_str, plain_nod_str, plain_tll_str, plain_typ_str, v
        ])
    elif use_mpi:
        if v >= GenEvoConstants.V_DBG:
            print(" for mpi.")
        GenEvoEvaluate.initialize_worker(simulation_cfg_str, trips_str,
                                         vtypes_str, plain_con_str,
                                         plain_edg_str, plain_nod_str,
                                         plain_tll_str, plain_typ_str, v)
        glb_mpi = [("plain_con_m", GenEvoEvaluate.plain_con_g),
                   ("plain_edg_m", GenEvoEvaluate.plain_edg_g),
                   ("plain_nod_m", GenEvoEvaluate.plain_nod_g),
                   ("plain_tll_m", GenEvoEvaluate.plain_tll_g),
                   ("plain_typ_m", GenEvoEvaluate.plain_typ_g),
                   ("sumo_cfg_m", GenEvoEvaluate.sumo_cfg_g),
                   ("trips_m", GenEvoEvaluate.trips_g),
                   ("vtypes_m", GenEvoEvaluate.vtypes_g),
                   ("v_glb_m", GenEvoEvaluate.v_glb_g),
                   ("netcnvt_m", GenEvoEvaluate.netcnvt_g),
                   ("sumo_bin_m", GenEvoEvaluate.sumo_bin_g)]

        pool = futures.MPIPoolExecutor(globals=glb_mpi, main=True)
        pool.bootup(wait=True)

        del glb_mpi
    else:
        if v >= GenEvoConstants.V_DBG:
            print(" for single threading.")
        GenEvoEvaluate.initialize_worker(simulation_cfg_str, trips_str,
                                         vtypes_str, plain_con_str,
                                         plain_edg_str, plain_nod_str,
                                         plain_tll_str, plain_typ_str, v)
        pool = None

    del plain_files
    del plain_con_str
    del plain_edg_str
    del plain_nod_str
    del plain_tll_str
    del plain_typ_str
    del simulation_cfg_str
    del trips_str
    del vtypes_str
    ##

    genome = generate_genom_from_searchspace(searchspace)

    # possibly generate only population_size - 1 individuals and inject a special individual with all genes set to do_nothing
    generation = initialize_first_generation(genome, population_size,
                                             stable_random, individual_id_ctr)
    individual_id_ctr += population_size

    evaluate_population(generation, op_mode, pool, timeout_s)
    generation_ctr = 0

    fittest_individual = [generation[0], generation_ctr]
    for individual in generation:
        if individual[2] < fittest_individual[0][2]:
            fittest_individual = [individual, generation_ctr]

    if v >= GenEvoConstants.V_STAT:
        print(
            "Current generation is {}. The fittest individual's name is {} and it has a fitness value of {}."
            .format(generation_ctr, fittest_individual[0][0],
                    fittest_individual[0][2]))

    netcnvt_bin = load_netconvert_binary()

    while generation_ctr < number_of_generations:
        if v >= GenEvoConstants.V_STAT:
            t_0 = time.monotonic()
            t_A_0 = t_0
        generation = generate_new_generation(generation, population_size,
                                             genome, k_num, mutation_rate,
                                             stable_random, individual_id_ctr)
        individual_id_ctr += population_size // 2  # integer division keeps the id counter an int
        if v >= GenEvoConstants.V_INF:
            t_1 = time.monotonic()
            print("Generation {} created in {}s.".format(
                generation_ctr + 1, t_1 - t_0))

        t_E, t_M = evaluate_population(generation, op_mode, pool, timeout_s)
        if v >= GenEvoConstants.V_DBG:
            print("Evaluating the new individuals took {}s.".format(t_E))
        if v >= GenEvoConstants.V_INF:
            print("Calculated fitness applied to individuals in {}s.".format(
                t_M))
        generation_ctr += 1

        if v >= GenEvoConstants.V_INF:
            t_0 = time.monotonic()
        fittest_individual_in_generation = [generation[0], generation_ctr]
        for individual in generation:
            if individual[2] < fittest_individual_in_generation[0][2]:
                fittest_individual_in_generation = [individual, generation_ctr]

        if fittest_individual_in_generation[0][2] < fittest_individual[0][2]:
            fittest_individual = fittest_individual_in_generation
            if best_net_path is not False:
                tmpd, best_plain_files = cnvt_net_to_plain(
                    net_path, netcnvt_bin, "best", False)
                hack_for_cologne(best_plain_files)
                best_nr = Net_Repr(best_plain_files)
                GenEvoEvaluate.modify_net(fittest_individual[0], best_nr,
                                          best_plain_files, best_net_path,
                                          netcnvt_bin)
                rm_tmpd_and_files(tmpd)

        if v >= GenEvoConstants.V_INF:
            t_1 = time.monotonic()
        if v >= GenEvoConstants.V_STAT:
            print(
                "Current generation is {}. The fittest individual's name is {}, and it has a fitness value of {}."
                .format(generation_ctr, fittest_individual_in_generation[0][0],
                        fittest_individual_in_generation[0][2]))
            print(
                "The overall fittest individual's name is {}. It is from generation {} and has a fitness value of {}."
                .format(fittest_individual[0][0], fittest_individual[1],
                        fittest_individual[0][2]))
        if v >= GenEvoConstants.V_INF:
            print("Fittest individual found in {}s.".format(t_1 - t_0))
        if v >= GenEvoConstants.V_STAT:
            t_A_1 = time.monotonic()
            t_A = t_A_1 - t_A_0
            print(
                "Generation took {}s, of which {}s were spent evaluating individuals and {}s managing the generation."
                .format(t_A, t_E, t_A - t_E))

    if use_local_mt:
        pool.close()
        pool.join()
    elif use_mpi:
        pool.shutdown(wait=True)

    if best_net_path is not False:
        netcnvt_bin = load_netconvert_binary()
        tmpd, best_plain_files = cnvt_net_to_plain(net_path, netcnvt_bin,
                                                   "best", False)
        hack_for_cologne(best_plain_files)
        best_nr = Net_Repr(best_plain_files)
        GenEvoEvaluate.modify_net(fittest_individual[0], best_nr,
                                  best_plain_files, best_net_path, netcnvt_bin)
        rm_tmpd_and_files(tmpd)

    t_t_1 = time.monotonic()
    if v >= GenEvoConstants.V_STAT:
        print("\n*** Result ***")
        print(
            "Tested {} individuals in {} generations. The best individual was {} from generation {} with fitness {}."
            .format(individual_id_ctr - 1, generation_ctr,
                    fittest_individual[0][0], fittest_individual[1],
                    fittest_individual[0][2]))
        print("Overall runtime {}s.".format(t_t_1 - t_t_0))
        print("**************")
Example #9
def main(use_futures, redirect_out=True):
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    # mode = MPI.MODE_CREATE | MPI.MODE_WRONLY

    if redirect_out:
        sys.stdout = open('out.txt', 'w')
        sys.stderr = open('err.txt', 'w')

    print("Prepare traits and genomes")

    neat_params = neat.Parameters()
    system(
        "grep -v '//' < input/neat_parameters.txt | grep . > input/neat_parameters.filtered.txt"
    )
    neat_params.Load('input/neat_parameters.filtered.txt')
    system("rm input/neat_parameters.filtered.txt")

    # system("grep -v '//' < input/global_parameters.json | grep . > input/global_parameters.filtered.json")
    # network_parameters = json.load(open('input/global_parameters.filtered.json', 'r'))
    # system("rm input/global_parameters.filtered.json")
    # mode = MPI.MODE_RDONLY
    network_parameters = json.load(open('input/global_parameters.json', 'r'))
    # network_parameters = json.load(open(comm, 'input/global_parameters.json', mode))

    for trait_name, trait_value in traits.network_traits.items():
        neat_params.SetGenomeTraitParameters(trait_name, trait_value)
    for trait_name, trait_value in traits.neuron_traits.items():
        # change to SetNeuronTraitParameters to let the neuron parameters mutate individually for each neuron
        neat_params.SetGenomeTraitParameters(trait_name, trait_value)
    for trait_name, trait_value in traits.synapse_traits.items():
        # change to SetLinkTraitParameters to let the synapse parameters mutate individually for each synapse
        neat_params.SetGenomeTraitParameters(trait_name, trait_value)

    genome = neat.Genome(
        0,  # Some genome ID, I don't know what it means.
        network_parameters['inputs_number'],
        2,  # ignored for seed_type == 0, specifies number of hidden units if seed_type == 1
        network_parameters['outputs_number'],
        False,  # fs_neat. If True, a minimalistic perceptron is created: each output is connected to a random input and the bias.
        neat.ActivationFunction.UNSIGNED_SIGMOID,  # output neurons activation function
        neat.ActivationFunction.UNSIGNED_SIGMOID,  # hidden neurons activation function
        0,  # seed_type
        neat_params,  # global parameters object returned by neat.Parameters()
        0  # number of hidden layers
    )

    population = neat.Population(
        genome,
        neat_params,
        True,  # whether to randomize the population
        0.5,  # how much to randomize
        0  # the RNG seed
    )

    # fh = MPI.File.Open(comm, "datafile", mode)
    # line1 = str(comm.rank)*(comm.rank+1) + '\n'
    # line2 = chr(ord('a')+comm.rank)*(comm.rank+1) + '\n'
    # fh.Write_ordered(line1)
    # fh.Write_ordered(line2)
    # fh.Close()
    print("Start solving generations")
    outfile = open('output/fitness.txt', 'w')
    # mode = MPI.MODE_CREATE | MPI.MODE_WRONLY
    # outfile = MPI.File.Open(comm, 'output/fitness.txt', mode)
    # with open('output/fitness.txt', 'w') as outfile:
    for generation_number in range(network_parameters['generations']):
        print("Generation " + str(generation_number) + " started")
        genome_list = neat.GetGenomeList(population)
        fitnesses_list = []

        # map(prepare_genomes, genome_list)
        for genome in genome_list:
            prepare_genomes(genome)

        if use_futures:
            # use a context manager so the pool is shut down each generation
            with fut.MPIPoolExecutor() as executor:
                for fitness in executor.map(evaluate_futures, genome_list):
                    fitnesses_list.append(fitness)
        else:
            # fitnesses_list = map(evaluate, genome_list)
            for genome in genome_list:
                fitnesses_list.append(evaluate(genome))

        neat.ZipFitness(genome_list, fitnesses_list)

        population.GetBestGenome().Save('output/best_genome.txt')
        # mode = MPI.MODE_APPEND
        # genomefile = MPI.File.Open(comm, 'output/best_genome.txt', mode)
        # genomefile.Write_ordered('\n' + str(population.GetBestGenome().GetNeuronTraits()) +
        #                          '\n' + str(population.GetBestGenome().GetGenomeTraits()))
        # genomefile.Close()
        genomefile = open('output/best_genome.txt', 'a')
        genomefile.write('\n' +
                         str(population.GetBestGenome().GetNeuronTraits()) +
                         '\n' +
                         str(population.GetBestGenome().GetGenomeTraits()))
        genomefile.close()
        # copytree('genome' + str(population.GetBestGenome().GetID()),
        #          'output/generation' + str(generation_number) + '_best_genome')
        try:
            copytree(
                'genome' + str(population.GetBestGenome().GetID()),
                'output/generation' + str(generation_number) + '_best_genome')
        except FileExistsError:
            print('folder generation' + str(generation_number) +
                  '_best_genome exists')

        # outfile.Write_ordered(str(generation_number) + '\t' + str(max(fitnesses_list)) + '\n')
        outfile.write(
            str(generation_number) + '\t' + str(max(fitnesses_list)) + '\n')
        outfile.flush()
        # sys.stderr.write(
        #     '\rGeneration ' + str(generation_number)
        #     + ': fitness = ' + str(population.GetBestGenome().GetFitness())
        # )

        # advance to the next generation
        print("Generation " + str(generation_number) +
              ": fitness = " + str(population.GetBestGenome().GetFitness()))
        print("Generation " + str(generation_number) + " finished")
        population.Epoch()
    # outfile.Close()
    outfile.close()
Example #10
File: parallel.py  Project: mr-c/cogent3
def imap(f,
         s,
         max_workers=None,
         use_mpi=False,
         if_serial="raise",
         chunksize=None):
    """
    Parameters
    ----------
    f : callable
        function that operates on values in s
    s : iterable
        series of inputs to f
    max_workers : int or None
        maximum number of workers. Defaults to one less than the maximum
        available.
    use_mpi : bool
        use MPI for parallel execution
    if_serial : str
        action to take if conditions will result in serial execution. Valid
        values are 'raise', 'ignore', 'warn'. Defaults to 'raise'.
    chunksize : int or None
        size of the data chunks executed by worker processes. Defaults to
        None, in which case a suitable chunksize is computed by
        set_default_chunksize()

    Returns
    -------
    imap is a generator yielding the results of f(s[i]); map returns the
    full result series
    """

    if_serial = if_serial.lower()
    assert if_serial in ("ignore", "raise",
                         "warn"), f"invalid choice '{if_serial}'"

    # If max_workers is not defined, get number of all processes available
    # minus 1 to leave for master process
    if use_mpi:
        if not USING_MPI:
            raise RuntimeError("Cannot use MPI")

        err_msg = ("Execution in serial. For parallel MPI execution, use:\n"
                   " $ mpirun -n 1 <executable script>")

        if COMM.Get_attr(MPI.UNIVERSE_SIZE) == 1 and if_serial == "raise":
            raise RuntimeError(err_msg)
        elif COMM.Get_attr(MPI.UNIVERSE_SIZE) == 1 and if_serial == "warn":
            warnings.warn(err_msg, UserWarning)

        max_workers = max_workers or 0

        if max_workers > COMM.Get_attr(MPI.UNIVERSE_SIZE):
            warnings.warn("max_workers too large, reducing to UNIVERSE_SIZE-1",
                          UserWarning)

        max_workers = min(max_workers, COMM.Get_attr(MPI.UNIVERSE_SIZE) - 1)

        if not chunksize:
            chunksize = set_default_chunksize(s, max_workers)

        with MPIfutures.MPIPoolExecutor(max_workers=max_workers) as executor:
            for result in executor.map(f, s, chunksize=chunksize):
                yield result
    else:
        if not max_workers:
            max_workers = multiprocessing.cpu_count() - 1
        assert max_workers < multiprocessing.cpu_count()

        if not chunksize:
            chunksize = set_default_chunksize(s, max_workers)

        f = PicklableAndCallable(f)

        with concurrentfutures.ProcessPoolExecutor(max_workers) as executor:
            for result in executor.map(f, s, chunksize=chunksize):
                yield result
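A hypothetical call site for imap; the worker function and inputs below are illustrative only, not taken from the original file:

    # Hypothetical usage sketch:
    def square(x):
        return x * x

    # launch with e.g. `mpirun -n 1 python script.py` for the MPI path
    for result in imap(square, range(100), max_workers=4, use_mpi=True):
        print(result)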
Example #11
            # save the parsed PDF data (dataframe df) into a CSV file
            save_to_csv(df, PARSE_DATA_CSVS + t[0] + ".csv")
            print("CSV saved")

    except Exception as e:
        print('ERROR:', e, pdf_file_name_without_ext)
        traceback.print_exc()
    finally:
        print("Clean up working files...")
        shutil.rmtree(input_pdf_images_path, ignore_errors=True)
        shutil.rmtree(input_images_blocks_path, ignore_errors=True)

    end_time = time.time()

    return pdf_file_name_without_ext, end_time - begin_time


if __name__ == '__main__':

    print('Tesseract Version:', pytesseract.get_tesseract_version())
    print('multiprocessing cpu_count:', multiprocessing.cpu_count())
    print('os cpu_count:', os.cpu_count())
    print('sched_getaffinity:', len(os.sched_getaffinity(0)))

    #a_pool = multiprocessing.Pool(multiprocessing.cpu_count())
    #results = a_pool.map(pdf_process, state_pdfs_files)

    with futures.MPIPoolExecutor() as executor:
        results = executor.map(pdf_process, state_pdfs_files)
        for res in results:
            print(res)
Example #12
def run_models(network: _Union[Network, Networks],
               variables: VariableSets,
               population: Population,
               nprocs: int, nthreads: int, seed: int,
               nsteps: int, output_dir: OutputFiles,
               iterator: MetaFunction = None,
               extractor: MetaFunction = None,
               mixer: MetaFunction = None,
               mover: MetaFunction = None,
               profiler: Profiler = None,
               parallel_scheme: str = "multiprocessing",
               debug_seeds=False) \
        -> _List[_Tuple[VariableSet, Population]]:
    """Run all of the models on the passed Network that are described
       by the passed VariableSets

       Parameters
       ----------
       network: Network or Networks
         The network(s) to model
       variables: VariableSets
         The sets of VariableSet that represent all of the model
         runs to perform
       population: Population
         The initial population for all of the model runs. This also
         contains the starting date and day for the model outbreak
       nprocs: int
         The number of model runs to perform in parallel
       nthreads: int
         The number of threads to parallelise each model run over
       seed: int
         Random number seed which is used to generate random seeds
         for all model runs
       nsteps: int
         The maximum number of steps to perform for each model - this
         will run until the outbreak is over if this is None
       output_dir: OutputFiles
         The OutputFiles that represents the directory in which all
         output should be placed
       iterator: str
         Iterator to load that will be used to iterate the outbreak
       extractor: str
         Extractor to load that will be used to extract information
       mixer: str
         Mixer to load that will be used to mix demographic data
       mover: str
         Mover to load that will be used to move the population between
         different demographics
       profiler: Profiler
         Profiler used to profile the model run
       parallel_scheme: str
         Which parallel scheme (multiprocessing, mpi4py or scoop) to use
         to run multiple model runs in parallel
       debug_seeds: bool (False)
         Set this parameter to force all runs to use the same seed
         (seed) - this is used for debugging and should never be set
         in production runs

       Returns
       -------
       results: List[ tuple(VariableSet, Population)]
         The set of adjustable variables and final population at the
         end of each run
    """
    from ._console import Console

    if len(variables) == 1:
        # no need to do anything complex - just a single run
        if not variables[0].is_empty():
            Console.print(f"* Adjusting {variables[0]}", markdown=True)

        params = network.params.set_variables(variables[0])

        network.update(params, profiler=profiler)

        trajectory = network.run(population=population,
                                 seed=seed,
                                 nsteps=nsteps,
                                 output_dir=output_dir,
                                 iterator=iterator,
                                 extractor=extractor,
                                 mixer=mixer,
                                 mover=mover,
                                 profiler=profiler,
                                 nthreads=nthreads)

        results = [(variables[0], trajectory)]

        # perform the final summary
        from ._get_functions import get_summary_functions

        if extractor is None:
            from ..extractors._extract_default import extract_default
            extractor = extract_default
        else:
            from ..extractors._extract_custom import build_custom_extractor
            extractor = build_custom_extractor(extractor)

        funcs = get_summary_functions(network=network,
                                      results=results,
                                      output_dir=output_dir,
                                      extractor=extractor,
                                      nthreads=nthreads)

        for func in funcs:
            func(network=network, output_dir=output_dir, results=results)

        return results

    # generate the random number seeds for all of the jobs
    # (for testing, we will use the same seed so that I can check
    #  that they are all working)
    seeds = []

    if seed == 0:
        # this is a special mode that a developer can use to force
        # all jobs to use the same random number seed (15324) that
        # is used for comparing outputs. This should NEVER be used
        # for production code
        Console.warning("Using special mode to fix all random number "
                        "seeds to 15324. DO NOT USE IN PRODUCTION!!!")

        for i in range(0, len(variables)):
            seeds.append(15324)

    elif debug_seeds:
        Console.warning(f"Using special mode to make all jobs use the "
                        f"same random number seed {seed}. "
                        f"DO NOT USE IN PRODUCTION!")

        for i in range(0, len(variables)):
            seeds.append(seed)

    else:
        from ._ran_binomial import seed_ran_binomial, ran_int
        rng = seed_ran_binomial(seed)

        # seed the rngs used for the sub-processes using this rng
        for i in range(0, len(variables)):
            seeds.append(ran_int(rng, 10000, 99999999))

    # set the output directories for all of the jobs - this is based
    # on the fingerprint, so should be unique for each job
    outdirs = []

    for v in variables:
        f = v.output_dir()
        d = _os.path.join(output_dir.get_path(), f)

        i = 1
        base = d

        while d in outdirs:
            i += 1
            d = base + "x%03d" % i

        outdirs.append(d)

    outputs = []

    Console.print(
        f"Running **{len(variables)}** jobs using **{nprocs}** process(es)",
        markdown=True)

    if nprocs == 1:
        # no need to use a pool, as we will repeat this calculation
        # several times
        save_network = network.copy()

        Console.rule("Running models in serial")

        for i, variable in enumerate(variables):
            seed = seeds[i]
            outdir = outdirs[i]

            with output_dir.open_subdir(outdir) as subdir:
                Console.print(
                    f"Running parameter set {i+1} of {len(variables)} "
                    f"using seed {seed}")
                Console.print(f"All output written to {subdir.get_path()}")

                with Console.redirect_output(subdir.get_path(),
                                             auto_bzip=output_dir.auto_bzip()):
                    Console.print(f"Running variable set {i+1}")
                    Console.print(f"Random seed: {seed}")
                    Console.print(f"nthreads: {nthreads}")

                    # no need to do anything complex - just a single run
                    params = network.params.set_variables(variable)

                    Console.rule("Adjustable parameters to scan")
                    Console.print("\n".join(
                        [f"* {x}" for x in params.adjustments]),
                                  markdown=True)
                    Console.rule()

                    network.update(params, profiler=profiler)

                    with Console.spinner("Computing model run") as spinner:
                        try:
                            output = network.run(population=population,
                                                 seed=seed,
                                                 nsteps=nsteps,
                                                 output_dir=subdir,
                                                 iterator=iterator,
                                                 extractor=extractor,
                                                 mixer=mixer,
                                                 mover=mover,
                                                 profiler=profiler,
                                                 nthreads=nthreads)
                            spinner.success()
                        except Exception as e:
                            spinner.failure()
                            Console.print_exception()
                            error = f"FAILED: {e.__class__} {e}"
                            output = None

                    if output is not None:
                        outputs.append((variable, output))
                    else:
                        outputs.append((variable, []))

                if output is not None:
                    Console.panel(
                        f"Completed job {i+1} of {len(variables)}\n"
                        f"{variable}\n"
                        f"{output[-1]}",
                        style="alternate")
                else:
                    Console.error(f"Job {i+1} of {len(variables)}\n"
                                  f"{variable}\n"
                                  f"{error}")
            # end of OutputDirs context manager

            if i != len(variables) - 1:
                # still another run to perform, restore the network
                # to the original state
                network = save_network.copy()
        # end of loop over variable sets
    else:
        from ._worker import run_worker

        # create all of the parameters and options to run
        arguments = []

        if isinstance(network, Networks):
            max_nodes = network.overall.nnodes + 1
            max_links = max(network.overall.nlinks, network.overall.nplay) + 1
        else:
            max_nodes = network.nnodes + 1
            max_links = max(network.nlinks, network.nplay) + 1

        try:
            demographics = network.demographics
        except Exception:
            demographics = None

        # give the workers a clean copy of the profiler
        if profiler is None:
            worker_profiler = None
        else:
            worker_profiler = profiler.__class__()

        for i, variable in enumerate(variables):
            seed = seeds[i]
            outdir = outdirs[i]

            arguments.append({
                "params": network.params.set_variables(variable),
                "demographics": demographics,
                "options": {
                    "seed": seed,
                    "output_dir": outdir,
                    "auto_bzip": output_dir.auto_bzip(),
                    "population": population,
                    "nsteps": nsteps,
                    "iterator": iterator,
                    "extractor": extractor,
                    "mixer": mixer,
                    "mover": mover,
                    "profiler": worker_profiler,
                    "nthreads": nthreads,
                    "max_nodes": max_nodes,
                    "max_links": max_links
                }
            })

        if parallel_scheme == "multiprocessing":
            # run jobs using a multiprocessing pool
            Console.rule("Running models in parallel using multiprocessing")
            from multiprocessing import Pool

            results = []

            with Pool(processes=nprocs) as pool:
                for argument in arguments:
                    results.append(pool.apply_async(run_worker, (argument, )))

                for i, result in enumerate(results):
                    with Console.spinner("Computing model run") as spinner:
                        try:
                            result.wait()
                            output = result.get()
                            spinner.success()
                        except Exception as e:
                            spinner.failure()
                            error = f"FAILED: {e.__class__} {e}"
                            Console.error(error)
                            output = None

                        if output is not None:
                            Console.panel(
                                f"Completed job {i+1} of {len(variables)}\n"
                                f"{variables[i]}\n"
                                f"{output[-1]}",
                                style="alternate")

                            outputs.append((variables[i], output))
                        else:
                            Console.error(f"Job {i+1} of {len(variables)}\n"
                                          f"{variables[i]}\n"
                                          f"{error}")
                            outputs.append((variables[i], []))

        elif parallel_scheme == "mpi4py":
            # run jobs using a mpi4py pool
            Console.rule("Running models in parallel using MPI")
            from mpi4py import futures
            with futures.MPIPoolExecutor(max_workers=nprocs) as pool:
                results = pool.map(run_worker, arguments)

                for i in range(0, len(variables)):
                    with Console.spinner("Computing model run") as spinner:
                        try:
                            output = next(results)
                            spinner.success()
                        except Exception as e:
                            spinner.failure()
                            error = f"FAILED: {e.__class__} {e}"
                            Console.error(error)
                            output = None

                        if output is not None:
                            Console.panel(
                                f"Completed job {i+1} of {len(variables)}\n"
                                f"{variables[i]}\n"
                                f"{output[-1]}",
                                style="alternate")

                            outputs.append((variables[i], output))
                        else:
                            Console.error(f"Job {i+1} of {len(variables)}\n"
                                          f"{variables[i]}\n"
                                          f"{error}")
                            outputs.append((variables[i], []))

        elif parallel_scheme == "scoop":
            # run jobs using a scoop pool
            Console.rule("Running models in parallel using scoop")
            from scoop import futures

            results = []

            for argument in arguments:
                try:
                    results.append(futures.submit(run_worker, argument))
                except Exception as e:
                    Console.error(
                        f"Error submitting calculation: {e.__class__} {e}\n"
                        f"Trying to submit again...")

                    # try again
                    try:
                        results.append(futures.submit(run_worker, argument))
                    except Exception as e:
                        Console.error(
                            f"No - another error: {e.__class__} {e}\n"
                            f"Skipping this job")
                        results.append(None)

            for i in range(0, len(results)):
                with Console.spinner("Computing model run") as spinner:
                    try:
                        output = results[i].result()
                        spinner.success()
                    except Exception as e:
                        spinner.failure()
                        error = f"FAILED: {e.__class__} {e}"
                        Console.error(error)
                        output = None

                    if output is not None:
                        Console.panel(
                            f"Completed job {i+1} of {len(variables)}\n"
                            f"{variables[i]}\n"
                            f"{output[-1]}",
                            style="alternate")

                        outputs.append((variables[i], output))
                    else:
                        Console.error(f"Job {i+1} of {len(variables)}\n"
                                      f"{variables[i]}\n"
                                      f"{error}")
                        outputs.append((variables[i], []))
        else:
            raise ValueError(f"Unrecognised parallelisation scheme "
                             f"{parallel_scheme}.")

    # perform the final summary
    from ._get_functions import get_summary_functions

    if extractor is None:
        from ..extractors._extract_default import extract_default
        extractor = extract_default
    else:
        from ..extractors._extract_custom import build_custom_extractor
        extractor = build_custom_extractor(extractor)

    funcs = get_summary_functions(network=network,
                                  results=outputs,
                                  output_dir=output_dir,
                                  extractor=extractor,
                                  nthreads=nthreads)

    for func in funcs:
        try:
            func(network=network,
                 output_dir=output_dir,
                 results=outputs,
                 nthreads=nthreads)
        except Exception as e:
            Console.error(f"Error calling {func}: {e.__class__} {e}")

    return outputs
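A hypothetical invocation of run_models under the mpi4py scheme; constructing the Network, VariableSets, Population and OutputFiles arguments is outside the scope of this example, so every value below is an assumption:

    # Hypothetical usage sketch (argument values are illustrative):
    results = run_models(network=network, variables=variables,
                         population=population,
                         nprocs=4, nthreads=2, seed=42,
                         nsteps=None,  # run until the outbreak is over
                         output_dir=output_dir,
                         parallel_scheme="mpi4py")
    for varset, trajectory in results:
        print(varset, trajectory[-1] if trajectory else "FAILED")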
Example #13
    while True:
        # Generate the different "Individuen" here and pass them as decision vectors to the objective function

        # example:
        decisionVector1 = [
            ["i-cluster_498751183_996951775", "priority 399638313#2 24814407#0"],
            ["i-996951809", "right_before_left"],
            ["i-498751220", "traffic_light"],
            ["i-996951907", "priority 399638313#1 -399638313#2"],
            ["r-292785669-292785688-76182923", "right_before_left"]
        ]
        decisionVector2 = [
            ["i-cluster_498751183_996951775", "priority 399638313#2 -24814407#2"],
            ["i-996951809", "do_nothing"],
            ["i-498751220", "traffic_light_right_on_red"],
            ["i-996951907", "traffic_light_right_on_red"],
            ["r-292785669-292785688-76182923", "do_nothing"]
        ]
        toEvaluate = [decisionVector1, decisionVector2]

        results = []
        with futures.MPIPoolExecutor(max_workers=args.universe_size) as executor:
            fun_evals_left -= len(toEvaluate)  # reduce the number of max fun evals
            results.extend(executor.map(sumo, toEvaluate, [generation] * len(toEvaluate)))

        # this is the results array. It contains the floats from the objective function evaluations
        pprint(results)

        generation += 1
        break  # do as long as we have fun evals left?
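The sumo function mapped above is defined elsewhere; for the executor.map call to work it must accept a decision vector and a generation number and return a float. A hypothetical stub:

    # Hypothetical stand-in for the real SUMO-based objective function:
    def sumo(decision_vector, generation):
        # decision_vector: list of [node_id, action] pairs
        # generation: int, index of the current generation
        return 0.0  # the real function returns the simulated fitness value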