def train_population(pop: Population, games: list, unused_cpu: int = 2):
    """
    Evaluate the given population on a training set and store the resulting fitness.

    Every genome is evaluated in a worker process on the given games (noise disabled). The gathered observations
    are converted to fitness scores which are assigned to the genomes. Afterwards the population's generation
    counter is incremented and the population is saved.

    :param pop: Population whose genomes are evaluated
    :param games: Games on which the genomes are evaluated
    :param unused_cpu: Number of CPU cores that are not used by the worker pool
    """
    multi_env = get_multi_env(pop=pop, game_config=pop.config)
    multi_env.set_games(games, noise=False)

    # mp.Pool refuses a process count below one, hence always keep at least one worker
    n_workers = max(1, mp.cpu_count() - unused_cpu)
    pbar = tqdm(total=len(pop.population), desc="Evaluating")

    def update(*_):
        pbar.update()

    # The manager hosts the shared result-dictionary; leaving the with-block shuts its server process down
    with mp.Manager() as manager:
        return_dict = manager.dict()
        try:
            with mp.Pool(n_workers) as pool:
                for genome_id, genome in pop.population.items():
                    pool.apply_async(
                        func=multi_env.eval_genome,
                        args=((genome_id, genome), return_dict),
                        callback=update,
                    )
                pool.close()  # Close the pool
                pool.join()  # Postpone continuation until everything is finished
        finally:
            pbar.close()  # Also release the progress bar when the evaluation fails

        # Calculate the fitness while the shared dictionary is still reachable
        fitness = calc_pop_fitness(
            fitness_cfg=pop.config.evaluation,
            game_cfg=pop.config.game,
            game_obs=return_dict,
            gen=pop.generation,
        )

    for i, genome in pop.population.items():
        genome.fitness = fitness[i]

    # Save the results
    pop.generation += 1
    pop.save()
def evaluate_population(pop: Population, cfg: Config, cpu: int, experiment_id: int):
    """
    Evaluate the given population on the evaluation games of the requested experiment.

    All genomes are evaluated in worker processes (noise disabled), their fitness is assigned, the fittest genome
    is stored as the population's best genome and the population is saved. Finally, the fittest genome is
    visualized and traced.

    :param pop: Population that is evaluated
    :param cfg: Config used to create the environment; its game-section is used for the fitness calculation
    :param cpu: Number of CPU cores that are not used by the worker pool
    :param experiment_id: Experiment of which the evaluation games are used
    """
    pop.log(f"{pop.name} - Evaluating the population...")
    _, game_ids_eval = get_game_ids(experiment_id=experiment_id)
    multi_env = get_multi_env(pop=pop, game_config=cfg)
    multi_env.set_games(game_ids_eval, noise=False)

    # mp.Pool refuses a process count below one, hence always keep at least one worker
    n_workers = max(1, mp.cpu_count() - cpu)
    pbar = tqdm(total=len(pop.population), desc="Evaluating")

    def update(*_):
        pbar.update()

    # The manager hosts the shared result-dictionary; leaving the with-block shuts its server process down
    with mp.Manager() as manager:
        return_dict = manager.dict()
        try:
            with mp.Pool(n_workers) as pool:
                for genome_id, genome in pop.population.items():
                    pool.apply_async(
                        func=multi_env.eval_genome,
                        args=((genome_id, genome), return_dict),
                        callback=update,
                    )
                pool.close()  # Close the pool
                pool.join()  # Postpone continuation until everything is finished
        finally:
            pbar.close()  # Also release the progress bar when the evaluation fails

        # Calculate the fitness while the shared dictionary is still reachable
        pop.log(f"{pop.name} - Calculating fitness scores...")
        fitness = calc_pop_fitness(
            fitness_cfg=pop.config.evaluation,
            game_cfg=cfg.game,
            game_obs=return_dict,
            gen=pop.generation,
        )

    for i, genome in pop.population.items():
        genome.fitness = fitness[i]

    # Get the fittest genome (the first one on ties, None for an empty population)
    best = max(pop.population.values(), key=lambda g: g.fitness, default=None)
    pop.best_genome = best

    # Save the results
    pop.save()

    # Visualize most fit genome
    visualize_genome(
        debug=True,
        genome=best,
        population=pop,
    )

    # Trace the most fit genome
    trace_most_fit(
        debug=False,
        games=game_ids_eval,
        genome=best,
        population=pop,
        unused_cpu=cpu,
    )
def evaluate_same_games_and_evolve(
        self,
        games: list,
        pop: Population,
        n: int = 1,
        parallel=True,
        save_interval: int = 1,
):
    """
    Run n generations in which the population is always evaluated on the same set of games.

    Each generation is delegated to single_evaluation. The population is saved every save_interval generations,
    and once more after the final generation if that generation was not saved yet.

    :param games: List of games used for training
    :param pop: Population object
    :param n: Number of generations
    :param parallel: Parallel the code (disable parallelization for debugging purposes)
    :param save_interval: Indicates how often a population gets saved
    """
    multi_env = get_multi_env(pop=pop, game_config=self.game_config)
    pop.log(f"Repetitive evaluating games: {games}", print_result=False)
    multi_env.set_games(games)

    # Tracks whether the latest generation still has to be written to disk
    unsaved = False
    for generation_nr in range(1, n + 1):
        single_evaluation(
            multi_env=multi_env,
            parallel=parallel,
            pop=pop,
            unused_cpu=self.unused_cpu,
        )

        # Periodic save
        unsaved = generation_nr % save_interval != 0
        if not unsaved:
            pop.save()

    # The final generation must always end up on disk
    if unsaved:
        pop.save()
def evaluate_and_evolve(
        self,
        pop: Population,
        n: int = 1,
        parallel=True,
        save_interval: int = 1,
):
    """
    Run n generations, each on a freshly sampled set of games.

    Before every generation self.sample_games is called on the environment, after which the generation is
    delegated to single_evaluation. The population is saved every save_interval generations, and once more after
    the final generation if that generation was not saved yet.

    :param pop: Population object
    :param n: Number of generations
    :param parallel: Parallel the code (disable parallelization for debugging purposes)
    :param save_interval: Indicates how often a population gets saved
    """
    multi_env = get_multi_env(pop=pop, game_config=self.game_config)

    # Tracks whether the latest generation still has to be written to disk
    unsaved = False
    for generation_nr in range(1, n + 1):
        # Draw a new random set of games for this generation
        self.sample_games(multi_env, pop.log)

        # Evaluate the population on the newly sampled games
        single_evaluation(
            multi_env=multi_env,
            parallel=parallel,
            pop=pop,
            unused_cpu=self.unused_cpu,
        )

        # Periodic save
        unsaved = generation_nr % save_interval != 0
        if not unsaved:
            pop.save()

    # The final generation must always end up on disk
    if unsaved:
        pop.save()
def train(
        population: Population,
        game_config: Config,
        games: list,
        iterations: int,
        unused_cpu: int = 0,
        save_interval: int = 10,
):
    """
    Train the population on the requested number of iterations. Manual adaptation of main's train().

    Every iteration evaluates all genomes on the given games (noise enabled), assigns their fitness, reports and
    stores the best genome, evolves the population and finally keeps mutating each new genome until its size()
    reports at least one hidden node. The population is saved every save_interval iterations and once more at the
    end if the last iteration was not saved.

    :param population: Population that is trained
    :param game_config: Config used to create the environment; its game-section is used for the fitness calculation
    :param games: Games on which the population is trained
    :param iterations: Number of generations to train
    :param unused_cpu: Number of CPU cores that are not used by the worker pool
    :param save_interval: Indicates how often the population gets saved
    """
    population.log("\n===> TRAINING <===\n")
    multi_env = get_multi_env(pop=population, game_config=game_config)
    msg = f"Repetitive evaluating on games: {games} for {iterations} iterations"
    population.log(msg, print_result=False)

    # mp.Pool refuses a process count below one, hence always keep at least one worker
    n_workers = max(1, mp.cpu_count() - unused_cpu)

    # Iterate and evaluate over the games
    saved = True
    for iteration in range(iterations):
        # Set and randomize the games
        multi_env.set_games(games, noise=True)

        # Prepare the generation's reporters for the generation
        population.reporters.start_generation(gen=population.generation, logger=population.log)

        # Fetch the dictionary of genomes
        genomes = list(iteritems(population.population))

        # The manager hosts the shared result-dictionary; leaving the with-block shuts its server process down
        with mp.Manager() as manager:
            return_dict = manager.dict()
            with mp.Pool(n_workers) as pool:
                for genome in genomes:
                    pool.apply_async(func=multi_env.eval_genome, args=(genome, return_dict))
                pool.close()  # Close the pool
                pool.join()  # Postpone continuation until everything is finished

            # Calculate the fitness while the shared dictionary is still reachable
            fitness = calc_pop_fitness(
                fitness_cfg=population.config.evaluation,
                game_cfg=game_config.game,
                game_obs=return_dict,
                gen=population.generation,
            )
        for i, genome in genomes:
            genome.fitness = fitness[i]

        # Gather and report statistics
        best = None
        for g in itervalues(population.population):
            if best is None or g.fitness > best.fitness:
                best = g
        population.reporters.post_evaluate(
            population=population.population,
            species=population.species,
            best_genome=best,
            logger=population.log,
        )

        # Update the population's best_genome
        genomes = sorted(population.population.items(), key=lambda x: x[1].fitness, reverse=True)
        population.best_fitness[population.generation] = genomes[0][1].fitness
        population.best_genome_hist[population.generation] = genomes[0]
        population.best_genome = best

        # Let population evolve
        population.evolve()

        # Update the genomes such all have one hidden node
        for g in population.population.values():
            n_hidden, _ = g.size()
            while n_hidden < 1:
                g.mutate_add_connection(population.config.genome)
                n_hidden, _ = g.size()

        # End generation
        population.reporters.end_generation(
            population=population.population,
            name=str(population),
            species_set=population.species,
            logger=population.log,
        )

        # Save the population
        if (iteration + 1) % save_interval == 0:
            population.save()
            saved = True
        else:
            saved = False

    # Make sure that last iterations saves
    if not saved:
        population.save()
def main(
        fitness,
        prob_gru: float,
        prob_sru: float,
        prob_lstm: float,
        version=0,
        unused_cpu=1,
):
    """
    Copy a population of experiment 1 over to experiment 2 and evaluate it there.

    :param fitness: Fitness function used to evaluate the population
    :param prob_gru: Probability of mutating towards a GRU-node
    :param prob_sru: Probability of mutating towards a SRU-node
    :param prob_lstm: Probability of mutating towards a LSTM-node
    :param version: Version of the model
    :param unused_cpu: Number of CPUs not used during training
    """
    # Build the configuration: fixed experiment settings first, caller-provided settings second
    cfg = Config()
    cfg.bot.angular_dir = []
    cfg.bot.delta_dist_enabled = False
    cfg.bot.dist_enabled = True
    cfg.game.duration = 60  # 60 seconds should be enough to reach the target from each starting orientation
    cfg.population.pop_size = 512
    cfg.genome.rnn_prob_gru = prob_gru
    cfg.genome.rnn_prob_simple_rnn = prob_sru
    cfg.genome.rnn_prob_lstm = prob_lstm
    cfg.evaluation.fitness = fitness
    cfg.update()

    # The population must already exist under experiment 1
    name = get_name(cfg=cfg, version=version)
    source_dir = f'population/storage/experiment1/{name}/'
    if not os.path.exists(source_dir):
        raise Exception(
            f"Experiment 1 must be executed first for population {name}, terminating experiment 2..."
        )

    # Load it from experiment 1, then re-target it to the folder of experiment 2 and save it there
    pop = Population(name=name, config=cfg, folder_name=get_folder(experiment_id=1), use_backup=False)
    assert pop.generation > 0  # Population is not new (redundant check)
    folder = get_folder(experiment_id=2)
    pop.folder_name = folder
    pop.save()  # Overrides pre-existing populations!

    # The stored generations are needed during evaluation, so copy them as well
    backup_suffix = '_backup' if pop.use_backup else ''
    target_dir = f"population{backup_suffix}/storage/{pop.folder_name}/{pop}/"
    copy_tree(f"{source_dir}generations", f"{target_dir}generations")

    # Give overview of population
    gru = cfg.genome.rnn_prob_gru
    sru = cfg.genome.rnn_prob_simple_rnn
    lstm = cfg.genome.rnn_prob_lstm
    overview = [
        f"\n\n\n\n\n===> RUNNING EXPERIMENT 2 FOR THE FOLLOWING CONFIGURATION: <===",
        f"fitness: {cfg.evaluation.fitness}",
        f"GRU enabled: {gru > 0} (probability={round(gru, 2)})",
        f"SRU enabled: {sru > 0} (probability={round(sru, 2)})",
        f"LSTM enabled: {lstm > 0} (probability={round(lstm, 2)})",
        f"Saving under folder: {folder}\n",
    ]
    pop.log("\n\t> ".join(overview))

    # Set games used for evaluation
    _, games_eval = get_game_ids(experiment_id=2)

    # Evaluate the trained population; log the traceback of any failure before passing it on
    try:
        evaluate(
            games=games_eval,
            population=pop,
            unused_cpu=unused_cpu,
        )
    except Exception as e:
        pop.log(traceback.format_exc(), print_result=False)
        raise e
    finally:
        process_killer('run_population.py')  # Close all the terminated files
def main(pop_name: str, version: int, unused_cpu: int = 2, use_backup: bool = False):
    """
    Train the requested population until one of its genomes finishes all evaluation games.

    A new population (generation 0) is seeded with the genomes returned by get_topology. Every loop iteration
    trains the population for one generation. Whenever the best training fitness exceeds 0.7, or the generation
    is a multiple of ten, the population is evaluated on the evaluation games and saved; the loop ends once a
    genome reaches a finished ratio of 1.

    :param pop_name: Name of the population, must be one of SUPPORTED
    :param version: Version of the population, used in the population's name
    :param unused_cpu: Number of CPU cores that are not used by the worker pool
    :param use_backup: Forwarded to the Population constructor
    """
    # Check if valid population name
    if pop_name not in SUPPORTED:
        raise Exception(f"Population '{pop_name}' not supported!")

    # Create the population
    cfg = get_config()
    cfg.population.specie_elitism = 1
    folder = get_folder(experiment_id=7)
    pop = Population(
        name=f'{pop_name}/v{version}',
        config=cfg,
        folder_name=folder,
        use_backup=use_backup,
    )

    # Replace the population's initial population with the requested topologies genomes
    if pop.generation == 0:
        for g_id in pop.population.keys():
            pop.population[g_id] = get_topology(pop_name, gid=g_id, cfg=cfg)
        pop.species.speciate(
            config=pop.config,
            population=pop.population,
            generation=pop.generation,
            logger=pop.log,
        )

    pop.log(f"\n\n\n===> RUNNING EXPERIMENT 7 <===\n")
    # Set games and environment used for training and evaluation
    games_train, games_eval = get_game_ids(experiment_id=7)
    train_env = get_multi_env(config=cfg)
    eval_env = get_multi_env(config=cfg)
    eval_env.set_games(games_eval, noise=False)

    # mp.Pool refuses a process count below one, hence always keep at least one worker
    n_workers = max(1, mp.cpu_count() - unused_cpu)

    solution_found = False
    while not solution_found:
        # Train the population for a single iteration
        pop.log("\n===> TRAINING <===")
        train_env.set_games(games_train, noise=True)

        # Prepare the generation's reporters for the generation
        pop.reporters.start_generation(gen=pop.generation, logger=pop.log)

        # Fetch the dictionary of genomes
        genomes = list(iteritems(pop.population))

        # The manager hosts the shared result-dictionary; leaving the with-block shuts its server process down
        with mp.Manager() as manager:
            return_dict = manager.dict()
            with mp.Pool(n_workers) as pool:
                for genome in genomes:
                    pool.apply_async(func=train_env.eval_genome, args=(genome, return_dict))
                pool.close()  # Close the pool
                pool.join()  # Postpone continuation until everything is finished

            # Calculate the fitness while the shared dictionary is still reachable
            fitness = calc_pop_fitness(
                fitness_cfg=pop.config.evaluation,
                game_cfg=cfg.game,
                game_obs=return_dict,
                gen=pop.generation,
            )
        for i, genome in genomes:
            genome.fitness = fitness[i]

        # Determine the generation's best genome and report it
        best = None
        for g in itervalues(pop.population):
            if best is None or g.fitness > best.fitness:
                best = g
        pop.reporters.post_evaluate(
            population=pop.population,
            species=pop.species,
            best_genome=best,
            logger=pop.log,
        )

        # Update the population's best_genome
        genomes = sorted(pop.population.items(), key=lambda x: x[1].fitness, reverse=True)
        pop.best_fitness[pop.generation] = genomes[0][1].fitness
        pop.best_genome_hist[pop.generation] = genomes[0]
        pop.best_genome = best

        # Let population evolve
        evolve(pop, pop_name)

        # End generation
        pop.reporters.end_generation(
            population=pop.population,
            name=str(pop),
            species_set=pop.species,
            logger=pop.log,
        )

        # Test if evaluation finds a solution for the new generation, impossible if fitness < 0.7
        if pop.best_genome.fitness > 0.7 or pop.generation % 10 == 0:
            pop.log("\n===> EVALUATING <===")
            genomes = list(iteritems(pop.population))
            with mp.Manager() as manager:
                return_dict = manager.dict()
                with mp.Pool(n_workers) as pool:
                    for genome in genomes:
                        pool.apply_async(func=eval_env.eval_genome, args=(genome, return_dict))
                    pool.close()  # Close the pool
                    pool.join()  # Postpone continuation until everything is finished

                # Calculate the finished ratio while the shared dictionary is still reachable
                finished = calc_finished_ratio(
                    fitness_cfg=cfg.evaluation,
                    game_obs=return_dict,
                )

            # From here on, the genomes' fitness holds their finished ratio
            best = None
            for i, genome in genomes:
                genome.fitness = finished[i]
                if best is None or finished[i] > best.fitness:
                    best = genome
            pop.log(f"Best genome:\n{best}\n{best.nodes[2]}")

            # Solution is found
            if best.fitness == 1:
                pop.best_genome = best
                pop.log(f"Solution found!")
                solution_found = True  # End the outer while-loop

            # Save the population with their evaluation results
            pop.save()
def set_population(
        pop: Population,
        hops: float,
        weight_range: float,
        mutate_bias: bool = False,
        mutate_reset: bool = False,
        mutate_update: bool = False,
        mutate_candidate: bool = False,
):
    """
    Set the given population as mutations of the population's best genome.

    The population is replaced by deep copies of pop.best_genome in which parameters of node 2 are shifted over a
    grid of offsets a * hops, with a in [-r, r] and r = int(weight_range / hops). The genomes are keyed
    consecutively starting from zero, in the order bias, reset, update, candidate. The population is saved
    afterwards.

    :param pop: Population that is overwritten, its best_genome is used as the basis of every mutation
    :param hops: Step-size between two consecutive offsets, must be strictly positive
    :param weight_range: Maximal absolute offset, must exceed hops
    :param mutate_bias: Shift each of the three entries of bias_h individually (3 * (2r + 1) genomes)
    :param mutate_reset: Shift row 0 of weight_xh_full and weight_hh over the complete grid ((2r + 1)^2 genomes)
    :param mutate_update: Same as mutate_reset, but for row 1
    :param mutate_candidate: Same as mutate_reset, but for row 2
    :raise ValueError: If hops is not strictly positive
    """
    # A zero step divides by zero below, a negative one yields empty ranges that would silently wipe the population
    if hops <= 0:
        raise ValueError(f"'hops' must be strictly positive, got {hops}")
    assert weight_range > hops
    r = int(weight_range / hops)

    # Collect the requested mutation-generators in the order in which their genomes are keyed
    generators = []
    if mutate_bias:
        generators.append(_set_population_bias_variants(pop.best_genome, r, hops))
    for enabled, row in ((mutate_reset, 0), (mutate_update, 1), (mutate_candidate, 2)):
        if enabled:
            generators.append(_set_population_gate_variants(pop.best_genome, row, r, hops))

    # Re-initialize the population around the provided genome
    pop.population = dict()
    genome_key = 0
    for generator in generators:
        for new_genome in generator:
            new_genome.key = genome_key
            pop.population[genome_key] = new_genome
            genome_key += 1

    # Save the updated population
    pop.save()


def _set_population_bias_variants(base_genome, r: int, hops: float):
    """Yield copies of base_genome in which a single entry of node 2's bias_h is shifted by a * hops."""
    for i in range(3):
        for a in range(-r, r + 1):
            variant = copy.deepcopy(base_genome)
            node = variant.nodes[2]
            node.bias_h[i] = node.bias_h[i] + a * hops
            yield variant


def _set_population_gate_variants(base_genome, row: int, r: int, hops: float):
    """Yield copies of base_genome in which node 2's weight_xh_full[row, 0] and weight_hh[row, 0] are shifted."""
    for a in range(-r, r + 1):
        for b in range(-r, r + 1):
            variant = copy.deepcopy(base_genome)
            node = variant.nodes[2]
            node.weight_xh_full[row, 0] = node.weight_xh_full[row, 0] + a * hops
            node.weight_hh[row, 0] = node.weight_hh[row, 0] + b * hops
            yield variant
def evaluate_and_evolve(
        self,
        pop: Population,
        n: int = 1,
        parallel=True,
        save_interval: int = 1,
):
    """
    Evaluate the population on the same set of games (self.games) and evolve it, for n generations.

    Every generation evaluates all genomes (noise enabled), assigns their fitness, reports and stores the best
    genome and evolves the population. The population is saved every save_interval generations and once more at
    the end if the last generation was not saved.

    :param pop: Population object
    :param n: Number of generations
    :param parallel: Parallel the code (disable parallelization for debugging purposes)
    :param save_interval: Indicates how often a population gets saved
    """
    multi_env = get_multi_env(pop=pop, game_config=self.game_config)
    msg = f"Repetitive evaluating on games: {self.games} for {n} iterations"
    pop.log(msg, print_result=False)

    def compute_fitness(game_obs):
        """Convert the gathered game observations to fitness scores for the current generation."""
        return calc_pop_fitness(
            fitness_cfg=pop.config.evaluation,
            game_cfg=self.game_config.game,
            game_obs=game_obs,
            gen=pop.generation,
        )

    # Iterate and evaluate over the games
    saved = True
    for iteration in range(n):
        # Set and randomize the games
        multi_env.set_games(self.games, noise=True)

        # Prepare the generation's reporters for the generation
        pop.reporters.start_generation(gen=pop.generation, logger=pop.log)

        # Fetch the dictionary of genomes
        genomes = list(iteritems(pop.population))

        if parallel:
            # mp.Pool refuses a process count below one, hence always keep at least one worker
            n_workers = max(1, mp.cpu_count() - self.unused_cpu)

            # The manager hosts the shared result-dictionary; leaving the with-block shuts its server process down
            with mp.Manager() as manager:
                return_dict = manager.dict()
                with mp.Pool(n_workers) as pool:
                    for genome in genomes:
                        pool.apply_async(func=multi_env.eval_genome, args=(genome, return_dict))
                    pool.close()  # Close the pool
                    pool.join()  # Postpone continuation until everything is finished

                # Calculate the fitness while the shared dictionary is still reachable
                fitness = compute_fitness(return_dict)
        else:
            return_dict = dict()
            for genome in tqdm(genomes, desc="sequential training"):
                multi_env.eval_genome(genome, return_dict)
            fitness = compute_fitness(return_dict)

        for i, genome in genomes:
            genome.fitness = fitness[i]

        # Gather and report statistics
        best = None
        for g in itervalues(pop.population):
            if best is None or g.fitness > best.fitness:
                best = g
        pop.reporters.post_evaluate(
            population=pop.population,
            species=pop.species,
            best_genome=best,
            logger=pop.log,
        )

        # Update the population's best_genome
        genomes = sorted(pop.population.items(), key=lambda x: x[1].fitness, reverse=True)
        pop.best_fitness[pop.generation] = genomes[0][1].fitness
        pop.best_genome_hist[pop.generation] = genomes[0]
        pop.best_genome = best

        # Let population evolve
        pop.evolve()

        # End generation
        pop.reporters.end_generation(
            population=pop.population,
            name=str(pop),
            species_set=pop.species,
            logger=pop.log,
        )

        # Save the population
        if (iteration + 1) % save_interval == 0:
            pop.save()
            saved = True
        else:
            saved = False

    # Make sure that last iterations saves
    if not saved:
        pop.save()
def create_genomes(genome: Genome, pop: Population, d: int, range_width: int):
    """
    Create mutations of the provided genome and inject these in the given population.

    For each of the three rows of the GRU node's weights (reset, update and candidate, in that order), a copy of
    the genome is made for every combination (a, b) with a and b in [-range_width, range_width]. In each copy,
    a / d is added to the row of weight_xh_full and b / d to the same row of weight_hh. The copies get a fitness
    of None and are stored in pop.population under consecutive keys starting from zero; the population is saved
    afterwards.

    :param genome: Genome to mutate, the first node of type GruNodeGene found in its nodes is the one mutated
    :param pop: Population in which the mutated genomes are injected
    :param d: Divisor that scales the integer offsets to weight offsets
    :param range_width: Maximal absolute integer offset
    :raise ValueError: If the genome does not contain a GruNodeGene
    """
    pop.log(f"{pop.name} - Setting up genomes...")

    # Locate the GRU node before doing any work; the exact type-match of the original check is preserved
    gru_node_id = None
    for node_id, node in genome.nodes.items():
        if type(node) is GruNodeGene:
            gru_node_id = node_id
            break
    if gru_node_id is None:
        raise ValueError("The provided genome does not contain a GruNodeGene to mutate")

    n_genomes = ((range_width + range_width + 1) ** 2) * 3
    offsets = range(-range_width, range_width + 1)
    genome_key = 0
    pbar = tqdm(total=n_genomes, desc="Generating genomes")
    try:
        # Row 0 holds the reset-, row 1 the update- and row 2 the candidate-weights
        for row in range(3):
            for a in offsets:
                w_xh = np.zeros((3, 1))
                w_xh[row, 0] = a / d
                for b in offsets:
                    w_hh = np.zeros((3, 1))
                    w_hh[row, 0] = b / d

                    # Add the specified genome to the population
                    new_genome = copy.deepcopy(genome)
                    new_genome.nodes[gru_node_id].weight_xh_full += w_xh
                    new_genome.nodes[gru_node_id].weight_hh += w_hh
                    new_genome.fitness = None
                    new_genome.key = genome_key
                    pop.population[genome_key] = new_genome
                    genome_key += 1
                    pbar.update()
    finally:
        pbar.close()  # Also release the progress bar when the generation fails

    # Sanity check: the population may not contain genomes other than the ones created above
    assert len(pop.population) == n_genomes
    pop.save()
def main(topology_id: int,
         batch_size: int = 1000,
         train_batch: int = 3,
         min_finished: float = MIN_FINISHED,
         unused_cpu: int = 2,
         save_pop: bool = False,
         use_backup: bool = False):
    """
    Keep training a population of the requested topology and store all its good genomes in a CSV.

    A fresh population is created and seeded with the genomes returned by get_genome. The population is then
    repeatedly trained for train_batch generations and evaluated; the parameters of every genome whose finished
    ratio reaches min_finished are appended to the CSV. This continues until the counter returned by get_csv_path,
    incremented for every row written, reaches batch_size. The population is always saved on exit, after which
    the 'dummy' population folder is removed if it exists.

    :param topology_id: Topology to which the genomes are constrained
    :param batch_size: Number of genomes that must be stored before the run stops
    :param train_batch: Number of training generations between two evaluations
    :param min_finished: Minimal finished ratio a genome needs to get stored
    :param unused_cpu: Number of CPU cores that are not used by the worker pool
    :param save_pop: Name the population after the CSV instead of 'dummy'
    :param use_backup: Forwarded to get_csv_path and Population, and used to locate the dummy folder
    """
    # Get the CSV used to store the results in
    csv_path, csv_name, added = get_csv_path(topology_id, use_backup=use_backup, batch_size=batch_size)

    # Create the population
    name = csv_name if save_pop else 'dummy'
    cfg = get_config()
    folder = get_folder(experiment_id=6)
    pop = Population(
        name=name,
        config=cfg,
        folder_name=folder,
        use_backup=use_backup,
        overwrite=True,  # Every iteration, create a new population from scratch
    )

    # Replace the population's initial population with the requested topologies genomes
    for g_id in pop.population.keys():
        pop.population[g_id] = get_genome(topology_id, g_id=g_id, cfg=cfg)
    pop.species.speciate(
        config=pop.config,
        population=pop.population,
        generation=pop.generation,
        logger=pop.log,
    )

    # Set games and environment used for training and evaluation
    pop.log(f"\n\n\n===> RUNNING EXPERIMENT 6 <===\n")
    games_train, games_eval = get_game_ids(experiment_id=6)
    train_env = get_multi_env(config=cfg)
    eval_env = get_multi_env(config=cfg)
    eval_env.set_games(games_eval, noise=False)

    # mp.Pool refuses a process count below one, hence always keep at least one worker
    n_workers = max(1, mp.cpu_count() - unused_cpu)

    # Keep training and evolving the network until the complete CSV is filled
    last_saved = pop.generation
    try:
        while added < batch_size:
            t = time.localtime()
            pop.log(f"\n\n===> Selective genome creation at {added / batch_size * 100}%, "
                    f"storing in csv '{csv_path.split('/')[-1]}' "
                    f"({t.tm_hour:02d}h-{t.tm_min:02d}m-{t.tm_sec:02d}s) <===")

            # Train the population
            pop.log("\n===> Training <===")
            for _ in tqdm(range(train_batch), desc="Training"):
                train_env.set_games(games_train, noise=True)
                genomes = list(iteritems(pop.population))

                # The manager hosts the shared result-dictionary; leaving the with-block shuts its server down
                with mp.Manager() as manager:
                    return_dict = manager.dict()
                    with mp.Pool(n_workers) as pool:
                        for genome in genomes:
                            pool.apply_async(func=train_env.eval_genome, args=(genome, return_dict))
                        pool.close()  # Close the pool
                        pool.join()  # Postpone continuation until everything is finished

                    # Calculate the fitness while the shared dictionary is still reachable
                    fitness = calc_pop_fitness(
                        fitness_cfg=pop.config.evaluation,
                        game_cfg=cfg.game,
                        game_obs=return_dict,
                        gen=pop.generation,
                    )
                for i, genome in genomes:
                    genome.fitness = fitness[i]

                # Update the population's best_genome
                best = None
                for g in itervalues(pop.population):
                    if best is None or g.fitness > best.fitness:
                        best = g
                genomes = sorted(pop.population.items(), key=lambda x: x[1].fitness, reverse=True)
                pop.best_fitness[pop.generation] = genomes[0][1].fitness
                pop.best_genome_hist[pop.generation] = genomes[0]
                pop.best_genome = best
                pop.log(f"Best training fitness: {best.fitness}")

                # Let population evolve
                pop.evolve()

                # Constraint each of the population's new genomes to the given topology
                for g in pop.population.values():
                    enforce_topology(g, topology_id=topology_id)

            # Save the population after training
            if pop.generation - last_saved >= 100:
                pop.save()
                last_saved = pop.generation

            # Evaluate the current population as was done in experiment6
            pop.log("\n===> EVALUATING <===")
            genomes = list(iteritems(pop.population))
            with mp.Manager() as manager:
                return_dict = manager.dict()
                with mp.Pool(n_workers) as pool:
                    for genome in genomes:
                        pool.apply_async(func=eval_env.eval_genome, args=(genome, return_dict))
                    pool.close()  # Close the pool
                    pool.join()  # Postpone continuation until everything is finished

                # Calculate the finished ratio while the shared dictionary is still reachable
                finished = calc_finished_ratio(
                    fitness_cfg=cfg.evaluation,
                    game_obs=return_dict,
                )

            # From here on, the genomes' fitness holds their finished ratio
            best = None
            for i, genome in genomes:
                genome.fitness = finished[i]
                if best is None or finished[i] > best.fitness:
                    best = genome

            # Give evaluation overview of population
            pop.log(f"Best evaluation finish ratio: {round(best.fitness, 2)}")
            best_str = str(best).replace("\n", "\n\t")
            best_str += "\n\t" + str(best.nodes[2]).replace("\n", "\n\t")
            pop.log(f"Best genome: \n\t{best_str}")
            sids = list(iterkeys(pop.species.species))
            sids.sort()
            msg = f"\nPopulation '{name}' has {len(pop.species.species):d} species:" \
                  f"\n\t specie age size finished stag " \
                  f"\n\t======== ===== ====== ========== ======"
            pop.log(msg) if pop.log else print(msg)
            for sid in sids:
                s = pop.species.species[sid]
                a = pop.generation - s.created
                n = len(s.members)
                # Members without a (truthy) fitness are left out of the specie's maximum
                sf = [g.fitness for g in s.members.values() if g.fitness]
                f = "--" if len(sf) == 0 else f"{max(sf):.2f}"
                st = pop.generation - s.last_improved
                msg = f"\t{sid:^8} {a:^5} {n:^6} {f:^10} {st:^6}"
                pop.log(msg) if pop.log else print(msg)

            # Write the result to CSV
            with open(csv_path, 'a', newline='') as csv_file:
                writer = csv.writer(csv_file)
                for _, g in genomes:
                    # Only write the genomes that exceed the minimum 'finished ratio' threshold!
                    if g.fitness >= min_finished:
                        writer.writerow(get_genome_parameters(g, topology_id=topology_id))
                        added += 1
    finally:
        # Always persist the population, then remove the dummy population if it exists
        pop.save()
        path = f"population{'_backup' if use_backup else ''}/storage/{pop.folder_name}/dummy/"
        if os.path.exists(path):
            shutil.rmtree(path)