def evaluate_candidates(candidates, toolbox):
    if toolbox.initial_seed:
        seed_after_map: int = random.randint(1, 10000)
        if toolbox.conf.fix_seed_for_generation:
            seed_for_generation = random.randint(1, 10000)
            seeds_for_evaluation: np.ndarray = seed_for_generation * np.ones(len(candidates), dtype=np.int64)
            seeds_for_recorded = seed_for_generation * np.ones(len(toolbox.recorded_individuals), dtype=np.int64)
        else:
            seeds_for_evaluation: np.ndarray = np.random.randint(1, 10000, size=len(candidates))
            # seed count must match the number of recorded individuals, not the number of candidates
            seeds_for_recorded: np.ndarray = np.random.randint(1, 10000, size=len(toolbox.recorded_individuals))
    else:
        seed_after_map = 0
        seeds_for_evaluation = np.zeros(len(candidates), dtype=np.int64)
        seeds_for_recorded = np.zeros(len(toolbox.recorded_individuals), dtype=np.int64)

    brain_genomes = toolbox.strip_strategy_from_population(candidates)
    brain_genomes_recorded = toolbox.strip_strategy_from_population(toolbox.recorded_individuals)
    nevals = len(brain_genomes) + len(brain_genomes_recorded)

    results = toolbox.map(toolbox.evaluate, brain_genomes, seeds_for_evaluation)
    if toolbox.conf.novelty:
        results_recorded_orig = list(toolbox.map(toolbox.evaluate, brain_genomes_recorded, seeds_for_recorded))
        results_copy, results = tee(results, 2)
        novelties = toolbox.map(calc_novelty, list(results_copy),
                                [results_recorded_orig] * len(candidates),
                                [toolbox.get_distance] * len(candidates),
                                [toolbox.conf.novelty.novelty_nearest_k] * len(candidates))
    else:
        novelties = [0] * len(candidates)

    total_steps = 0
    for ind, res, nov in zip(candidates, results, novelties):
        fitness, behavior_compressed, steps = res
        ind.fitness_orig = fitness
        ind.novelty = nov
        ind.steps = steps
        total_steps += steps

    # Setting seeds must happen after reading all fitnesses from results: because of the async nature of map,
    # it is possible that some evaluations are still running when the first results get processed.
    set_random_seeds(seed_after_map, env=None)

    toolbox.shape_fitness(candidates)

    if toolbox.conf.novelty:
        # drop the oldest recorded_individuals when there are too many
        overfill = len(toolbox.recorded_individuals) - toolbox.conf.novelty.max_recorded_behaviors
        if overfill > 0:
            toolbox.recorded_individuals = toolbox.recorded_individuals[overfill:]

    return nevals, total_steps, seed_after_map
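# evaluate_candidates maps calc_novelty over (result, recorded_results, distance_fn, k) argument tuples.
# Below is a minimal sketch of such a function, assuming each result is a (fitness, compressed_behavior,
# steps) tuple and that distance_fn compares two compressed behaviors; it scores an individual by the mean
# distance to its k nearest recorded behaviors. The actual calc_novelty in this repo may differ.
def calc_novelty_sketch(result, recorded_results, distance_fn, k):
    _, behavior, _ = result
    distances = sorted(distance_fn(behavior, recorded_behavior)
                       for _, recorded_behavior, _ in recorded_results)
    nearest = distances[:k]
    return sum(nearest) / len(nearest) if nearest else 0.0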
def eval_fitness(self, individual, seed, render: bool = False, record: str = None, record_force: bool = False,
                 brain_vis_handler=None, neuron_vis=False, slow_down=0, rounds=None, neuron_vis_width=None,
                 neuron_vis_height=None, render_raw_ob=False):
    env = self._get_env(record, record_force, render)
    set_random_seeds(seed, env)
    fitness_total = 0
    steps_total = 0
    number_of_rounds = self.config.number_fitness_runs if rounds is None else rounds

    for i in range(number_of_rounds):
        fitness_current = 0
        brain = self.brain_class(self.input_space, self.output_space, individual, self.brain_config)
        ob = env.reset()
        done = False
        t = 0

        if render:
            self._render(env, ob, render_raw_ob)

        if neuron_vis:
            brain_vis = brain_vis_handler.launch_new_visualization(
                brain=brain, brain_config=self.brain_config, env_id=self.env_id, initial_observation=ob,
                width=neuron_vis_width, height=neuron_vis_height, color_clipping_range=(255, 2.5, 2.5),
                slow_down=slow_down)
        else:
            brain_vis = None

        while not done:
            action = brain.step(ob)
            ob, rew, done, info = env.step(action)
            t += 1
            fitness_current += rew

            if brain_vis:
                brain_vis.process_update(in_values=ob, out_values=action)

            if slow_down:
                time.sleep(slow_down / 1000.0)

            if render:
                self._render(env, ob, render_raw_ob)

        if render:
            logging.info("steps: " + str(t) + " \tfitness: " + str(fitness_current))

        fitness_total += fitness_current
        steps_total += t

    compressed_behavior = None
    if hasattr(env, "get_compressed_behavior"):
        # 'get_compressed_behavior' exists if any wrapper is a BehaviorWrapper
        assert callable(env.get_compressed_behavior)
        compressed_behavior = env.get_compressed_behavior()

    return fitness_total / number_of_rounds, compressed_behavior, steps_total
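# eval_fitness depends on set_random_seeds(seed, env) for reproducible rollouts. A minimal sketch of such
# a helper, assuming a Gym-style environment whose optional seed() method follows the older (pre-0.26) Gym
# API; the actual implementation in this repo may differ.
import random
import numpy as np

def set_random_seeds_sketch(seed, env):
    random.seed(seed)      # Python's global RNG
    np.random.seed(seed)   # NumPy's global RNG
    if env is not None and hasattr(env, "seed") and callable(env.seed):
        env.seed(int(seed))  # environment-internal RNG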
def _setup(self):
    env_handler = EnvHandler(self.config.episode_runner)
    env = env_handler.make_env(self.config.environment)
    # Note: the environment defined here is only used to initialize other classes; the actual simulation
    # will happen on freshly created local environments on the episode runners, to avoid concurrency
    # problems that would arise from a shared global state.
    self.env_template = env
    set_random_seeds(self.config.random_seed, env)
    self.input_space = env.observation_space
    self.output_space = env.action_space
    logging.info("Input space: " + str(self.input_space))
    logging.info("Output space: " + str(self.output_space))

    self.brain_class.generate_and_set_class_state(config=self.config.brain, input_space=self.input_space,
                                                  output_space=self.output_space)

    self.individual_size = self.brain_class.get_individual_size(self.config.brain,
                                                                input_space=self.input_space,
                                                                output_space=self.output_space)
    logging.info("Individual size for this experiment: " + str(self.individual_size))

    info = self.brain_class.get_free_parameter_usage(self.config.brain, input_space=self.input_space,
                                                     output_space=self.output_space)
    logging.info("Usage of free parameters: " + str(info))

    self.ep_runner = EpisodeRunner(config=self.config.episode_runner, brain_config=self.config.brain,
                                   brain_class=self.brain_class, input_space=self.input_space,
                                   output_space=self.output_space, env_id=self.config.environment)

    stats_fit = tools.Statistics(key=lambda ind: ind.fitness_orig)
    if self.config.episode_runner.novelty:
        stats_novel = tools.Statistics(key=lambda ind: ind.novelty)
        stats = tools.MultiStatistics(fitness=stats_fit, novelty=stats_novel)
    else:
        stats = tools.MultiStatistics(fitness=stats_fit)
    stats.register("min", np.min)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("max", np.max)

    system_cpu_count = os.cpu_count()
    if self.number_of_workers <= 0 or self.number_of_workers > system_cpu_count:
        raise RuntimeError(
            "{} is an invalid number of workers for this system: the CPU supports running between 1 and {} "
            "processes in parallel.".format(self.number_of_workers, system_cpu_count))

    if self.processing_framework == "dask":
        self.processing_handler = DaskHandler(number_of_workers=self.number_of_workers,
                                              brain_class=self.brain_class)
    elif self.processing_framework == "mp":
        self.processing_handler = MPHandler(number_of_workers=self.number_of_workers)
    elif self.processing_framework == "sequential":
        self.processing_handler = SequentialHandler(number_of_workers=self.number_of_workers)
    else:
        raise RuntimeError(
            "The processing framework '{}' is not supported.".format(self.processing_framework))
    map_func = self.processing_handler.map

    self.optimizer = self.optimizer_class(map_func=map_func, individual_size=self.individual_size,
                                          eval_fitness=self.ep_runner.eval_fitness,
                                          conf=self.config.optimizer, stats=stats,
                                          from_checkpoint=self.from_checkpoint, reset_hof=self.reset_hof,
                                          random_seed=self.config.random_seed)

    self.result_handler = ResultHandler(result_path=self.result_path,
                                        neural_network_type=self.config.brain.type,
                                        config_raw=self.config.raw_dict)
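# _setup only consumes the processing handler through its map attribute, so the contract is simply a
# map(func, *iterables) callable. A sequential stand-in illustrating that contract (a sketch; the repo's
# SequentialHandler may carry additional setup and teardown logic):
class SequentialHandlerSketch:
    def __init__(self, number_of_workers=1):
        self.number_of_workers = number_of_workers  # kept for interface parity; everything runs in-process

    def map(self, func, *iterables):
        # evaluate eagerly so exceptions surface immediately, mirroring a synchronous worker pool
        return [func(*args) for args in zip(*iterables)]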
def eval_fitness(self, individual, seed, render=False, record=None, record_force=False, brain_vis_handler=None,
                 neuron_vis=False, slow_down=0, rounds=None, neuron_vis_width=None, neuron_vis_height=None):
    env = self._get_env(record, record_force, render)
    set_random_seeds(seed, env)
    fitness_total = 0
    steps_total = 0
    number_of_rounds = self.config.number_fitness_runs if rounds is None else rounds
    brain_state_history = []

    for i in range(number_of_rounds):
        fitness_current = 0
        brain = self.brain_class(self.input_space, self.output_space, individual, self.brain_config)
        ob = env.reset()
        done = False
        t = 0

        if render:
            env.render()

        if neuron_vis:
            brain_vis = brain_vis_handler.launch_new_visualization(
                brain=brain, brain_config=self.brain_config, env_id=self.env_id, initial_observation=ob,
                width=neuron_vis_width, height=neuron_vis_height, color_clipping_range=(255, 2.5, 2.5),
                slow_down=slow_down)
        else:
            brain_vis = None

        while not done:
            brain_output = brain.step(ob)
            action = output_to_action(brain_output, self.output_space)
            ob, rew, done, info = env.step(action)
            t += 1
            fitness_current += rew

            if brain_vis:
                brain_vis.process_update(in_values=ob, out_values=brain_output)

            if slow_down:
                time.sleep(slow_down / 1000.0)

            if render:
                env.render()

            if self.config.novelty:
                if self.config.novelty.behavior_source == 'brain':
                    if isinstance(brain, ContinuousTimeRNN):
                        brain_state_history.append(np.tanh(brain.y))
                    elif isinstance(brain, CnnCtrnn):
                        brain_state_history.append(np.tanh(brain.ctrnn.y))
                    else:
                        logging.error('behavior_source == "brain" is not yet supported for this kind of brain')

        if render:
            logging.info("steps: " + str(t) + " \tfitness: " + str(fitness_current))

        fitness_total += fitness_current
        steps_total += t

    compressed_behavior = None
    if hasattr(env, 'get_compressed_behavior'):
        # 'get_compressed_behavior' exists if any wrapper is a BehaviorWrapper
        if callable(env.get_compressed_behavior):
            compressed_behavior = env.get_compressed_behavior()

    if self.config.novelty:
        if self.config.novelty.behavior_source == 'brain':
            # todo: remove code duplication, this code also exists in BehaviorWrapper
            compressor = BZ2Compressor(2)
            compressed_behavior = b''
            if self.config.novelty.behavioral_max_length < 0:
                # negative max length: encode only the final brain state; keep any bytes compress() already
                # emits instead of discarding them
                compressed_behavior += compressor.compress(brain_state_history[-1].astype(np.float16).tobytes())
                compressed_behavior += compressor.flush()
            else:
                # average the brain states over windows of behavioral_interval steps,
                # up to behavioral_max_length windows
                for i in range(self.config.novelty.behavioral_max_length):
                    aggregate = np.zeros(len(brain_state_history[0]), dtype=np.float32)
                    for j in range(self.config.novelty.behavioral_interval):
                        if len(brain_state_history) > j + i * self.config.novelty.behavioral_interval:
                            state = brain_state_history[j + i * self.config.novelty.behavioral_interval]
                            aggregate += state / self.config.novelty.behavioral_interval
                        else:
                            break
                    compressed_behavior += compressor.compress(aggregate.astype(np.float16).tobytes())
                compressed_behavior += compressor.flush()

    return fitness_total / number_of_rounds, compressed_behavior, steps_total
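# The behavior bytes produced above form a single BZ2 stream of float16 vectors. A sketch of the matching
# decode step, e.g. for use inside a novelty distance function (assumes the float16 encoding above;
# state_size, the length of one brain-state vector, is a parameter introduced here for illustration):
import bz2
import numpy as np

def decode_behavior_sketch(compressed_behavior, state_size):
    raw = bz2.decompress(compressed_behavior)
    states = np.frombuffer(raw, dtype=np.float16).astype(np.float32)
    return states.reshape(-1, state_size)  # one row per aggregated interval (or the final state)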