示例#1
0
def evaluate_candidates(candidates, toolbox):
    """Evaluate all candidates and store fitness, novelty and step counts on them.

    Each candidate is evaluated through ``toolbox.map(toolbox.evaluate, ...)``
    with one seed per individual. When ``toolbox.conf.novelty`` is truthy, the
    individuals in ``toolbox.recorded_individuals`` are evaluated as well and
    every candidate's result is compared against them via ``calc_novelty``.
    Afterwards ``toolbox.shape_fitness`` is applied to the candidates and the
    list of recorded individuals is trimmed to
    ``toolbox.conf.novelty.max_recorded_behaviors`` entries.

    Args:
        candidates: Individuals to evaluate. ``fitness_orig``, ``novelty`` and
            ``steps`` are set on each of them in place.
        toolbox: Object providing ``initial_seed``, ``conf``,
            ``recorded_individuals``, ``strip_strategy_from_population``,
            ``map``, ``evaluate``, ``get_distance`` and ``shape_fitness``.

    Returns:
        A tuple ``(nevals, total_steps, seed_after_map)``: the number of
        stripped genomes (candidates plus recorded individuals), the sum of
        the ``steps`` values over all candidates, and the seed that was passed
        to ``set_random_seeds`` once all results had been read.
    """
    num_candidates = len(candidates)
    num_recorded = len(toolbox.recorded_individuals)

    if toolbox.initial_seed:
        seed_after_map: int = random.randint(1, 10000)
        if toolbox.conf.fix_seed_for_generation:
            # One shared seed for every evaluation of this generation.
            seed_for_generation = random.randint(1, 10000)
            seeds_for_evaluation = seed_for_generation * np.ones(num_candidates, dtype=np.int64)
            seeds_for_recorded = seed_for_generation * np.ones(num_recorded, dtype=np.int64)
        else:
            seeds_for_evaluation = np.random.randint(1, 10000, size=num_candidates)
            # One seed per *recorded* individual (this used to be sized by the
            # number of candidates, which mismatched the recorded genomes).
            seeds_for_recorded = np.random.randint(1, 10000, size=num_recorded)
    else:
        seed_after_map = 0
        seeds_for_evaluation = np.zeros(num_candidates, dtype=np.int64)
        seeds_for_recorded = np.zeros(num_recorded, dtype=np.int64)

    brain_genomes = toolbox.strip_strategy_from_population(candidates)
    brain_genomes_recorded = toolbox.strip_strategy_from_population(toolbox.recorded_individuals)
    # NOTE(review): recorded individuals are counted here even though they are
    # only evaluated when novelty is enabled -- confirm this is intended.
    nevals = len(brain_genomes) + len(brain_genomes_recorded)
    results = toolbox.map(toolbox.evaluate, brain_genomes, seeds_for_evaluation)

    if toolbox.conf.novelty:
        results_recorded_orig = list(toolbox.map(toolbox.evaluate, brain_genomes_recorded, seeds_for_recorded))
        # The results are needed twice (novelty calculation and the assignment
        # loop below), so materialise them once.
        results = list(results)
        novelties = toolbox.map(calc_novelty,
                                results,
                                [results_recorded_orig] * num_candidates,
                                [toolbox.get_distance] * num_candidates,
                                [toolbox.conf.novelty.novelty_nearest_k] * num_candidates)
    else:
        novelties = [0] * num_candidates

    total_steps = 0
    for ind, res, nov in zip(candidates, results, novelties):
        fitness, _behavior_compressed, steps = res
        ind.fitness_orig = fitness
        ind.novelty = nov
        ind.steps = steps
        total_steps += steps
    # Setting seeds must happen after reading all fitnesses from results: because of the async nature of map,
    # it is possible that some evaluations are still running when the first results get processed.
    set_random_seeds(seed_after_map, env=None)

    toolbox.shape_fitness(candidates)

    if toolbox.conf.novelty:
        # Drop the oldest recorded individuals when there are too many.
        overfill = len(toolbox.recorded_individuals) - toolbox.conf.novelty.max_recorded_behaviors
        if overfill > 0:
            toolbox.recorded_individuals = toolbox.recorded_individuals[overfill:]

    return nevals, total_steps, seed_after_map
    def eval_fitness(self,
                     individual,
                     seed,
                     render: bool = False,
                     record: str = None,
                     record_force: bool = False,
                     brain_vis_handler=None,
                     neuron_vis=False,
                     slow_down=0,
                     rounds=None,
                     neuron_vis_width=None,
                     neuron_vis_height=None,
                     render_raw_ob=False):
        """Run an individual for several episodes and report its average fitness.

        A fresh brain is built from ``individual`` for every episode and stepped
        against the environment until the environment reports ``done``.

        Args:
            individual: Parameters handed to ``self.brain_class``.
            seed: Seed passed to ``set_random_seeds`` together with the environment.
            render: If truthy, render after reset and after every step, and log
                steps and fitness at the end of each episode.
            record: Forwarded to ``self._get_env``.
            record_force: Forwarded to ``self._get_env``.
            brain_vis_handler: Object used to launch a visualization when
                ``neuron_vis`` is truthy.
            neuron_vis: If truthy, launch a brain visualization per episode.
            slow_down: Milliseconds to sleep after every step (0 disables).
            rounds: Number of episodes; defaults to
                ``self.config.number_fitness_runs`` when ``None``.
            neuron_vis_width: Width forwarded to the visualization.
            neuron_vis_height: Height forwarded to the visualization.
            render_raw_ob: Forwarded to ``self._render``.

        Returns:
            Tuple ``(mean fitness per episode, compressed behavior or None,
            total number of steps)``.
        """
        env = self._get_env(record, record_force, render)
        set_random_seeds(seed, env)

        if rounds is None:
            episode_count = self.config.number_fitness_runs
        else:
            episode_count = rounds

        reward_sum = 0
        step_sum = 0
        for _ in range(episode_count):
            brain = self.brain_class(self.input_space, self.output_space,
                                     individual, self.brain_config)
            ob = env.reset()
            if render:
                self._render(env, ob, render_raw_ob)

            brain_vis = None
            if neuron_vis:
                brain_vis = brain_vis_handler.launch_new_visualization(
                    brain=brain,
                    brain_config=self.brain_config,
                    env_id=self.env_id,
                    initial_observation=ob,
                    width=neuron_vis_width,
                    height=neuron_vis_height,
                    color_clipping_range=(255, 2.5, 2.5),
                    slow_down=slow_down)

            episode_reward = 0
            episode_steps = 0
            done = False
            while not done:
                action = brain.step(ob)
                ob, rew, done, info = env.step(action)
                episode_steps += 1
                episode_reward += rew
                if brain_vis:
                    # the visualization receives the observation returned by this step
                    brain_vis.process_update(in_values=ob, out_values=action)
                if slow_down:
                    time.sleep(slow_down / 1000.0)
                if render:
                    self._render(env, ob, render_raw_ob)

            if render:
                logging.info("steps: " + str(episode_steps) + " \tfitness: " +
                             str(episode_reward))

            reward_sum += episode_reward
            step_sum += episode_steps

        behavior = None
        if hasattr(env, "get_compressed_behavior"):
            # 'get_compressed_behavior' exists if any wrapper is a BehaviorWrapper
            assert callable(env.get_compressed_behavior)
            behavior = env.get_compressed_behavior()

        return reward_sum / episode_count, behavior, step_sum
    def _setup(self):
        """Create all components of the experiment from ``self.config``.

        Sets ``env_template``, ``input_space``, ``output_space``,
        ``individual_size``, ``ep_runner``, ``processing_handler``,
        ``optimizer`` and ``result_handler`` on the instance, seeds the random
        number generators and initializes the class state of
        ``self.brain_class``.

        Raises:
            RuntimeError: If ``self.number_of_workers`` is not positive or
                exceeds the detected CPU count, or if
                ``self.processing_framework`` is not one of ``"dask"``,
                ``"mp"`` or ``"sequential"``.
        """
        env_handler = EnvHandler(self.config.episode_runner)
        env = env_handler.make_env(self.config.environment)
        # note: the environment defined here is only used to initialize other classes, but the
        # actual simulation will happen on freshly created local environments on the episode runners
        # to avoid concurrency problems that would arise from a shared global state
        self.env_template = env
        set_random_seeds(self.config.random_seed, env)
        self.input_space = env.observation_space
        self.output_space = env.action_space
        logging.info("Input space: " + str(self.input_space))
        logging.info("Output space: " + str(self.output_space))
        self.brain_class.generate_and_set_class_state(
            config=self.config.brain,
            input_space=self.input_space,
            output_space=self.output_space)

        self.individual_size = self.brain_class.get_individual_size(
            self.config.brain,
            input_space=self.input_space,
            output_space=self.output_space)
        logging.info("Individual size for this experiment: " +
                     str(self.individual_size))
        info = self.brain_class.get_free_parameter_usage(
            self.config.brain,
            input_space=self.input_space,
            output_space=self.output_space)
        logging.info("Usage of free parameters: " + str(info))

        self.ep_runner = EpisodeRunner(config=self.config.episode_runner,
                                       brain_config=self.config.brain,
                                       brain_class=self.brain_class,
                                       input_space=self.input_space,
                                       output_space=self.output_space,
                                       env_id=self.config.environment)

        # Fitness statistics are always collected; novelty statistics only
        # when novelty is configured for the episode runner.
        stats_fit = tools.Statistics(key=lambda ind: ind.fitness_orig)
        if self.config.episode_runner.novelty:
            stats_novel = tools.Statistics(key=lambda ind: ind.novelty)
            stats = tools.MultiStatistics(fitness=stats_fit,
                                          novelty=stats_novel)
        else:
            stats = tools.MultiStatistics(fitness=stats_fit)

        stats.register("min", np.min)
        stats.register("avg", np.mean)
        stats.register("std", np.std)
        stats.register("max", np.max)

        # os.cpu_count() returns None when the count cannot be determined; in
        # that case only the lower bound can be validated.
        system_cpu_count = os.cpu_count()
        exceeds_cpu_count = (system_cpu_count is not None
                             and self.number_of_workers > system_cpu_count)
        if self.number_of_workers <= 0 or exceeds_cpu_count:
            raise RuntimeError(
                "{} is an incorrect number of workers for your system, because your CPU only supports "
                "running between 1 and {} processes in parallel."
                "".format(self.number_of_workers, system_cpu_count))

        if self.processing_framework == "dask":
            self.processing_handler = DaskHandler(
                number_of_workers=self.number_of_workers,
                brain_class=self.brain_class)
        elif self.processing_framework == "mp":
            self.processing_handler = MPHandler(
                number_of_workers=self.number_of_workers)
        elif self.processing_framework == "sequential":
            self.processing_handler = SequentialHandler(
                number_of_workers=self.number_of_workers)
        else:
            raise RuntimeError(
                "The processing framework '{}' is not supported.".format(
                    self.processing_framework))

        # The optimizer distributes evaluations through the handler's map.
        map_func = self.processing_handler.map

        self.optimizer = self.optimizer_class(
            map_func=map_func,
            individual_size=self.individual_size,
            eval_fitness=self.ep_runner.eval_fitness,
            conf=self.config.optimizer,
            stats=stats,
            from_checkpoint=self.from_checkpoint,
            reset_hof=self.reset_hof,
            random_seed=self.config.random_seed)

        self.result_handler = ResultHandler(
            result_path=self.result_path,
            neural_network_type=self.config.brain.type,
            config_raw=self.config.raw_dict)
示例#4
0
    def eval_fitness(self, individual, seed, render=False, record=None, record_force=False, brain_vis_handler=None,
                     neuron_vis=False, slow_down=0, rounds=None, neuron_vis_width=None, neuron_vis_height=None):
        """Run an individual for several episodes and report its average fitness.

        A fresh brain is built from ``individual`` for every episode. Its output
        is converted with ``output_to_action`` and applied to the environment
        until the environment reports ``done``. When novelty is configured with
        ``behavior_source == 'brain'``, the tanh of the brain state is recorded
        after every step (across all episodes) and compressed into the returned
        behavior, replacing any behavior supplied by the environment.

        Args:
            individual: Parameters handed to ``self.brain_class``.
            seed: Seed passed to ``set_random_seeds`` together with the environment.
            render: If truthy, render the environment and log per-episode results.
            record: Forwarded to ``self._get_env``.
            record_force: Forwarded to ``self._get_env``.
            brain_vis_handler: Object used to launch a visualization when
                ``neuron_vis`` is truthy.
            neuron_vis: If truthy, launch a brain visualization per episode.
            slow_down: Milliseconds to sleep after every step (0 disables).
            rounds: Number of episodes; defaults to
                ``self.config.number_fitness_runs`` when ``None``.
            neuron_vis_width: Width forwarded to the visualization.
            neuron_vis_height: Height forwarded to the visualization.

        Returns:
            Tuple ``(mean fitness per episode, compressed behavior or None,
            total number of steps)``.
        """
        env = self._get_env(record, record_force, render)
        set_random_seeds(seed, env)
        fitness_total = 0
        steps_total = 0
        number_of_rounds = self.config.number_fitness_runs if rounds is None else rounds

        # The behavior source does not change while evaluating, so look it up once.
        novelty_conf = self.config.novelty
        behavior_from_brain = bool(novelty_conf) and novelty_conf.behavior_source == 'brain'

        # Collected over all rounds, not reset per round.
        brain_state_history = []
        for _ in range(number_of_rounds):
            fitness_current = 0
            brain = self.brain_class(self.input_space, self.output_space, individual, self.brain_config)
            ob = env.reset()
            done = False
            t = 0

            if render:
                env.render()

            if neuron_vis:
                brain_vis = brain_vis_handler.launch_new_visualization(brain=brain, brain_config=self.brain_config,
                                                                       env_id=self.env_id, initial_observation=ob,
                                                                       width=neuron_vis_width, height=neuron_vis_height,
                                                                       color_clipping_range=(255, 2.5, 2.5),
                                                                       slow_down=slow_down)
            else:
                brain_vis = None

            while not done:
                brain_output = brain.step(ob)
                action = output_to_action(brain_output, self.output_space)
                ob, rew, done, info = env.step(action)
                t += 1
                fitness_current += rew

                if brain_vis:
                    # the visualization receives the observation returned by this step
                    brain_vis.process_update(in_values=ob, out_values=brain_output)
                if slow_down:
                    time.sleep(slow_down / 1000.0)
                if render:
                    env.render()
                if behavior_from_brain:
                    if isinstance(brain, ContinuousTimeRNN):
                        brain_state_history.append(np.tanh(brain.y))
                    elif isinstance(brain, CnnCtrnn):
                        brain_state_history.append(np.tanh(brain.ctrnn.y))
                    else:
                        logging.error('behavior_source == "brain" not yet supported for this kind of brain')

            if render:
                logging.info("steps: " + str(t) + " \tfitness: " + str(fitness_current))

            fitness_total += fitness_current
            steps_total += t

        compressed_behavior = None
        if hasattr(env, 'get_compressed_behavior'):
            # 'get_compressed_behavior' exists if any wrapper is a BehaviorWrapper
            if callable(env.get_compressed_behavior):
                compressed_behavior = env.get_compressed_behavior()

        if behavior_from_brain:
            # todo: remove code duplication. This code is also in BehaviorWrapper
            # NOTE(review): an empty brain_state_history (unsupported brain type)
            # raises IndexError below -- confirm whether a fallback is wanted.
            compressor = BZ2Compressor(2)
            chunks = []
            max_length = novelty_conf.behavioral_max_length
            if max_length < 0:
                # Only the final state is used. compress() may already return
                # data, so its result must be kept just like in the branch below.
                chunks.append(compressor.compress(brain_state_history[-1].astype(np.float16).tobytes()))
            else:
                interval = novelty_conf.behavioral_interval
                for i in range(max_length):
                    # Sum of state / interval over the i-th window; windows past
                    # the end of the history stay all-zero.
                    aggregate = np.zeros(len(brain_state_history[0]), dtype=np.float32)
                    for j in range(interval):
                        index = j + i * interval
                        if index >= len(brain_state_history):
                            break
                        aggregate += brain_state_history[index] / interval
                    chunks.append(compressor.compress(aggregate.astype(np.float16).tobytes()))
            chunks.append(compressor.flush())
            compressed_behavior = b''.join(chunks)

        return fitness_total / number_of_rounds, compressed_behavior, steps_total