class SoftOrderPersistor():
    """
    A LEAF-ier ploy to separate out the files that are persisted by
    softorder_coevolution -- all except for checkpointing.

    We do this so that the session_server can be the one to do the
    persistence and the files can persist on the session_server machine.
    """
    def __init__(self,
                 experiment_dir,
                 fitness_objectives,
                 save_best=True,
                 draw=True,
                 logger=None):
        """
        Constructor.

        :param experiment_dir: The experiment directory handed to the
                persistence and plotting helpers
        :param fitness_objectives: The fitness objectives handed to the
                CandidateUtil and the FitnessPersistor
        :param save_best: When truthy, persist() saves the best candidate
                of each generation
        :param draw: When truthy, persist() plots the accumulated stats
        :param logger: The logger passed along to
                BestFitnessCandidatePersistence. Default is None.
        """

        self.experiment_dir = experiment_dir
        self.save_best = save_best
        self.draw = draw
        self.fitness_objectives = fitness_objectives
        self.candidate_util = CandidateUtil(fitness_objectives)

        # One entry is appended to each of these lists per call to
        # gather_advanced_stats()
        self.advanced_stats = {
            'best_candidate': [],
            'avg_fitness': [],
            'time': []
        }
        self.logger = logger

    def persist(self, population, generation):
        """
        Gather statistics and persist what we want to files

        :param population: The candidates of the generation just evaluated
        :param generation: The generation number of that population
        """

        best_candidate = self.gather_advanced_stats(population)
        self.do_save(generation, best_candidate)
        self.do_draw(generation)

        fitness_persistence = FitnessPersistor(self.experiment_dir, generation,
                                               self.fitness_objectives)
        fitness_persistence.persist(self.advanced_stats)

    def get_candidate_fitness(self, candidate):
        """
        :param candidate: The candidate whose fitness is wanted
        :return: Whatever the CandidateUtil reports as the candidate's
                fitness.  This can be None.
        """
        return self.candidate_util.get_candidate_fitness(candidate)

    def average_fitness(self, population):
        """
        Returns the average raw fitness of population

        :param population: The candidates to average over. Can be None.
        :return: The mean over all candidates whose fitness is not None,
                or 0.0 when there is no such candidate.
        """
        # find_best_candidate() tolerates a None population, so do the same
        # here given both get called on the same population.
        if population is None:
            return 0.0

        fitnesses = []
        for candidate in population:
            fitness = self.get_candidate_fitness(candidate)
            if fitness is not None:
                fitnesses.append(fitness)

        # Explicit empty check instead of dividing by a tiny epsilon
        if not fitnesses:
            return 0.0
        return sum(fitnesses, 0.0) / len(fitnesses)

    def find_best_candidate(self, population):
        """
        :param population: The candidates to search. Can be None or empty.
        :return: The candidate with the largest fitness, or None when
                there is no population.  When the candidates are
                dictionaries and none of them has a fitness, the last
                candidate is returned.
        """
        # NOTE(review): this always prefers the largest fitness value.
        #       Confirm that minimizing objectives are dealt with elsewhere.

        if population is None or len(population) == 0:
            return None

        one = population[0]
        best = None

        if isinstance(one, dict):
            # Candidates are dictionaries
            best_fitness = None
            for candidate in population:
                fitness = self.get_candidate_fitness(candidate)
                if best_fitness is None:
                    # Nothing with a real fitness has been seen yet,
                    # so this candidate is as good as any so far.
                    best_fitness = fitness
                    best = candidate
                elif fitness is not None and fitness > best_fitness:
                    # Candidates without a fitness never displace one that
                    # has a fitness. Comparing None against a number would
                    # raise a TypeError in Python 3.
                    best_fitness = fitness
                    best = candidate
        else:
            # Candidates are ChromosomeData
            best = max(population)

        return best

    def gather_advanced_stats(self, population):
        """
        Populates the advanced_stats member dictionary
        with info about the generation just evaluated.

        :param population: The candidates of the generation just evaluated
        :return: The best candidate found in the population (can be None)
        """
        best_candidate = self.find_best_candidate(population)

        # Store a copy so later changes to the candidate do not
        # alter the recorded history.
        self.advanced_stats['best_candidate'].append(
            copy.deepcopy(best_candidate))
        self.advanced_stats['avg_fitness'].append(
            self.average_fitness(population))
        self.advanced_stats['time'].append(time.time())
        return best_candidate

    def do_save(self, generation, best_candidate):
        """
        Saves the best candidate from the current generation,
        provided save_best was requested and there is a candidate to save.

        :param generation: The generation number the candidate belongs to
        :param best_candidate: The candidate to save. Can be None.
        """
        if not self.save_best or best_candidate is None:
            return

        candidate_id = self.candidate_util.get_candidate_id(best_candidate)
        best_persistence = BestFitnessCandidatePersistence(
            self.experiment_dir,
            candidate_id,
            generation,
            logger=self.logger)
        best_persistence.persist(best_candidate)

    def do_draw(self, generation):
        """
        Plots the best-candidate and average-fitness history,
        provided drawing was requested and generation is at least 2.

        :param generation: The generation number just evaluated
        """
        if self.draw and generation >= 2:
            stats = (self.advanced_stats['best_candidate'],
                     self.advanced_stats['avg_fitness'])
            visualize.plot_stats(stats, self.candidate_util,
                                 self.experiment_dir)
class ReevaluateBestSessionTask(SessionTask):
    """
    SessionTask which goes through the candidates of every checkpoint
    seen by a previous run, ranks them by their mean recorded fitness,
    and sends the top ones off to be evaluated again.
    """

    # Tied for Public Enemy #5 for too-many-arguments
    # pylint: disable=too-many-arguments
    def __init__(self,
                 session,
                 master_config,
                 experiment_dir,
                 fitness_objectives,
                 generation,
                 experiment_start_time,
                 experiment_id,
                 completion_service,
                 initial_generation,
                 checkpoint_id=None):
        """
        Constructor.

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param generation: the generation number of the population
        :param experiment_start_time: the experiment start time in seconds
        :param experiment_id: the experiment id
                XXX Can this be derived from experiment_dir?
        :param completion_service: A handle to the CompletionService object
                for performing distributed evaluations.
        :param initial_generation: Flag saying whether or not this is the first
                generation.
        :param checkpoint_id: The checkpoint id (if any) relevant to the task.
        """
        super(ReevaluateBestSessionTask,
              self).__init__(session, master_config, experiment_dir,
                             fitness_objectives, checkpoint_id)

        # Plain bookkeeping of what we were constructed with
        self.generation = generation
        self.experiment_start_time = experiment_start_time
        self.experiment_id = experiment_id
        self.completion_service = completion_service
        self.initial_generation = initial_generation

        # Helpers
        self.candidate_util = CandidateUtil(fitness_objectives)
        self.population_response_util = PopulationResponseUtil()

        config = self.master_config.get('experiment_config')
        should_draw = config.get('visualize')
        self.persistor = SoftOrderPersistor(self.experiment_dir,
                                            self.fitness_objectives,
                                            draw=should_draw,
                                            logger=self.logger)

        # NOTE(review): presumably unpack_response() fills these in, given
        #       this task is handed to it in run() -- confirm.
        self.server_stats = {}
        self.seen_checkpoint_ids = []

    def _recorded_fitness(self, results_dir, candidate_id):
        """
        Looks up the fitness recorded for a candidate in the persisted
        Worker Results dictionary.

        :param results_dir: The directory the results dictionary is read from
        :param candidate_id: The id of the candidate to look up
        :return: The recorded fitness, or 0.0 when none can be found
        """
        persistence = ResultsDictPersistence(results_dir,
                                             self.generation,
                                             logger=self.logger)
        results_dict = persistence.restore()

        recorded = None
        if any(results_dict) and candidate_id in results_dict:
            # This is not quite a candidate, but the get-mechanism
            # should be the same
            recorded = self.candidate_util.get_candidate_fitness(
                results_dict[candidate_id])

        return 0.0 if recorded is None else recorded

    def run(self):
        """
        Entry point for the session task execution to take over.
        """

        experiment_config = self.master_config.get('experiment_config')
        reevaluate_dir = experiment_config.get('reevaluate_checkpoint_dir')
        assert os.path.exists(reevaluate_dir)
        print("Re-evaluating top %s chromosomes found from experiment %s" %
              (experiment_config.get('reevaluate_num'), reevaluate_dir))

        # Maps candidate id -> the latest candidate seen under that id
        # and every fitness recorded for it.
        candidate_fit_dict = {}

        # The checkpoint_ids.txt file holds references to every checkpoint
        # that training has seen.  By convention, the
        # reevaluate_checkpoint_dir is where that file is read from,
        # while self.checkpoint_dir is where new results get written.
        restoring_checkpoint_persistence = CheckpointPersistence(
            folder=reevaluate_dir,
            logger=self.logger)
        self.seen_checkpoint_ids = restoring_checkpoint_persistence.restore()

        for checkpoint_id in self.seen_checkpoint_ids:

            print("Analyzing chromos in %s" % checkpoint_id)

            response = self.session.get_population(reevaluate_dir,
                                                   checkpoint_id)
            population = self.population_response_util.unpack_response(
                response, self)

            for candidate in population:
                candidate_id = self.candidate_util.get_candidate_id(candidate)
                fitness = self._recorded_fitness(reevaluate_dir, candidate_id)

                entry = candidate_fit_dict.get(candidate_id)
                if entry is None:
                    candidate_fit_dict[candidate_id] = {
                        'candidate': candidate,
                        'fit': [fitness]
                    }
                else:
                    entry['candidate'] = candidate
                    entry['fit'].append(fitness)

        # Rank by mean fitness, highest first, and keep the top few
        ranked = [(entry['candidate'], np.mean(entry['fit']))
                  for entry in candidate_fit_dict.values()]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        top = ranked[:experiment_config.get('reevaluate_num')]

        best_candidates = [candidate for candidate, _ in top]
        best_candidate_ids = [
            self.candidate_util.get_candidate_id(candidate)
            for candidate, _ in top
        ]
        best_fit = [round(mean_fitness, 4) for _, mean_fitness in top]

        if not best_candidates:
            print("No chromos found, doing nothing")
            return

        for best_candidate in best_candidates:
            best_id = self.candidate_util.get_candidate_id(best_candidate)
            persistence = BestFitnessCandidatePersistence(
                self.experiment_dir, best_id, logger=self.logger)
            persistence.persist(best_candidate)

        print("Best chromos:")
        print(list(zip(best_candidate_ids, best_fit)))
        print("Best chromo stats:")
        summary = (round(np.min(best_fit), 4), round(np.mean(best_fit), 4),
                   round(np.max(best_fit), 4), round(np.std(best_fit), 4))
        print("Min: %s Mean: %s Max: %s Std: %s" % summary)

        # The population handed off below is a mix of the best candidates
        # from many different past generations, so it does not correspond
        # to any one of them.  That is why generation + 1 is used
        # for reporting.
        reevaluate_candidate_task = ReevaluateCandidateSessionTask(
            self.session,
            self.master_config,
            self.experiment_dir,
            self.fitness_objectives,
            self.generation,
            self.experiment_start_time,
            self.experiment_id,
            self.completion_service,
            self.initial_generation,
            self.checkpoint_id)
        reevaluate_candidate_task.evaluate_and_analyze_results(
            best_candidates, self.generation + 1)
class FitnessPersistor(Persistor):
    """
    Persistor implementation whose job is to write out the
    fitness.csv file, one row per generation.
    """
    def __init__(self, experiment_dir, generation, fitness_objectives):
        """
        Constructor.

        :param experiment_dir: The experiment directory where the file goes
        :param generation: The number of the last generation to report on
        :param fitness_objectives: The object supplying the fitness
                objective from which the column names are derived
        """
        self.filer = ExperimentFiler(experiment_dir)
        self.candidate_util = CandidateUtil(fitness_objectives)
        self.fitness_objectives = fitness_objectives
        self.generation = generation
        self.time_format = '%Y-%m-%d-%H:%M:%S'
        self.basename = 'fitness.csv'

    def persist(self, obj):
        """
        Persists the object passed in.

        :param obj: an object to persist
                In this case we are expecting an advanced stats dictionary
                from the SoftOrderPersistor
        """
        self.write_csv_file(self.filer.experiment_file(self.basename), obj)

    def write_csv_file(self, filename, advanced_stats):
        """
        Writes out the fitness.csv file

        :param filename: The filename to write to
        :param advanced_stats: The advanced_stats dict gathered by the
                            SoftOrderPersistor.  Its 'time',
                            'best_candidate' and 'avg_fitness' lists are
                            indexed by generation number.
        :return: Nothing
        """
        with open(filename, 'w') as csv_file:

            # Column names depend on the name of the first objective's metric
            metric = self.fitness_objectives.get_fitness_objective(0)
            metric_name = metric.get_metric_name()

            best_column = 'Best ' + metric_name
            best_id_column = best_column + ' id'
            avg_column = 'Avg ' + metric_name

            columns = ['Generation', 'Timestamp', best_id_column,
                       best_column, avg_column]
            writer = csv.DictWriter(csv_file,
                                    fieldnames=columns,
                                    quoting=csv.QUOTE_MINIMAL,
                                    lineterminator="\n")
            writer.writeheader()

            # The whole history gets rewritten every time, generation 0
            # through self.generation inclusive.
            for gen in range(self.generation + 1):

                # Human-readable rendition of the recorded timestamp
                recorded_at = datetime.fromtimestamp(
                    advanced_stats['time'][gen])
                time_string = recorded_at.strftime(self.time_format)

                # Best candidate of the generation, if there was one
                # XXX multi-objective
                candidate = advanced_stats['best_candidate'][gen]
                if candidate is None:
                    best_id = None
                    best_fitness = None
                else:
                    best_id = self.candidate_util.get_candidate_id(candidate)
                    best_fitness = self.candidate_util.get_candidate_fitness(
                        candidate)

                # XXX multi-objective for the average fitness as well
                writer.writerow({
                    'Generation': gen,
                    'Timestamp': time_string,
                    best_id_column: best_id,
                    best_column: best_fitness,
                    avg_column: advanced_stats['avg_fitness'][gen]
                })
# Example #4
# 0
class AnalyzeResultsSessionTask(SessionTask):
    """
    SessionTask that performs the AnalyzeResults task.

    This task doesn't actually use the Session object that talks
    to the server, but instead takes all the results files created
    by a run and does some analysis on them.

    XXX What?
    """
    def __init__(self,
                 session,
                 master_config,
                 experiment_dir,
                 fitness_objectives,
                 checkpoint_id=None):
        """
        Constructor.

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param checkpoint_id: The checkpoint id (if any) relevant to the task.
        """
        super(AnalyzeResultsSessionTask,
              self).__init__(session, master_config, experiment_dir,
                             fitness_objectives, checkpoint_id)

        self.candidate_util = CandidateUtil(fitness_objectives)

    def _find_best_result_id(self, worker_results_dict, is_maximize):
        """
        Finds the key of the entry with the best fitness.

        :param worker_results_dict: A dictionary of candidate id ->
                worker results for that candidate
        :param is_maximize: True when a larger fitness is better,
                False when a smaller fitness is better
        :return: The key of the entry with the best fitness.  On ties the
                entry seen first wins.  None is returned when no entry
                has a fitness.
        """
        best_id = None
        best_fitness = None
        for candidate_id, results in worker_results_dict.items():
            # The results are not quite a candidate, but the get-mechanism
            # should be the same
            fitness = self.candidate_util.get_candidate_fitness(results)
            if fitness is None:
                # Entries without a fitness cannot be ranked
                continue

            if best_fitness is None:
                is_better = True
            elif is_maximize:
                is_better = fitness > best_fitness
            else:
                is_better = fitness < best_fitness

            if is_better:
                best_id = candidate_id
                best_fitness = fitness

        return best_id

    def run(self):
        """
        Entry point for the session task execution to take over.

        :raises ValueError: when no results files are found, or when none
                of the results found has a fitness
        """

        print("Running AnalyzeResultsSessionTask")

        # Read the results files for each generation.
        # These are written out by write_results_file()

        filer = ExperimentFiler(self.experiment_dir)
        glob_spec = filer.experiment_file("gen_*/results_dict.json")
        worker_results_files = sorted(glob.glob(glob_spec))
        if not worker_results_files:
            raise ValueError("No results_dict.json files found in {0}".format(
                self.experiment_dir))

        # No generation number needed, we are only looking to
        # parse path components with it.
        generation_filer = GenerationFiler(self.experiment_dir)

        worker_results_dict = {}
        generation = None
        for worker_results_file in worker_results_files:

            generation = generation_filer.get_generation_from_path(
                worker_results_file)

            # This slurps in results information returned by workers from all
            # candidates of a specific generation
            results_dict_persistence = ResultsDictPersistence(
                self.experiment_dir, generation, logger=self.logger)
            one_worker_results_dict = results_dict_persistence.restore()

            # results_dict here will have one entry per candidate over all
            # generations.  Skip anything empty or missing.
            if one_worker_results_dict:
                worker_results_dict.update(one_worker_results_dict)

        fitness_objective = self.fitness_objectives.get_fitness_objective(0)
        is_maximize = fitness_objective.is_maximize_fitness()

        # The dictionary is keyed by candidate id, so the key of the
        # best entry is the id of the best candidate.
        best_id = self._find_best_result_id(worker_results_dict, is_maximize)
        if best_id is None:
            raise ValueError(
                "No results with a fitness found in {0}".format(
                    self.experiment_dir))

        # Open the file of the best candidate.
        best_candidate_persistence = BestFitnessCandidatePersistence(
            self.experiment_dir, best_id, logger=self.logger)
        best_candidate = best_candidate_persistence.restore()

        # NOTE(review): generation here is that of the last results file
        #       read, not necessarily that of the best candidate -- confirm
        #       that is where the drawing is supposed to end up.
        self.draw_best_candidate_results(best_candidate,
                                         generation,
                                         suffix='abs')

    def draw_best_candidate_results(self,
                                    best_candidate,
                                    generation=None,
                                    suffix=''):
        """
        Draws the network of the given candidate, provided the
        'visualize' key of the experiment config is set.

        :param best_candidate: A candidate object comprising the best of a
                        generation.
        :param generation: The generation whose directory receives the
                        output.  Default value is None, in which case the
                        output goes in the experiment directory itself.
        :param suffix: A string worked into the file name when there is
                        no generation.  Default value is an empty string
        """
        experiment_config = self.master_config.get('experiment_config')
        if not experiment_config.get('visualize'):
            return

        best_id = self.candidate_util.get_candidate_id(best_candidate)
        best_fitness = self.candidate_util.get_candidate_fitness(
            best_candidate)

        # A candidate might not have a fitness at all
        fitness = best_fitness if best_fitness is None else \
            round(best_fitness, 4)

        # Determine the output file name basis

        # XXX Use fitness for now.
        #     Later on can address multi-objective goals.
        metric_name = "fitness"
        if generation is not None:
            # Put the file in the gen_NN directory.
            # Call it best_candidate to match the best_candidate.json
            # that gets put there

            base_name = "best_{0}_candidate".format(metric_name)
            filer = GenerationFiler(self.experiment_dir, generation)
            base_path = filer.get_generation_file(base_name)
        else:
            # We do not have a generation that we know about so write out
            # the old-school file name.
            # XXX Not entirely sure when this path would be taken
            base_name = "F{0}_ID-{1}_{2}best_{3}".format(
                fitness, best_id, suffix, metric_name)
            filer = ExperimentFiler(self.experiment_dir)
            base_path = filer.experiment_file(base_name)

        # NetworkVisualizers use the build_training_model() which requires
        # a data_dict of file keys -> file paths to exist.  Domains that
        # wish to visualize their networks that use the data_dict will
        # need to deal with a None value for data dict in the visualization
        # case.
        data_dict = None

        visualizer = NetworkMultiVisualizer(self.master_config,
                                            data_dict,
                                            base_path,
                                            logger=self.logger)
        visualizer.visualize(best_candidate)