def __init__(self, experiment_dir, candidate_id=None, generation=None,
                    base_name=None, logger=None):
        """
        Constructor.

        :param experiment_dir: the directory where experiment results go
        :param candidate_id: the id of the candidate
        :param generation: the generation number for the candidate
        :param base_name: a full base name to use (minus extension)
        :param logger: The logger to use for messaging
        """

        use_base_name = base_name
        if use_base_name is None:
            use_base_name = self.get_base_name(candidate_id, generation)

        use_dir = experiment_dir
        if generation is not None:
            filer = GenerationFiler(experiment_dir, generation)
            use_dir = filer.get_generation_dir()

        dictionary_converter = CandidateDictionaryConverter(
                                        allow_restore_none=False)
        super(CandidatePersistence, self).__init__(
                            base_name=use_base_name,
                            folder=use_dir,
                            dictionary_converter=dictionary_converter,
                            must_exist=True,
                            logger=logger)
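
# Minimal usage sketch (not from the original source): restore a candidate
# previously persisted for a given generation.  The directory, candidate id
# and generation values below are placeholders; restore() is assumed to come
# from the persistence base class.
def _example_restore_candidate(experiment_dir="/tmp/my_experiment"):
    persistence = CandidatePersistence(experiment_dir,
                                       candidate_id="candidate_0",
                                       generation=0)
    return persistence.restore()
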
def write_training_runs(args):

    filer = ExperimentFiler(args.experiment_dir)
    csv_file = filer.experiment_file("training_runs.csv")
    with open(csv_file, 'w') as my_file:
        my_file.write('Generation, %s, Fitness\n' % resolve_alt_objective(args))

    results_files = get_results_files(args)

    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)
        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()

        if len(results_dict) == 0:
            # File not found
            continue

        for key in results_dict.keys():
            result = results_dict[key]
            try:
                # XXX Use FitnessObjectives prepared from config?
                alt_objective = result['metrics'][args.alt_objective]
                fitness = result['metrics']['fitness'] # XXX not kosher
            except Exception:
                try:
                    alt_objective = result['metrics'][args.alt_objective]
                    fitness = result['fitness'] # XXX not kosher
                except Exception as exception:
                    if args.alt_objective == "num_params":
                        fitness = result['fitness'] # XXX not kosher

                        # XXX What generates this params file?
                        cache_file = generation_filer.get_generation_file(
                                        "candidate_{0}.params".format(key))

                        if os.path.exists(cache_file):
                            with open(cache_file, 'r') as my_file:
                                alt_objective = my_file.read()
                        else:
                            print("Extracting num params from network {}".format(key))
                            model = get_model(args.experiment_dir, key, generation)
                            alt_objective = str(model.count_params())
                            with open(cache_file, 'w') as my_file:
                                my_file.write(alt_objective)
                    else:
                        raise exception

            if args.alt_objective == 'training_time':
                alt_objective = str(float(alt_objective) / 3600.0)
            with open(csv_file, 'a') as my_file:
                line = '%s, %s, %s\n' % (generation, alt_objective, fitness)
                my_file.write(line)
    return csv_file
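
# Minimal usage sketch (not from the original source).  write_training_runs()
# expects an args object exposing at least experiment_dir and alt_objective
# attributes; argparse.Namespace is used here as a stand-in for the parsed
# command-line arguments.
def _example_write_training_runs(experiment_dir="/tmp/my_experiment"):
    from argparse import Namespace
    args = Namespace(experiment_dir=experiment_dir,
                     alt_objective="training_time")
    csv_file = write_training_runs(args)
    print("Wrote {0}".format(csv_file))
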
    def run(self):
        """
        Entry point for the session task execution to take over.
        """

        print("Running AnalyzeResultsSessionTask")

        # Read the results files for each generation.
        # These are written out by write_results_file()

        filer = ExperimentFiler(self.experiment_dir)
        glob_spec = filer.experiment_file("gen_*/results_dict.json")
        results_dicts = glob.glob(glob_spec)

        worker_results_files = sorted(results_dicts)
        if len(worker_results_files) <= 0:
            raise ValueError("No results_dicts.json files found in {0}".format(
                self.experiment_dir))

        # No generation number needed, we are only looking to
        # parse path components with it.
        generation_filer = GenerationFiler(self.experiment_dir)

        worker_results_dict = {}
        for worker_results_file in worker_results_files:

            generation = generation_filer.get_generation_from_path(
                worker_results_file)

            # This slurps in results information returned by workers from all
            # candidates of a specific generation
            results_dict_persistence = ResultsDictPersistence(
                self.experiment_dir, generation, logger=self.logger)
            one_worker_results_dict = results_dict_persistence.restore()

            # results_dict here will have one entry per candidate over all
            # generations
            worker_results_dict.update(one_worker_results_dict)

        fitness_objective = self.fitness_objectives.get_fitness_objectives(0)
        is_maximize = fitness_objective.is_maximize_fitness()
        best_result = sorted(
            worker_results_dict.items(),
            key=lambda item: self.candidate_util.get_candidate_fitness(item[1]),
            reverse=is_maximize)[0]
        best_id = best_result[0]

        # Open the file of the best candidate.
        best_candidate_persistence = BestFitnessCandidatePersistence(
            self.experiment_dir, best_id, logger=self.logger)
        best_candidate = best_candidate_persistence.restore()

        best_id = self.candidate_util.get_candidate_id(best_candidate)

        self.draw_best_candidate_results(best_candidate,
                                         generation,
                                         suffix='abs')
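
# Self-contained illustration (not from the original source) of the selection
# pattern used in run() above: pick the entry with the best fitness from a
# dict keyed by candidate id, honoring the maximize/minimize flag.
def _example_pick_best(results_by_id, is_maximize=True):
    best_id, best_result = sorted(
        results_by_id.items(),
        key=lambda item: item[1].get('fitness', 0.0),
        reverse=is_maximize)[0]
    return best_id, best_result
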
    def draw_best_candidate_results(self,
                                    best_candidate,
                                    generation=None,
                                    suffix=''):
        """
        :param best_candidate: A candidate object comprising the best of a
                        generation.
        :param generation: Default value is None
        :param suffix: Default value is an empty string
        """
        experiment_config = self.master_config.get('experiment_config')
        if not experiment_config.get('visualize'):
            return

        best_id = self.candidate_util.get_candidate_id(best_candidate)
        best_fitness = self.candidate_util.get_candidate_fitness(
            best_candidate)

        fitness = round(best_fitness, 4) if best_fitness is not None else None

        # Determine the output file name basis

        # XXX Use fitness for now.
        #     Later on can address multi-objective goals.
        metric_name = "fitness"
        if generation is not None:
            # Put the file in the gen_NN directory.
            # Call it best_candidate to match the best_candidate.json
            # that gets put there

            base_name = "best_{0}_candidate".format(metric_name)
            filer = GenerationFiler(self.experiment_dir, generation)
            base_path = filer.get_generation_file(base_name)
        else:
            # We do not have a generation that we know about so write out
            # the old-school file name.
            # XXX Not entirely sure when this path would be taken
            base_name = "F{0}_ID-{1}_{2}best_{3}".format(
                fitness, best_id, suffix, metric_name)
            filer = ExperimentFiler(self.experiment_dir)
            base_path = filer.experiment_file(base_name)

        # NetworkVisualizers use the build_training_model() which requires
        # a data_dict of file keys -> file paths to exist.  Domains that
        # wish to visualize their networks that use the data_dict will
        # need to deal with a None value for data dict in the visualization
        # case.
        data_dict = None

        visualizer = NetworkMultiVisualizer(self.master_config,
                                            data_dict,
                                            base_path,
                                            logger=self.logger)
        visualizer.visualize(best_candidate)
    def __init__(self,
                 experiment_dir,
                 generation,
                 candidate_id,
                 timestamp,
                 logger=None):
        """
        Constructor.

        :param experiment_dir: the directory where experiment results go
        :param generation: the generation number of the results dict
        :param candidate_id: The id of the candidate that had the error
        :param timestamp: A double timestamp of when the error occurred.
        :param logger: A logger to send messaging to
        """

        filer = ExperimentFiler(experiment_dir)
        error_dir = filer.experiment_file("errors")

        ts_datetime = datetime.fromtimestamp(timestamp)
        time_format = '%Y-%m-%d-%H:%M:%S'
        time_string = ts_datetime.strftime(time_format)

        filer = GenerationFiler(experiment_dir, generation)
        gen_name = filer.get_generation_name()

        base_name = "evaluation_error_{0}_candidate_{1}_{2}".format(
            gen_name, candidate_id, time_string)

        super(EvaluationErrorPersistence, self).__init__(
            base_name=base_name,
            folder=error_dir,
            dictionary_converter=CandidateDictionaryConverter(),
            logger=logger)
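
# Minimal usage sketch (not from the original source): persist the candidate
# that failed evaluation, stamping the file name with the current time.
# persist() is assumed to be provided by the persistence base class.
def _example_record_evaluation_error(experiment_dir, generation, candidate):
    import time
    persistence = EvaluationErrorPersistence(experiment_dir,
                                             generation,
                                             candidate.get('id'),
                                             time.time())
    persistence.persist(candidate)
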
    def __init__(self, experiment_dir, generation, timestamp, logger=None):
        """
        Constructor.

        :param experiment_dir: the directory where experiment results go
        :param generation: the generation number of the results dict
        :param timestamp: A double timestamp of when the error occurred.
        :param logger: A logger to send messaging to
        """

        filer = ExperimentFiler(experiment_dir)
        error_dir = filer.experiment_file("errors")

        ts_datetime = datetime.fromtimestamp(timestamp)
        time_format = '%Y-%m-%d-%H:%M:%S'
        time_string = ts_datetime.strftime(time_format)

        filer = GenerationFiler(experiment_dir, generation)
        gen_name = filer.get_generation_name()

        basename = "experiment_host_error_{0}_{1}".format(
            gen_name, time_string)

        dictionary_converter = PassThroughDictionaryConverter()
        factory = PersistenceFactory(object_type="string",
                                     dictionary_converter=dictionary_converter,
                                     logger=logger)
        self.dict_persistence = factory.create_persistence(
            error_dir,
            basename,
            persistence_mechanism=PersistenceMechanisms.LOCAL,
            serialization_format=SerializationFormats.TEXT,
            must_exist=False)
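
# Minimal usage sketch (not from the original source).  The class name
# ExperimentHostErrorPersistence is assumed here, since the snippet above only
# shows its constructor; persist() on the wrapped dict_persistence is likewise
# assumed from the surrounding persistence API.
def _example_record_host_error(experiment_dir, generation, error_info):
    import time
    persistence = ExperimentHostErrorPersistence(experiment_dir, generation,
                                                 time.time())
    persistence.dict_persistence.persist(error_info)
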
    def __init__(self, experiment_dir, generation, logger=None):
        """
        Constructor.

        :param experiment_dir: the directory where experiment results go
        :param generation: the generation number of the population results
        :param logger: A logger to send messaging to
        """

        filer = GenerationFiler(experiment_dir, generation)
        generation_dir = filer.get_generation_dir()

        super(PopulationResultsPersistence, self).__init__(
                base_name="population_results",
                folder=generation_dir,
                dictionary_converter=PopulationResultsDictionaryConverter(),
                logger=logger)
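
# Minimal usage sketch (not from the original source): restore the population
# results written for a particular generation.  restore() is assumed to come
# from the persistence base class.
def _example_restore_population_results(experiment_dir, generation):
    persistence = PopulationResultsPersistence(experiment_dir, generation)
    return persistence.restore()
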
def visualize_training_runs(args):
    total_training_times = []
    longest_training_times = []
    average_training_times = []

    results_files = get_results_files(args)
    num_generations = len(results_files)

    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)
        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                            logger=None)
        results_dict = persistence.restore()

        if len(results_dict) == 0:
            # File not found
            continue

        times = []
        for key in results_dict.keys():
            result = results_dict[key]
            try:
                training_time = result['metrics']['training_time']
            except Exception:
                # Fall back to the top-level key, mirroring the fitness
                # fallback used in write_training_runs() above.
                training_time = result['training_time']
            times.append(training_time)
        total_training_times.append(np.sum(times) / 3600)
        longest_training_times.append(np.max(times) / 3600)
        average_training_times.append(np.mean(times) / 3600)

    plt.plot(total_training_times)
    plt.title('Total Training Machine Hours per Generation')
    plt.ylabel('Hours')
    plt.xlabel('Generation')
    plt.xlim(0, num_generations)

    filer = ExperimentFiler(args.experiment_dir)
    runs_total_file = filer.experiment_file("training_runs_total.png")
    plt.savefig(runs_total_file, bbox_inches='tight')
    plt.clf()

    plt.plot(longest_training_times)
    plt.title('Longest Training Machine Hours per Generation')
    plt.ylabel('Hours')
    plt.xlabel('Generation')
    plt.xlim(0, num_generations)

    runs_longest_file = filer.experiment_file("training_runs_longest.png")
    plt.savefig(runs_longest_file, bbox_inches='tight')
    plt.clf()

    plt.plot(average_training_times)
    plt.title('Mean Training Machine Hours per Generation')
    plt.ylabel('Hours')
    plt.xlabel('Generation')
    plt.xlim(0, num_generations)

    runs_avg_file = filer.experiment_file("training_runs_avg.png")
    plt.savefig(runs_avg_file, bbox_inches='tight')
    plt.clf()

    total_machine_hours = np.sum(total_training_times)
    print("Total machine hours used: {}".format(total_machine_hours))