def _read_or_compute_num_params(args, generation_filer, key, generation):
    """
    Return the parameter count for one candidate as a string.

    Uses a per-candidate cache file when present; otherwise loads the
    model, counts its parameters, and writes the cache file for next time.

    :param args: parsed command-line args (uses args.experiment_dir)
    :param generation_filer: GenerationFiler for the current generation
    :param key: candidate id (also the cache-file name component)
    :param generation: generation number the candidate belongs to
    """
    # XXX What generates this params file?
    cache_file = generation_filer.get_generation_file(
                    "candidate_{0}.params".format(key))

    if os.path.exists(cache_file):
        # Cache hit: the count was stored as text, so read it back in
        # text mode (the original opened 'rb' yet treated the result as str).
        with open(cache_file, 'r') as params_file:
            return params_file.read()

    # Cache miss: reconstruct the model and count its parameters.
    # (The original printed a hard-coded 0 here; report the candidate id.)
    print("Extracting num params from network {}".format(key))
    model = get_model(args.experiment_dir, key, generation)
    num_params = str(model.count_params())
    with open(cache_file, 'w') as params_file:
        params_file.write(num_params)
    return num_params


def write_training_runs(args):
    """
    Write a per-candidate CSV of (generation, alternate objective, fitness).

    One row is written for every candidate result found in every
    generation's results dict.

    :param args: parsed command-line args (uses args.experiment_dir
                 and args.alt_objective)
    :return: the path of the CSV file written
    """
    filer = ExperimentFiler(args.experiment_dir)
    csv_file = filer.experiment_file("training_runs.csv")
    # Text mode: this file holds text rows, not bytes (the original opened
    # it 'wb'/'ab', which raises TypeError for str writes under Python 3).
    with open(csv_file, 'w') as my_file:
        my_file.write('Generation, %s, Fitness\n' % resolve_alt_objective(args))

    results_files = get_results_files(args)

    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)
        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()

        if len(results_dict) == 0:
            # Empty dict means the results file was not found; skip gen.
            continue

        for key, result in results_dict.items():
            # XXX Use FitnessObjectives prepared from config?
            # Prefer the metrics sub-dict for fitness; older result records
            # carried fitness at the top level.
            try:
                fitness = result['metrics']['fitness']  # XXX not kosher
            except (KeyError, TypeError):
                fitness = result['fitness']  # XXX not kosher

            try:
                alt_objective = result['metrics'][args.alt_objective]
            except (KeyError, TypeError):
                if args.alt_objective != "num_params":
                    # No fallback exists for other objectives; re-raise
                    # (bare raise keeps the original traceback, unlike the
                    # original's `raise exception`).
                    raise
                alt_objective = _read_or_compute_num_params(
                    args, generation_filer, key, generation)

            if args.alt_objective == 'training_time':
                # Convert seconds to hours for readability.
                alt_objective = str(float(alt_objective) / 3600.0)
            with open(csv_file, 'a') as my_file:
                # Comma-separate the row to match the CSV header above
                # (the original used spaces, producing an inconsistent file).
                my_file.write('%s, %s, %s\n' % (generation, alt_objective,
                                                fitness))
    return csv_file
# --- Example #2 (scrape artifact: bare "Пример #2" / vote-count line made a comment) ---
    def run(self):
        """
        Entry point for the session task execution to take over.

        Merges the per-generation results_dict.json files of the whole
        experiment, selects the best candidate by the primary fitness
        objective, restores that candidate from disk, and draws its results.
        """

        print("Running AnalyzeResultsSessionTask")

        # Read the results files for each generation.
        # These are written out by write_results_file()

        filer = ExperimentFiler(self.experiment_dir)
        glob_spec = filer.experiment_file("gen_*/results_dict.json")
        results_dicts = glob.glob(glob_spec)

        # Sort so generations are visited in path order.
        worker_results_files = sorted(results_dicts)
        if len(worker_results_files) <= 0:
            raise ValueError("No results_dicts.json files found in {0}".format(
                self.experiment_dir))

        # No generation number needed, we are only looking to
        # parse path components with it.
        generation_filer = GenerationFiler(self.experiment_dir)

        worker_results_dict = {}
        for worker_results_file in worker_results_files:

            generation = generation_filer.get_generation_from_path(
                worker_results_file)

            # This slurps in results information returned by workers from all
            # candidates of a specific generation
            results_dict_persistence = ResultsDictPersistence(
                self.experiment_dir, generation, logger=self.logger)
            one_worker_results_dict = results_dict_persistence.restore()

            # results_dict here will have one entry per candidate over all
            # generations
            worker_results_dict.update(one_worker_results_dict)

        # NOTE(review): method name is plural but is called with an index;
        # presumably it returns the objective at position 0 — confirm.
        fitness_objective = self.fitness_objectives.get_fitness_objectives(0)
        is_maximize = fitness_objective.is_maximize_fitness()
        # NOTE(review): each x below is a (candidate_key, result) tuple from
        # dict.items(); get_candidate_fitness() is handed the whole tuple,
        # not the result dict — verify that is what it expects.
        best_result = sorted(list(worker_results_dict.items()),
                            key=lambda \
                            x: max(self.candidate_util.get_candidate_fitness(x)),
                            reverse=is_maximize)[0]
        # NOTE(review): best_result is a tuple, and tuples have no .get();
        # this line raises AttributeError at runtime. Likely intended
        # best_result[1].get('id') (or the dict key itself) — confirm and fix.
        best_id = best_result.get('id')

        # Open the file of the best candidate.
        best_candidate_persistence = BestFitnessCandidatePersistence(
            self.experiment_dir, best_id, logger=self.logger)
        best_candidate = best_candidate_persistence.restore()

        # Re-derive the id from the restored candidate record.
        best_id = self.candidate_util.get_candidate_id(best_candidate)

        # NOTE(review): 'generation' here is the loop variable left over from
        # the scan above, i.e. the last generation visited.
        self.draw_best_candidate_results(best_candidate,
                                         generation,
                                         suffix='abs')
def _plot_hours_per_generation(filer, hours, title, filename, num_generations):
    """
    Plot one hours-per-generation series and save it as a PNG.

    :param filer: ExperimentFiler used to resolve the output path
    :param hours: sequence of per-generation hour values to plot
    :param title: plot title
    :param filename: basename of the PNG to write in the experiment dir
    :param num_generations: upper bound for the x axis
    """
    plt.plot(hours)
    plt.title(title)
    plt.ylabel('Hours')
    plt.xlabel('Generation')
    plt.xlim(0, num_generations)
    out_file = filer.experiment_file(filename)
    plt.savefig(out_file, bbox_inches='tight')
    # Clear the current figure so the next plot starts clean.
    plt.clf()


def visualize_training_runs(args):
    """
    Plot total / longest / mean training machine-hours per generation.

    Writes training_runs_total.png, training_runs_longest.png and
    training_runs_avg.png into the experiment directory and prints the
    grand total of machine hours used.

    :param args: parsed command-line args (uses args.experiment_dir)
    """
    total_training_times = []
    longest_training_times = []
    average_training_times = []

    results_files = get_results_files(args)
    num_generations = len(results_files)

    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)
        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()

        if len(results_dict) == 0:
            # Empty dict means the results file was not found; skip gen.
            continue

        # The original wrapped this lookup in a try/except whose handler
        # repeated the identical lookup — a no-op that re-raised the same
        # KeyError, so the guard is dropped.
        times = [result['metrics']['training_time']
                 for result in results_dict.values()]
        # Seconds -> hours.
        total_training_times.append(np.sum(times) / 3600)
        longest_training_times.append(np.max(times) / 3600)
        average_training_times.append(np.mean(times) / 3600)

    filer = ExperimentFiler(args.experiment_dir)
    _plot_hours_per_generation(filer, total_training_times,
                               'Total Training Machine Hours per Generation',
                               "training_runs_total.png", num_generations)
    _plot_hours_per_generation(filer, longest_training_times,
                               'Longest Training Machine Hours per Generation',
                               "training_runs_longest.png", num_generations)
    _plot_hours_per_generation(filer, average_training_times,
                               'Mean Training Machine Hours per Generation',
                               "training_runs_avg.png", num_generations)

    total_machine_hours = np.sum(total_training_times)
    print("Total machine hours used: {}".format(total_machine_hours))