Example #1
    def __init__(self,
                 dimension_sizes=(-1, -1, 10),
                 initial_size=50,
                 steps=1000,
                 allow_cpu=True,
                 mutation_rate=0.05,
                 copy_mutation_rate=0,
                 replace_mutation_rate=0,
                 zone_mutation_rate=0,
                 crossover_rate=0.4,
                 selection='random',
                 tournament_size=10,
                 benchmarking_function=None,
                 benchmarking_steps=0,
                 benchmark_before_selection=False,
                 benchmarking_n_keep=None,
                 benchmarking_time_threshold=None,
                 include_trivial_solutions=True,
                 show_score_plot=False,
                 plot_axes=(0, 2),
                 plot_animation=False,
                 animation_fps=1,
                 archive_log_period=None,
                 **kwargs):
        super().__init__(**kwargs)
        # Stored as a list so that -1 entries can be overwritten in optimize()
        self.dimension_sizes = list(dimension_sizes)
        self.initial_size = initial_size
        self.steps = steps
        self.allow_cpu = allow_cpu
        self.mutation_rate = mutation_rate
        self.copy_mutation_rate = copy_mutation_rate
        self.replace_mutation_rate = replace_mutation_rate
        self.zone_mutation_rate = zone_mutation_rate
        self.crossover_rate = crossover_rate
        self.selection = selection
        self.tournament_size = tournament_size
        self.include_trivial_solutions = include_trivial_solutions
        self.benchmarking_steps = benchmarking_steps
        self.benchmark_before_selection = benchmark_before_selection
        self.benchmarking_n_keep = benchmarking_n_keep
        self.benchmarking_time_threshold = benchmarking_time_threshold
        self.benchmarking_function = benchmarking_function
        self.plot_axes = plot_axes
        self.show_score_plot = show_score_plot
        self.plot_animation = plot_animation
        self.animation_fps = animation_fps
        self.archive_log_period = archive_log_period

        if self.archive_log_period is not None:
            os.makedirs(os.path.join(get_log_dir(), 'archive_logs'),
                        exist_ok=True)

        self.worker_pool = None
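This constructor most likely belongs to the MAP-Elites optimizer that Example #10 registers as MapElitesOptimizer. A minimal instantiation sketch under that assumption; the verbose and n_threads keyword arguments are assumed to be accepted by the base class via **kwargs, based on how they are used in the other examples:

    optimizer = MapElitesOptimizer(
        dimension_sizes=(-1, -1, 10),  # -1 entries are resolved from the device graph in optimize()
        initial_size=50,
        steps=2000,
        mutation_rate=0.05,
        crossover_rate=0.4,
        selection='tournament',
        tournament_size=10,
        archive_log_period=100,        # write an archive snapshot every 100 steps
        verbose=50,                    # assumed: progress is logged every 50 steps
        n_threads=4)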
Example #2
    def optimize(self, net_string, device_graph):
        n_devices = len(device_graph.devices)

        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        with open(os.path.join(get_log_dir(), 'time_history.csv'), 'w') as f:
            f.write('generation, time\n')

        i = 0

        with tqdm(total=self.steps, disable=not self.verbose) as t:

            def eval_function(x):
                nonlocal i
                t.update(1)
                new_placement = [int(round(g)) for g in x]
                score = self.evaluate_placement(
                    apply_placement(net_string, new_placement, groups),
                    device_graph)

                i += 1
                return score

            def callback(x, score, context):
                if self.verbose:
                    log(f'[{i + 1}/{self.steps}] Found new minimum: {score:.2f}ms')
                with open(os.path.join(get_log_dir(), 'time_history.csv'),
                          'a') as f:
                    f.write(f'{i + 1}, {score}\n')

            result = scipy.optimize.dual_annealing(
                eval_function, [(0, n_devices - 1)] * len(groups),
                no_local_search=True,
                maxfun=self.steps,
                callback=callback)

        placement = [int(round(g)) for g in result.x]

        if self.verbose:
            log(f'Best found placement: {placement}')

        solution = json.dumps(apply_placement(net_string, placement, groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'sa_solution.json'), 'w') as f:
            f.write(solution)

        return solution
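For reference, scipy.optimize.dual_annealing explores continuous variables, which is why the code above rounds each coordinate to the nearest integer device index both inside the objective and when reading result.x. A self-contained sketch of that pattern on a toy objective (not part of the project):

    import scipy.optimize

    def toy_cost(x):
        placement = [int(round(g)) for g in x]   # continuous values -> integer device indices
        return sum(placement)                    # stand-in for a simulated batch time

    result = scipy.optimize.dual_annealing(
        toy_cost,
        bounds=[(0, 3)] * 5,       # 5 layer groups, 4 devices (indices 0..3)
        no_local_search=True,
        maxfun=200)
    best_placement = [int(round(g)) for g in result.x]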
Example #3
def run_experiment(lg_dir):
    set_log_dir(lg_dir)
    optimize_with_config(config_path)

    convert_to_placement(
        os.path.join(get_log_dir(), 'checkpoints'),
        os.path.join(get_log_dir(), 'checkpoints', 'placements'))

    benchmark_all_placements(os.path.join(get_log_dir(), 'checkpoints',
                                          'placements'),
                             os.path.join(get_log_dir(), 'batch_times.csv'),
                             model_type,
                             batches=BATCHES,
                             drop_batches=1,
                             format='long')
Example #4
def plot_results(sim_path, real_path):
    sim_results = pd.read_csv(sim_path, names=['generation', 'time'])
    sim_results['category'] = 'Simulated'
    real_results = pd.read_csv(real_path, names=['generation', 'time'])
    real_results['category'] = 'Benchmarked'

    all_results = pd.concat([sim_results, real_results], axis=0)

    cmap = sns.cubehelix_palette(2,
                                 start=.5,
                                 rot=-.75,
                                 light=0.5,
                                 reverse=True)
    sns.lineplot(x='generation',
                 y='time',
                 hue='category',
                 style='category',
                 data=all_results,
                 palette=cmap)
    plt.legend(['Simulated', 'Benchmarked'])
    plt.xlabel('Generation')
    plt.ylabel('Batch execution time (ms)')

    plt.tight_layout()
    plt.savefig(os.path.join(get_log_dir(), 'sim_real_comp.pdf'))

    plt.show()
    plt.close()
Example #5
def callback(x, score, context):
    if self.verbose:
        log(f'[{i + 1}/{self.steps}] Found new minimum: {score:.2f}ms')
    with open(os.path.join(get_log_dir(), 'time_history.csv'), 'a') as f:
        f.write(f'{i + 1}, {score}\n')
Example #6
    def optimize(self, net_string, device_graph):
        n_devices = len(device_graph.devices)

        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        placement = [randint(0, n_devices - 1)
                     for n in range(len(groups))]  # [0] * len(groups)
        score = self.evaluate_placement(
            apply_placement(net_string, placement, groups), device_graph)

        if self.score_save_period:
            with open(os.path.join(get_log_dir(), 'time_history.csv'),
                      'w') as f:
                f.write('step, time\n')

        for i in tqdm(range(self.steps), disable=not self.verbose):
            new_placement = placement[:]
            new_placement[randint(0, len(new_placement) - 1)] = randint(0, n_devices - 1)
            new_score = self.evaluate_placement(
                apply_placement(net_string, new_placement, groups),
                device_graph)

            if self.verbose and (i + 1) % self.verbose == 0:
                log(f'[{i + 1}/{self.steps}] Best run time: {score:,.2f}ms')

            if self.score_save_period and i % self.score_save_period == 0:
                with open(os.path.join(get_log_dir(), 'time_history.csv'),
                          'a') as f:
                    f.write(f'{i + 1}, {score}\n')

            if new_score != -1:
                if new_score < score or score == -1 \
                        or random() < expit((score - new_score) / self.temp(i)):
                    score = new_score
                    placement = new_placement

        solution = json.dumps(apply_placement(net_string, placement, groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'sa_solution.json'), 'w') as f:
            f.write(solution)

        return solution
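In the acceptance rule above, improvements are always taken, while a worse placement is accepted with probability expit((score - new_score) / self.temp(i)), which shrinks as the temperature drops. The concrete schedules come from temp_schedules (see Example #10) and are not shown on this page; the exponential-decay schedule below is only an illustration, not the project's implementation:

    from scipy.special import expit

    def exponential_decay(t0=100.0, decay=0.995):
        # Returns a callable temp(i): the temperature after i steps.
        return lambda i: t0 * decay ** i

    temp = exponential_decay()
    # Probability of accepting a placement that is 5 ms slower:
    p_early = expit(-5 / temp(0))     # close to 0.5 at the start of the run
    p_late = expit(-5 / temp(5000))   # approaches 0 once the temperature has decayed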
Example #7
def clear_processor_log():
    with open(os.path.join(get_log_dir(), 'processor_util.csv'), 'w') as f:
        headers = ['timestamp', 'step']
        for gpu in range(len(GPUtil.getGPUs())):
            headers.append(f'gpu:{gpu}')

        for cpu in range(psutil.cpu_count()):
            headers.append(f'cpu:{cpu}')

        f.write(f'{",".join(headers)}\n')
Example #8
        def log_archive(file_name):
            indices = list(np.argwhere(np.isfinite(archive_scores)))
            indices = sorted(indices,
                             key=lambda i: -archive_scores[i[0], i[1], i[2]])

            with open(os.path.join(get_log_dir(), 'archive_logs', file_name),
                      'w') as f:
                f.write('niche; time; placement\n')
                for i in indices:
                    niche = tuple(i)
                    time = 1 / archive_scores[i[0], i[1], i[2]]
                    placement = archive_individuals[i[0], i[1], i[2]].tolist()

                    f.write(f'{niche}; {time}; {placement}\n')
Example #9
def update_processor_log(step='null'):
    log_file = os.path.join(get_log_dir(), 'processor_util.csv')
    if not os.path.exists(log_file):
        clear_processor_log()

    with open(log_file, 'a') as f:
        log_line = [datetime.now().isoformat(), step]

        for gpu in GPUtil.getGPUs():
            log_line.append(f'{gpu.load*100:.2f}')

        for cpu_util in psutil.cpu_percent(percpu=True):
            log_line.append(f'{cpu_util:.2f}')

        f.write(f'{",".join(map(str, log_line))}\n')
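A possible driver loop for the two logging helpers above (clear_processor_log from Example #7 and update_processor_log from this example). The run_one_batch call is a hypothetical stand-in for whatever work is being measured:

    clear_processor_log()                 # write the CSV header once
    for step in range(100):
        run_one_batch()                   # hypothetical: one unit of measured work
        update_processor_log(step=step)   # append GPU and per-core CPU utilization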
Example #10
def optimize_with_config(config_path=None,
                         config=None,
                         verbose=True,
                         set_log_dir=False):
    assert config_path or config, 'Either a config path or a config dictionary must be provided'
    assert config is None or isinstance(config,
                                        dict), 'config must be a dictionary'

    if config_path:
        with open(config_path) as f:
            config = json.load(f)

    device_graph_path = config['device_graph_path']
    net_path = config['net_path']

    log_dir = config.get('log_dir', '')
    if log_dir and set_log_dir:
        exprimo.set_log_dir(log_dir)

    if verbose:
        log('\n\n\n')
        log('=' * 100)
        log('EXPRIMO OPTIMIZATION'.rjust(60))
        log('=' * 100)
        log()

    if verbose:
        if config_path:
            log(f'Using config path {config_path}')
        else:
            log('Using config provided as dictionary')

    args = config.get('optimizer_args', {})

    batches = args.get('batches', 1)
    pipeline_batches = args.get('pipeline_batches', 1)

    args['batches'] = batches
    args['pipeline_batches'] = pipeline_batches

    if 'benchmarking_function' in args and isinstance(
            args['benchmarking_function'], dict):
        args['benchmarking_function'] = create_benchmark_function(
            **args['benchmarking_function'])

    comp_penalty = args.get('simulator_comp_penalty', 1.0)
    comm_penalty = args.get('simulator_comm_penalty', 1.0)

    optimizers = {
        'random_hill_climber': RandomHillClimbingOptimizer,
        'hill_climber': HillClimbingOptimizer,
        'linear_search': LinearSearchOptimizer,
        'simulated_annealing': SimulatedAnnealingOptimizer,
        'sa': SimulatedAnnealingOptimizer,
        'scipy_sa': ScipySimulatedAnnealingOptimizer,
        'scipy_simulated_annealing': ScipySimulatedAnnealingOptimizer,
        'genetic_algorithm': GAOptimizer,
        'ga': GAOptimizer,
        'pso': ParticleSwarmOptimizer,
        'particle_swarm': ParticleSwarmOptimizer,
        'map_elites': MapElitesOptimizer,
        'map-elites': MapElitesOptimizer
    }

    if config['optimizer'] in ['sa', 'simulated_annealing'] and isinstance(
            args['temp_schedule'], list):
        tp = args['temp_schedule']
        args['temp_schedule'] = temp_schedules[tp[0]](*tp[1:])

    optimizer = optimizers[config['optimizer']](**args)

    device_graph = DeviceGraph.load_from_file(device_graph_path)
    with open(net_path) as f:
        net_string = f.read()

    if verbose:
        log(f'Optimizing {net_path} on {device_graph_path} using {optimizer}')
        log(args)
        log()

    best_net = optimizer.optimize(net_string, device_graph)
    net_dict = json.loads(best_net)

    graph = ComputationGraph()
    graph.load_from_string(best_net)
    simulator = Simulator(graph, device_graph)
    simulated_execution_time, events = simulator.simulate(
        batch_size=128,
        print_memory_usage=config.get('print_memory_usage', False),
        print_event_trace=config.get('print_event_trace', False),
        return_event_trace=True,
        batches=batches,
        pipeline_batches=pipeline_batches,
        comm_penalization=comm_penalty,
        comp_penalization=comp_penalty)

    if config.get('plot_event_trace', True):
        save_path = os.path.join(exprimo.get_log_dir(), 'event_trace.pdf')
        plot_event_trace(events, simulator, save_path=save_path)

    if verbose:
        log('\n')
        # print(f'Best discovered configuration: {[layer["device"] for layer in net_dict["layers"].values()]}')
        log(f'Simulated execution time: {simulated_execution_time:.2f}ms')

        if config.get('benchmark_solution', False) and args.get(
                'benchmarking_function', None):
            device_assignment = get_device_assignment(net_dict)
            time = args['benchmarking_function'](device_assignment)
            log(f'Benchmarked execution time: {time:.2f}ms')

    return best_net, simulated_execution_time
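optimize_with_config can also be called with an in-memory dictionary instead of a JSON file path. A hedged sketch using only keys the function above actually reads; the two paths are placeholders, and the contents of optimizer_args are assumptions based on the other examples:

    config = {
        'device_graph_path': 'device_graphs/example.json',   # placeholder path
        'net_path': 'nets/resnet50.json',                    # placeholder path
        'log_dir': 'logs/ga_run',
        'optimizer': 'ga',
        'optimizer_args': {
            'steps': 1000,
            'verbose': 50,
        },
        'plot_event_trace': True,
        'benchmark_solution': False,
    }

    best_net, simulated_time = optimize_with_config(config=config, set_log_dir=True)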
Example #11
def run_n_times(n):
    for i in tqdm(range(n)):
        run_experiment(lg_dir=os.path.join(log_dir, f'{i:03}'))

        plot_results(os.path.join(get_log_dir(), 'checkpoints', 'scores.csv'),
                     os.path.join(get_log_dir(), 'batch_times.csv'))
Example #12
    x, y = all_results['time_simulated'], all_results['time_benchmarked']
    plt.scatter(x, y)

    if plot_regression:
        x_min = np.min(x)
        x_max = np.max(x)
        x1 = np.arange(x_min, x_max, (x_max - x_min) / 1000)
        m, b = np.polyfit(x, y, 1)
        plt.plot(x1, m * x1 + b, c='orange', ls='--')

        corr = np.corrcoef(x, y)
        print(f'Pearson coefficient: R = {corr[0][1]}')

    plt.xlabel('Simulated batch time (ms)')
    plt.ylabel('Benchmarked batch time (ms)')

    plt.tight_layout()

    plt.savefig(os.path.join(lg_dir, 'scatter_plot.pdf'))
    plt.show()


if __name__ == '__main__':
    if repeats == 1:
        run_experiment(lg_dir=log_dir)
        plot_results(os.path.join(get_log_dir(), 'checkpoints', 'scores.csv'),
                     os.path.join(get_log_dir(), 'batch_times.csv'))
    else:
        run_n_times(repeats)
Example #13
        def run_optimization(steps, benchmarking_function=None, start_step=0):
            nonlocal archive_individuals, archive_scores

            if self.verbose:
                if benchmarking_function:
                    log('Optimizing with benchmarking...')
                else:
                    log('Optimizing with simulator...')

            step_size = 1 if benchmarking_function else self.n_threads

            for i in tqdm(range(0, steps, step_size),
                          disable=not self.verbose):
                init_number = min(max(0, self.initial_size - i),
                                  self.n_threads)

                if self.include_trivial_solutions and i == 0:
                    candidates = create_candidates(init_number,
                                                   create_trivial=True,
                                                   create_random=True)
                else:
                    candidates = create_candidates(init_number,
                                                   create_random=True)
                if init_number > 0:
                    candidates += create_candidates(
                        self.n_threads - init_number,
                        selectable_candidates=candidates[:])
                else:
                    candidates += create_candidates(self.n_threads -
                                                    init_number)

                if benchmarking_function:
                    eval_results = [
                        benchmark(candidates[0], benchmarking_function)
                    ]
                elif self.n_threads == 1:
                    eval_results = [evaluate(candidates[0])]
                else:
                    fn_args = zip(((create_description(c), c)
                                   for c in candidates), repeat(net_string),
                                  repeat(groups), repeat(device_graph),
                                  repeat(self.pipeline_batches),
                                  repeat(self.batches),
                                  repeat(self.simulator_comp_penalty),
                                  repeat(self.simulator_comm_penalty),
                                  repeat(self.device_memory_utilization))

                    eval_results = self.worker_pool.starmap(_evaluate, fn_args)

                for result in eval_results:
                    score, description, individual = result

                    previous_elite_score = archive_scores[description[0],
                                                          description[1],
                                                          description[2]]
                    if np.isnan(previous_elite_score
                                ) or previous_elite_score < score:
                        archive_scores[description[0], description[1],
                                       description[2]] = score
                        archive_individuals[description[0], description[1],
                                            description[2], :] = individual

                if self.verbose and (i + 1) % self.verbose < step_size:
                    best_time = 1 / np.nanmax(archive_scores)
                    log(f'[{i + 1}/{steps}] Best time: {best_time:.4f}ms')

                if self.score_save_period and (i % self.score_save_period == 0
                                               or steps - i < step_size):
                    best_time = 1 / np.nanmax(archive_scores)
                    with open(os.path.join(get_log_dir(), 'time_history.csv'),
                              'a') as f:
                        f.write(f'{i + start_step + 1}, {best_time}\n')

                if self.archive_log_period and (
                        i + 1) % self.archive_log_period < step_size:
                    log_archive(f'step_{i + start_step + 1:06}.csv')
Example #14
    def optimize(self, net_string, device_graph, return_full_archive=False):

        if self.n_threads > 1:
            self.worker_pool = Pool(self.n_threads)

        n_devices = len(device_graph.devices)
        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        if self.dimension_sizes[0] == -1:
            self.dimension_sizes[0] = n_devices

        if self.dimension_sizes[1] == -1:
            self.dimension_sizes[1] = n_devices

        if self.dimension_sizes[2] == -1:
            comp_graph = ComputationGraph()
            comp_graph.load_from_string(net_string)
            _, max_jumps = comp_graph.get_number_of_jumps(
                return_max_jumps=True)
            self.dimension_sizes[2] = max_jumps

        archive_scores = np.empty(self.dimension_sizes)
        archive_scores[:] = np.NaN
        archive_individuals = np.zeros(list(self.dimension_sizes) +
                                       [len(groups)],
                                       dtype=int)

        def evaluate(individual):
            return _evaluate(individual, net_string, groups, device_graph,
                             self.dimension_sizes, self.pipeline_batches,
                             self.batches, self.simulator_comp_penalty,
                             self.simulator_comm_penalty,
                             self.device_memory_utilization)

        def mutate(individual):
            new_individual = []
            if random.random() < self.replace_mutation_rate:
                devices_present = list(set(individual))
                i1 = random.choice(devices_present)
                i2 = random.choice(devices_present)

                new_individual = [i2 if i == i1 else i for i in individual]
            elif random.random() < self.zone_mutation_rate:
                split1 = random.randint(0, len(individual) - 1)
                split2 = split1 + min(np.random.geometric(0.2),
                                      len(individual) - split1)
                dev = random.randint(0 if self.allow_cpu else 1, n_devices - 1)
                new_individual = individual[:split1] + [dev] * (
                    split2 - split1) + individual[split2:]
            else:
                for i, gene in enumerate(individual):
                    if random.random() < self.copy_mutation_rate and i > 0:
                        new_individual.append(individual[i - 1])
                    elif random.random() < self.mutation_rate:
                        if self.allow_cpu:
                            new_individual.append(
                                random.randint(0, n_devices - 1))
                        else:
                            new_individual.append(
                                random.randint(1, n_devices - 1))
                    else:
                        new_individual.append(gene)

            return new_individual

        def crossover(parent1, parent2):
            crossover_point = random.randint(1, len(parent1) - 1)
            return parent1[:crossover_point] + parent2[crossover_point:]

        def create_candidates(n,
                              create_random=False,
                              create_trivial=False,
                              selectable_candidates=None):
            if n <= 0:
                return []
            candidates = []
            if create_trivial:
                candidates.extend([[i] * len(groups)
                                   for i in range(1, n_devices)])
                n -= n_devices - 1

                if self.allow_cpu:
                    candidates.append([0] * len(groups))
                    n -= 1

            if create_random:
                while len(candidates) < n:
                    candidates.append(
                        generate_random_placement(
                            len(groups),
                            n_devices,
                            allow_device_0=self.allow_cpu))
            else:
                selectable_indices = np.argwhere(np.isfinite(archive_scores))
                # selectable_indices = sorted(selectable_indices, key=lambda x: -archive_scores[x[0], x[1], x[2]])
                while len(candidates) < n:
                    c = []
                    if selectable_candidates:
                        for _ in range(1 + int(
                                random.random() < self.crossover_rate)):
                            c.append(random.choice(selectable_candidates))
                    else:
                        if self.selection == 'random':
                            for _ in range(1 + int(
                                    random.random() < self.crossover_rate)):
                                idx = random.choice(selectable_indices)
                                c.append(
                                    archive_individuals[idx[0], idx[1],
                                                        idx[2], :].tolist())
                        elif self.selection == 'tournament':
                            idx = []
                            t = min(self.tournament_size,
                                    len(selectable_indices))
                            while len(idx) < 1 + int(
                                    random.random() < self.crossover_rate):
                                competitors = random.sample(
                                    selectable_indices.tolist(), t)
                                winner = max(competitors,
                                             key=lambda x: archive_scores[x[
                                                 0], x[1], x[2]])
                                idx.append(winner)
                            for i in idx:
                                c.append(archive_individuals[i[0], i[1],
                                                             i[2], :].tolist())

                    if len(c) == 2:
                        candidate = crossover(*c)
                    else:
                        candidate = c[0]
                    candidate = mutate(candidate)
                    candidates.append(candidate)

            return candidates

        def create_description(individual):
            c = Counter(individual)
            device_mode = c.most_common(1)[0][0]
            device_mode = round((device_mode / len(device_graph.devices)) *
                                self.dimension_sizes[0])

            used_devices = round(
                ((len(set(individual)) - 1) /
                 (len(device_graph.devices))) * self.dimension_sizes[1])

            comp_graph_dict = apply_placement(net_string, individual, groups)
            comp_graph = ComputationGraph()
            comp_graph.load_from_string(json.dumps(comp_graph_dict))

            num_jumps, max_jumps = comp_graph.get_number_of_jumps(
                return_max_jumps=True)
            num_jumps = round(
                (num_jumps / max_jumps) * (self.dimension_sizes[2] - 1))

            return (device_mode, used_devices, num_jumps)

        def benchmark(individual, benchmarking_function):
            device_assignment = get_device_assignment(
                apply_placement(net_string, individual, groups))
            time, memory_overflow = benchmarking_function(
                device_assignment, return_memory_overflow=True)

            description = create_description(individual)

            # Time is set to -1 if memory overflows - but we check with memory_overflow instead
            time = max(time, 0)

            if memory_overflow == -1:
                memory_overflow = 1

            if memory_overflow > 0:
                time += memory_overflow * 10**9 * 1

            return 1 / time, description, individual

        def reevaluate_archive(benchmarking_function=None,
                               n_keep=None,
                               time_threshold=None):
            indices = list(np.argwhere(np.isfinite(archive_scores)))

            if time_threshold:
                indices = [
                    i for i in indices
                    if archive_scores[i[0], i[1], i[2]] >= 1 / time_threshold
                ]

            if n_keep:
                indices = sorted(
                    indices, key=lambda i: -archive_scores[i[0], i[1], i[2]])
                indices = indices[:n_keep]

            assert len(
                indices), 'No solutions fulfill the specified requirements'

            archive_scores[:] = np.NaN
            if self.verbose:
                if n_keep:
                    log(f'Reevaluating {n_keep} best individuals in archive (and throwing away the rest)')
                else:
                    log('Reevaluating all individuals in archive')
                if time_threshold:
                    log(f'Time threshold: {time_threshold}ms')
            for i in tqdm(indices, disable=not self.verbose):
                individual = archive_individuals[i[0], i[1], i[2], :].tolist()
                if benchmarking_function:
                    archive_scores[i[0], i[1],
                                   i[2]] = benchmark(individual,
                                                     benchmarking_function)[0]
                else:
                    archive_scores[i[0], i[1], i[2]] = evaluate(individual)[0]

        def log_archive(file_name):
            indices = list(np.argwhere(np.isfinite(archive_scores)))
            indices = sorted(indices,
                             key=lambda i: -archive_scores[i[0], i[1], i[2]])

            with open(os.path.join(get_log_dir(), 'archive_logs', file_name),
                      'w') as f:
                f.write('niche; time; placement\n')
                for i in indices:
                    niche = tuple(i)
                    time = 1 / archive_scores[i[0], i[1], i[2]]
                    placement = archive_individuals[i[0], i[1], i[2]].tolist()

                    f.write(f'{niche}; {time}; {placement}\n')

        def run_optimization(steps, benchmarking_function=None, start_step=0):
            nonlocal archive_individuals, archive_scores

            if self.verbose:
                if benchmarking_function:
                    log('Optimizing with benchmarking...')
                else:
                    log('Optimizing with simulator...')

            step_size = 1 if benchmarking_function else self.n_threads

            for i in tqdm(range(0, steps, step_size),
                          disable=not self.verbose):
                init_number = min(max(0, self.initial_size - i),
                                  self.n_threads)

                if self.include_trivial_solutions and i == 0:
                    candidates = create_candidates(init_number,
                                                   create_trivial=True,
                                                   create_random=True)
                else:
                    candidates = create_candidates(init_number,
                                                   create_random=True)
                if init_number > 0:
                    candidates += create_candidates(
                        self.n_threads - init_number,
                        selectable_candidates=candidates[:])
                else:
                    candidates += create_candidates(self.n_threads -
                                                    init_number)

                if benchmarking_function:
                    eval_results = [
                        benchmark(candidates[0], benchmarking_function)
                    ]
                elif self.n_threads == 1:
                    eval_results = [evaluate(candidates[0])]
                else:
                    fn_args = zip(((create_description(c), c)
                                   for c in candidates), repeat(net_string),
                                  repeat(groups), repeat(device_graph),
                                  repeat(self.pipeline_batches),
                                  repeat(self.batches),
                                  repeat(self.simulator_comp_penalty),
                                  repeat(self.simulator_comm_penalty),
                                  repeat(self.device_memory_utilization))

                    eval_results = self.worker_pool.starmap(_evaluate, fn_args)

                for result in eval_results:
                    score, description, individual = result

                    previous_elite_score = archive_scores[description[0],
                                                          description[1],
                                                          description[2]]
                    if np.isnan(previous_elite_score
                                ) or previous_elite_score < score:
                        archive_scores[description[0], description[1],
                                       description[2]] = score
                        archive_individuals[description[0], description[1],
                                            description[2], :] = individual

                if self.verbose and (i + 1) % self.verbose < step_size:
                    best_time = 1 / np.nanmax(archive_scores)
                    log(f'[{i + 1}/{steps}] Best time: {best_time:.4f}ms')

                if self.score_save_period and (i % self.score_save_period == 0
                                               or steps - i < step_size):
                    best_time = 1 / np.nanmax(archive_scores)
                    with open(os.path.join(get_log_dir(), 'time_history.csv'),
                              'a') as f:
                        f.write(f'{i + start_step + 1}, {best_time}\n')

                if self.archive_log_period and (
                        i + 1) % self.archive_log_period < step_size:
                    log_archive(f'step_{i + start_step + 1:06}.csv')

        if self.score_save_period:
            with open(os.path.join(get_log_dir(), 'time_history.csv'),
                      'w') as f:
                f.write('step, time\n')

        run_optimization(self.steps)

        if self.worker_pool:
            self.worker_pool.close()

        if self.archive_log_period is not None:
            log_archive('1_simulation_finished.csv')

        if self.benchmarking_steps > 0 or self.benchmark_before_selection:
            reevaluate_archive(self.benchmarking_function,
                               n_keep=self.benchmarking_n_keep,
                               time_threshold=self.benchmarking_time_threshold)

            if self.archive_log_period is not None:
                log_archive('2_reevaluated.csv')

        if self.benchmarking_steps > 0:
            run_optimization(self.benchmarking_steps,
                             self.benchmarking_function, self.steps)
            log_archive('3_benchmarking_finished.csv')

        if self.show_score_plot or self.plot_animation:
            # max_jumps is needed both for the static archive plot and for the
            # animation below, so compute it once regardless of which is enabled
            graph = ComputationGraph()
            graph.load_from_string(net_string)
            _, max_jumps = graph.get_number_of_jumps(return_max_jumps=True)

        if self.show_score_plot:
            if self.verbose:
                log('Plotting archive scores...', end='')
            plot_map_elites_archive(archive_scores,
                                    n_devices,
                                    max_jumps,
                                    self.plot_axes,
                                    save_path=os.path.join(
                                        get_log_dir(), 'archive_plot.pdf'))
            if self.verbose:
                log('Done')

        if self.plot_animation:
            if not self.archive_log_period and self.verbose:
                log('self.plot_animation was set to True, but archive logging was not enabled. '
                    'Skipping animation plot.')
            else:
                if self.verbose:
                    log('Plotting archive animation...', end='')
                paths = glob(
                    os.path.join(get_log_dir(), 'archive_logs', 'step_*.csv'))
                plot_archive_animation(
                    paths,
                    (os.path.join(get_log_dir(), 'archive_animation.mp4'),
                     os.path.join(get_log_dir(), 'archive_animation.gif')),
                    self.dimension_sizes,
                    n_devices=n_devices,
                    max_jumps=max_jumps,
                    axes=self.plot_axes,
                    fps=self.animation_fps)
                if self.verbose:
                    log('Done')

        if return_full_archive:
            return archive_scores, archive_individuals

        best_index = np.nanargmax(archive_scores)
        best_individual = archive_individuals.reshape(
            (-1, len(groups)))[best_index]

        if self.verbose:
            log(f'Best individual: {best_individual.tolist()}')

        solution = json.dumps(apply_placement(net_string,
                                              best_individual.tolist(),
                                              groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'me_solution.json'), 'w') as f:
            f.write(solution)

        return solution