Example #1
def callback(x, score, context):
    # Invoked by scipy's dual annealing each time a new minimum is found;
    # `i`, `self`, and `log` come from the enclosing optimize() scope,
    # shown in full in Example #2.
    if self.verbose:
        log(f'[{i + 1}/{self.steps}] Found new minimum: {score:.2f}ms')
    with open(os.path.join(get_log_dir(), 'time_history.csv'), 'a') as f:
        f.write(f'{i + 1}, {score}\n')
Example #2
    def optimize(self, net_string, device_graph):
        n_devices = len(device_graph.devices)

        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        with open(os.path.join(get_log_dir(), 'time_history.csv'), 'w') as f:
            f.write('generation, time\n')

        i = 0

        with tqdm(total=self.steps, disable=not self.verbose) as t:

            def eval_function(x):
                nonlocal i
                t.update(1)
                new_placement = [int(round(g)) for g in x]
                score = self.evaluate_placement(
                    apply_placement(net_string, new_placement, groups),
                    device_graph)

                i += 1
                return score

            def callback(x, score, context):
                if self.verbose:
                    log(f'[{i + 1}/{self.steps}] Found new minimum: {score:.2f}ms')
                with open(os.path.join(get_log_dir(), 'time_history.csv'), 'a') as f:
                    f.write(f'{i + 1}, {score}\n')

            result = scipy.optimize.dual_annealing(
                eval_function, [(0, n_devices - 1)] * len(groups),
                no_local_search=True,
                maxfun=self.steps,
                callback=callback)

        placement = [int(round(g)) for g in result.x]

        if self.verbose:
            log(f'Best found placement: {placement}')

        solution = json.dumps(apply_placement(net_string, placement, groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'sa_solution.json'), 'w') as f:
            f.write(solution)

        return solution
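
For reference, a self-contained toy of the same scipy.optimize.dual_annealing pattern. The quadratic objective and the bounds are stand-ins for illustration, not the project's placement simulator:

import scipy.optimize

steps = 200

def eval_function(x):
    # Toy objective standing in for the placement evaluation: a smooth bowl.
    return sum((xi - 1.5) ** 2 for xi in x)

def callback(x, score, context):
    # Called by the annealer each time it finds a new minimum.
    print(f'New minimum {score:.4f} at {x}')

result = scipy.optimize.dual_annealing(
    eval_function, [(0, 3)] * 4,
    no_local_search=True,   # classical simulated annealing, no local refinement
    maxfun=steps,
    callback=callback)

placement = [int(round(g)) for g in result.x]
print(placement)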
Example #3
def benchmark_all_placements(placement_directory, results_file, model_type, generation_divisible_by=None, last_gen=None,
                             verbose=False, batches=50, drop_batches=0, device_map=None, gpu_memory_limit=None,
                             drop_last=True, format='wide'):
    with open(results_file, 'w') as f:
        f.write('')

    def generation_filter(file):
        if not file.endswith('.json') or not file.startswith('gen_'):
            return False

        generation = int(file.replace('gen_', '').replace('.json', ''))

        divisible_by = generation_divisible_by or 1

        if last_gen:
            return generation % divisible_by == 0 and generation <= last_gen

        return generation % divisible_by == 0

    dir_list = os.listdir(placement_directory)
    dir_list = list(filter(generation_filter, dir_list))

    for i, file in enumerate(tqdm(dir_list)):
        with open(os.path.join(placement_directory, file)) as f:
            placement = json.load(f)

        generation = int(file.replace('gen_', '').replace('.json', ''))

        if verbose:
            log(f'Benchmarking placement {i+1}/{len(dir_list)}: {file}... ', end='')

        batch_times = benchmark_with_placement(model_type, placement, batches=batches, drop_batches=drop_batches,
                                               device_map=device_map, gpu_memory_limit=gpu_memory_limit,
                                               drop_last=drop_last)

        with open(results_file, 'a') as f:
            if batch_times == -1:
                # The benchmark failed (e.g. out of memory); record -1 for every
                # batch. A list (rather than an itertools.repeat iterator) keeps
                # the sum()/len() average below working after the file is written.
                batch_times = [-1] * batches

            if format == 'wide':
                f.write(f'{generation:04}, {",".join(map(str, batch_times))}\n')
            elif format == 'long':
                for t in batch_times:
                    f.write(f'{generation:04}, {t}\n')
            else:
                raise RuntimeError('Invalid format')

        if verbose:
            log(f'{sum(batch_times)/len(batch_times):.2f}ms')
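
For clarity, with generation 10 and batch times [12.1, 12.3], the two layouts would produce roughly the following rows:

# format='wide': one row per generation, one column per batch
0010, 12.1,12.3
# format='long': one row per (generation, batch) pair
0010, 12.1
0010, 12.3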
Example #4
    def optimize(self, net_string, device_graph):
        n_devices = len(device_graph.devices)

        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        placement = [randint(0, n_devices - 1) for n in range(len(groups))]
        score = self.evaluate_placement(
            apply_placement(net_string, placement, groups), device_graph)

        if self.score_save_period:
            with open(os.path.join(get_log_dir(), 'time_history.csv'),
                      'w') as f:
                f.write('step, time\n')

        for i in tqdm(range(self.steps), disable=not self.verbose):
            new_placement = placement[:]
            new_placement[randint(0, len(new_placement) - 1)] = \
                randint(0, n_devices - 1)
            new_score = self.evaluate_placement(
                apply_placement(net_string, new_placement, groups),
                device_graph)

            if self.verbose and (i + 1) % self.verbose == 0:
                log(f'[{i + 1}/{self.steps}] Best run time: {score:,.2f}ms')

            if self.score_save_period and i % self.score_save_period == 0:
                with open(os.path.join(get_log_dir(), 'time_history.csv'),
                          'a') as f:
                    f.write(f'{i + 1}, {score}\n')

            if new_score != -1:
                if new_score < score or score == -1 \
                        or random() < expit((score - new_score) / self.temp(i)):
                    score = new_score
                    placement = new_placement

        solution = json.dumps(apply_placement(net_string, placement, groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'sa_solution.json'), 'w') as f:
            f.write(solution)

        return solution
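
The acceptance rule above is the usual simulated-annealing criterion expressed through the logistic function: improvements are always taken, and regressions are taken with a probability that shrinks as the temperature drops. A self-contained sketch; the exponential cooling schedule here is an assumption, not necessarily the temp() used by the class:

from random import random
from scipy.special import expit  # logistic sigmoid, 1 / (1 + exp(-x))

def temp(i, t0=100.0, decay=0.99):
    # Hypothetical exponential cooling schedule.
    return t0 * decay ** i

def accept(score, new_score, i):
    # Always accept improvements; accept regressions with probability
    # expit((score - new_score) / temp(i)), which is below 0.5 whenever
    # new_score > score and approaches 0 as the temperature cools.
    return new_score < score or random() < expit((score - new_score) / temp(i))

print(accept(100.0, 105.0, 0))     # sometimes True early on (high temperature)
print(accept(100.0, 105.0, 1000))  # almost always False once cooled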
Example #5
    def optimize(self, net_string, device_graph):
        n_devices = len(device_graph.devices)

        def generate_neighbours(placement):
            if n_devices == 1:
                return

            # Yield every placement that differs from the input in exactly one
            # position, moving that layer group one device index up or down.
            for i, p in enumerate(placement):
                if p < n_devices - 1:
                    n = placement[:]
                    n[i] = p + 1
                    yield n
                if p > 0:
                    n = placement[:]
                    n[i] = p - 1
                    yield n

        net = json.loads(net_string)
        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        placement = generate_random_placement(len(groups), n_devices)
        score = self.evaluate_placement(
            apply_placement(net_string, placement, groups), device_graph)

        i = 0
        while True:
            i += 1
            if self.verbose:
                log(f'Iteration {i}. Best running time: {score:.2f}ms')

            for n in generate_neighbours(placement):
                new_score = self.evaluate_placement(
                    apply_placement(net_string, n, groups), device_graph)
                if (new_score < score or score == -1) and new_score != -1:
                    placement = n
                    score = new_score
                    break
            else:
                break

        return placement
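
A quick standalone run of the same neighbourhood logic shows what the hill climber enumerates; with 3 devices and placement [0, 2], for example:

def generate_neighbours(placement, n_devices):
    # Same one-step neighbourhood as above, written as a free function.
    if n_devices == 1:
        return
    for i, p in enumerate(placement):
        if p < n_devices - 1:
            yield placement[:i] + [p + 1] + placement[i + 1:]
        if p > 0:
            yield placement[:i] + [p - 1] + placement[i + 1:]

print(list(generate_neighbours([0, 2], 3)))
# [[1, 2], [0, 1]]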
Example #6
def run_variants(variants=('normal', 'limited', 'pipelined')):
    networks = ('resnet50', 'alexnet', 'inception')
    global BATCHES, PIPELINE_BATCHES, MEMORY_LIMITED, NETWORK, REPEATS
    for variation in tqdm(product(networks, variants)):
        log(f'Testing {variation[0]} network in {variation[1]} configuration')
        NETWORK = variation[0]
        if variation[1] == 'normal':
            BATCHES = 1
            PIPELINE_BATCHES = 1
            MEMORY_LIMITED = False
            REPEATS = 50
        elif variation[1] == 'limited':
            BATCHES = 1
            PIPELINE_BATCHES = 1
            MEMORY_LIMITED = True
            REPEATS = 50
        elif variation[1] == 'pipelined':
            BATCHES = 10
            PIPELINE_BATCHES = 4
            MEMORY_LIMITED = False
            REPEATS = 10

        run_optimizer_test()
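
run_variants walks the full cross product of networks and variants, so with the defaults it performs 3 × 3 = 9 test runs; itertools.product varies the rightmost element fastest, as this small sketch shows:

from itertools import product

networks = ('resnet50', 'alexnet', 'inception')
variants = ('normal', 'limited', 'pipelined')

for network, variant in product(networks, variants):
    print(network, variant)
# resnet50 normal, resnet50 limited, resnet50 pipelined, alexnet normal, ...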
Example #7
        def reevaluate_archive(benchmarking_function=None,
                               n_keep=None,
                               time_threshold=None):
            indices = list(np.argwhere(np.isfinite(archive_scores)))

            if time_threshold:
                indices = [
                    i for i in indices
                    if archive_scores[i[0], i[1], i[2]] >= 1 / time_threshold
                ]

            if n_keep:
                indices = sorted(
                    indices, key=lambda i: -archive_scores[i[0], i[1], i[2]])
                indices = indices[:n_keep]

            assert len(indices), 'No solutions fulfill the specified requirements'

            archive_scores[:] = np.NaN
            if self.verbose:
                if n_keep:
                    log(f'Reevaluating {n_keep} best individuals in archive '
                        f'(and throwing away the rest)')
                else:
                    log('Reevaluating all individuals in archive')
                if time_threshold:
                    log(f'Time threshold: {time_threshold}ms')
            for i in tqdm(indices, disable=not self.verbose):
                individual = archive_individuals[i[0], i[1], i[2], :].tolist()
                if benchmarking_function:
                    archive_scores[i[0], i[1], i[2]] = benchmark(
                        individual, benchmarking_function)[0]
                else:
                    archive_scores[i[0], i[1], i[2]] = evaluate(individual)[0]
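
Note that the archive stores fitness as reciprocal running time (see the benchmark helper in Example #10), which is why a threshold of T ms becomes "keep scores >= 1/T". A minimal sketch of the same masking logic on a toy archive:

import numpy as np

archive_scores = np.full((2, 2, 2), np.nan)
archive_scores[0, 1, 0] = 1 / 12.0   # a 12 ms solution
archive_scores[1, 0, 1] = 1 / 30.0   # a 30 ms solution

time_threshold = 20.0  # keep everything faster than 20 ms
indices = [i for i in np.argwhere(np.isfinite(archive_scores))
           if archive_scores[i[0], i[1], i[2]] >= 1 / time_threshold]
print(indices)  # only the 12 ms entry survives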
Example #8
def optimize_with_config(config_path=None,
                         config=None,
                         verbose=True,
                         set_log_dir=False):
    assert config_path or config, 'Either a config path or a config dictionary must be provided'
    assert config is None or isinstance(config, dict), 'config must be a dictionary'

    if config_path:
        with open(config_path) as f:
            config = json.load(f)

    device_graph_path = config['device_graph_path']
    net_path = config['net_path']

    log_dir = config.get('log_dir', '')
    if log_dir and set_log_dir:
        exprimo.set_log_dir(log_dir)

    if verbose:
        log('\n\n\n')
        log('=' * 100)
        log('EXPRIMO OPTIMIZATION'.rjust(60))
        log('=' * 100)
        log()

    if verbose:
        if config_path:
            log(f'Using config path {config_path}')
        else:
            log('Using config provided as dictionary')

    args = config.get('optimizer_args', {})

    batches = args.get('batches', 1)
    pipeline_batches = args.get('pipeline_batches', 1)

    args['batches'] = batches
    args['pipeline_batches'] = pipeline_batches

    if 'benchmarking_function' in args and isinstance(args['benchmarking_function'], dict):
        args['benchmarking_function'] = create_benchmark_function(
            **args['benchmarking_function'])

    comp_penalty = args.get('simulator_comp_penalty', 1.0)
    comm_penalty = args.get('simulator_comm_penalty', 1.0)

    optimizers = {
        'random_hill_climber': RandomHillClimbingOptimizer,
        'hill_climber': HillClimbingOptimizer,
        'linear_search': LinearSearchOptimizer,
        'simulated_annealing': SimulatedAnnealingOptimizer,
        'sa': SimulatedAnnealingOptimizer,
        'scipy_sa': ScipySimulatedAnnealingOptimizer,
        'scipy_simulated_annealing': ScipySimulatedAnnealingOptimizer,
        'genetic_algorithm': GAOptimizer,
        'ga': GAOptimizer,
        'pso': ParticleSwarmOptimizer,
        'particle_swarm': ParticleSwarmOptimizer,
        'map_elites': MapElitesOptimizer,
        'map-elites': MapElitesOptimizer
    }

    # .get() avoids a KeyError when no temp_schedule is configured
    if config['optimizer'] in ['sa', 'simulated_annealing'] and isinstance(
            args.get('temp_schedule'), list):
        tp = args['temp_schedule']
        args['temp_schedule'] = temp_schedules[tp[0]](*tp[1:])

    optimizer = optimizers[config['optimizer']](**args)

    device_graph = DeviceGraph.load_from_file(device_graph_path)
    with open(net_path) as f:
        net_string = f.read()

    if verbose:
        log(f'Optimizing {net_path} on {device_graph_path} using {optimizer}')
        log(args)
        log()

    best_net = optimizer.optimize(net_string, device_graph)
    net_dict = json.loads(best_net)

    graph = ComputationGraph()
    graph.load_from_string(best_net)
    simulator = Simulator(graph, device_graph)
    simulated_execution_time, events = simulator.simulate(
        batch_size=128,
        print_memory_usage=config.get('print_memory_usage', False),
        print_event_trace=config.get('print_event_trace', False),
        return_event_trace=True,
        batches=batches,
        pipeline_batches=pipeline_batches,
        comm_penalization=comm_penalty,
        comp_penalization=comp_penalty)

    if config.get('plot_event_trace', True):
        save_path = os.path.join(exprimo.get_log_dir(), 'event_trace.pdf')
        plot_event_trace(events, simulator, save_path=save_path)

    if verbose:
        log('\n')
        log(f'Simulated execution time: {simulated_execution_time:.2f}ms')

        if config.get('benchmark_solution', False) and args.get('benchmarking_function', None):
            device_assignment = get_device_assignment(net_dict)
            time = args['benchmarking_function'](device_assignment)
            log(f'Benchmarked execution time: {time:.2f}ms')

    return best_net, simulated_execution_time
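
Pieced together from the keys this function reads, a minimal call might look like the following. The paths, the 'exponential' schedule name, and the argument values are illustrative placeholders, not values from the project:

config = {
    'device_graph_path': 'configs/device_graph.json',  # placeholder path
    'net_path': 'configs/resnet50.json',               # placeholder path
    'optimizer': 'sa',
    'optimizer_args': {
        'batches': 1,
        'pipeline_batches': 1,
        # [name, *args], looked up in temp_schedules; 'exponential' is assumed
        'temp_schedule': ['exponential', 100.0, 0.99],
    },
}

best_net, simulated_time = optimize_with_config(config=config)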
Example #9
        def run_optimization(steps, benchmarking_function=None, start_step=0):
            nonlocal archive_individuals, archive_scores

            if self.verbose:
                if benchmarking_function:
                    log('Optimizing with benchmarking...')
                else:
                    log('Optimizing with simulator...')

            step_size = 1 if benchmarking_function else self.n_threads

            for i in tqdm(range(0, steps, step_size),
                          disable=not self.verbose):
                init_number = min(max(0, self.initial_size - i),
                                  self.n_threads)

                if self.include_trivial_solutions and i == 0:
                    candidates = create_candidates(init_number,
                                                   create_trivial=True,
                                                   create_random=True)
                else:
                    candidates = create_candidates(init_number,
                                                   create_random=True)
                if init_number > 0:
                    candidates += create_candidates(
                        self.n_threads - init_number,
                        selectable_candidates=candidates[:])
                else:
                    candidates += create_candidates(self.n_threads - init_number)

                if benchmarking_function:
                    eval_results = [
                        benchmark(candidates[0], benchmarking_function)
                    ]
                elif self.n_threads == 1:
                    eval_results = [evaluate(candidates[0])]
                else:
                    fn_args = zip(((create_description(c), c)
                                   for c in candidates), repeat(net_string),
                                  repeat(groups), repeat(device_graph),
                                  repeat(self.pipeline_batches),
                                  repeat(self.batches),
                                  repeat(self.simulator_comp_penalty),
                                  repeat(self.simulator_comm_penalty),
                                  repeat(self.device_memory_utilization))

                    eval_results = self.worker_pool.starmap(_evaluate, fn_args)

                for result in eval_results:
                    score, description, individual = result

                    previous_elite_score = archive_scores[description[0],
                                                          description[1],
                                                          description[2]]
                    if np.isnan(previous_elite_score) or previous_elite_score < score:
                        archive_scores[description[0], description[1],
                                       description[2]] = score
                        archive_individuals[description[0], description[1],
                                            description[2], :] = individual

                if self.verbose and (i + 1) % self.verbose < step_size:
                    best_time = 1 / np.nanmax(archive_scores)
                    log(f'[{i + 1}/{steps}] Best time: {best_time:.4f}ms')

                if self.score_save_period and (i % self.score_save_period == 0
                                               or steps - i < step_size):
                    best_time = 1 / np.nanmax(archive_scores)
                    with open(os.path.join(get_log_dir(), 'time_history.csv'),
                              'a') as f:
                        f.write(f'{i + start_step + 1}, {best_time}\n')

                if self.archive_log_period and (i + 1) % self.archive_log_period < step_size:
                    log_archive(f'step_{i + start_step + 1:06}.csv')
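
The inner update is the core MAP-Elites step: each niche (here a 3-tuple description) keeps only its highest-scoring individual. The same rule with a plain dict instead of NaN-filled arrays:

archive = {}  # description tuple -> (score, individual)

def try_insert(description, score, individual):
    # Replace the niche's elite only if the new score is strictly better
    # (scores are reciprocal times, so higher means faster).
    previous = archive.get(description)
    if previous is None or previous[0] < score:
        archive[description] = (score, individual)

try_insert((0, 1, 2), 1 / 25.0, [1, 1, 2, 2])
try_insert((0, 1, 2), 1 / 20.0, [1, 2, 2, 2])  # faster, replaces the elite
print(archive[(0, 1, 2)])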
Example #10
    def optimize(self, net_string, device_graph, return_full_archive=False):

        if self.n_threads > 1:
            self.worker_pool = Pool(self.n_threads)

        n_devices = len(device_graph.devices)
        groups = self.create_colocation_groups(
            get_flattened_layer_names(net_string))

        if self.dimension_sizes[0] == -1:
            self.dimension_sizes[0] = n_devices

        if self.dimension_sizes[1] == -1:
            self.dimension_sizes[1] = n_devices

        if self.dimension_sizes[2] == -1:
            comp_graph = ComputationGraph()
            comp_graph.load_from_string(net_string)
            _, max_jumps = comp_graph.get_number_of_jumps(
                return_max_jumps=True)
            self.dimension_sizes[2] = max_jumps

        archive_scores = np.empty(self.dimension_sizes)
        archive_scores[:] = np.NaN
        archive_individuals = np.zeros(list(self.dimension_sizes) +
                                       [len(groups)],
                                       dtype=int)

        def evaluate(individual):
            return _evaluate(individual, net_string, groups, device_graph,
                             self.dimension_sizes, self.pipeline_batches,
                             self.batches, self.simulator_comp_penalty,
                             self.simulator_comm_penalty,
                             self.device_memory_utilization)

        def mutate(individual):
            new_individual = []
            if random.random() < self.replace_mutation_rate:
                devices_present = list(set(individual))
                i1 = random.choice(devices_present)
                i2 = random.choice(devices_present)

                new_individual = [i2 if i == i1 else i for i in individual]
            elif random.random() < self.zone_mutation_rate:
                split1 = random.randint(0, len(individual) - 1)
                split2 = split1 + min(np.random.geometric(0.2),
                                      len(individual) - split1)
                dev = random.randint(0 if self.allow_cpu else 1, n_devices - 1)
                new_individual = individual[:split1] + [dev] * (
                    split2 - split1) + individual[split2:]
            else:
                for i, gene in enumerate(individual):
                    if random.random() < self.copy_mutation_rate and i > 0:
                        new_individual.append(individual[i - 1])
                    elif random.random() < self.mutation_rate:
                        if self.allow_cpu:
                            new_individual.append(
                                random.randint(0, n_devices - 1))
                        else:
                            new_individual.append(
                                random.randint(1, n_devices - 1))
                    else:
                        new_individual.append(gene)

            return new_individual

        def crossover(parent1, parent2):
            crossover_point = random.randint(1, len(parent1) - 1)
            return parent1[:crossover_point] + parent2[crossover_point:]

        def create_candidates(n,
                              create_random=False,
                              create_trivial=False,
                              selectable_candidates=None):
            if n <= 0:
                return []
            candidates = []
            if create_trivial:
                candidates.extend([[i] * len(groups)
                                   for i in range(1, n_devices)])
                n -= n_devices - 1

                if self.allow_cpu:
                    candidates.append([0] * len(groups))
                    n -= 1

            if create_random:
                while len(candidates) < n:
                    candidates.append(
                        generate_random_placement(
                            len(groups),
                            n_devices,
                            allow_device_0=self.allow_cpu))
            else:
                selectable_indices = np.argwhere(np.isfinite(archive_scores))
                # selectable_indices = sorted(selectable_indices, key=lambda x: -archive_scores[x[0], x[1], x[2]])
                while len(candidates) < n:
                    c = []
                    if selectable_candidates:
                        for _ in range(1 + int(
                                random.random() < self.crossover_rate)):
                            c.append(random.choice(selectable_candidates))
                    else:
                        if self.selection == 'random':
                            for _ in range(1 + int(
                                    random.random() < self.crossover_rate)):
                                idx = random.choice(selectable_indices)
                                c.append(
                                    archive_individuals[idx[0], idx[1],
                                                        idx[2], :].tolist())
                        elif self.selection == 'tournament':
                            idx = []
                            t = min(self.tournament_size,
                                    len(selectable_indices))
                            while len(idx) < 1 + int(
                                    random.random() < self.crossover_rate):
                                competitors = random.sample(
                                    selectable_indices.tolist(), t)
                                winner = max(
                                    competitors,
                                    key=lambda x: archive_scores[x[0], x[1], x[2]])
                                idx.append(winner)
                            for i in idx:
                                c.append(archive_individuals[i[0], i[1],
                                                             i[2], :].tolist())

                    if len(c) == 2:
                        candidate = crossover(*c)
                    else:
                        candidate = c[0]
                    candidate = mutate(candidate)
                    candidates.append(candidate)

            return candidates

        def create_description(individual):
            c = Counter(individual)
            device_mode = c.most_common(1)[0][0]
            device_mode = round((device_mode / len(device_graph.devices)) *
                                self.dimension_sizes[0])

            used_devices = round(
                ((len(set(individual)) - 1) /
                 (len(device_graph.devices))) * self.dimension_sizes[1])

            comp_graph_dict = apply_placement(net_string, individual, groups)
            comp_graph = ComputationGraph()
            comp_graph.load_from_string(json.dumps(comp_graph_dict))

            num_jumps, max_jumps = comp_graph.get_number_of_jumps(
                return_max_jumps=True)
            num_jumps = round(
                (num_jumps / max_jumps) * (self.dimension_sizes[2] - 1))

            return (device_mode, used_devices, num_jumps)

        def benchmark(individual, benchmarking_function):
            device_assignment = get_device_assignment(
                apply_placement(net_string, individual, groups))
            time, memory_overflow = benchmarking_function(
                device_assignment, return_memory_overflow=True)

            description = create_description(individual)

            # Time is set to -1 if memory overflows - but we check with memory_overflow instead
            time = max(time, 0)

            if memory_overflow == -1:
                memory_overflow = 1

            if memory_overflow > 0:
                time += memory_overflow * 10**9

            return 1 / time, description, individual

        def reevaluate_archive(benchmarking_function=None,
                               n_keep=None,
                               time_threshold=None):
            indices = list(np.argwhere(np.isfinite(archive_scores)))

            if time_threshold:
                indices = [
                    i for i in indices
                    if archive_scores[i[0], i[1], i[2]] >= 1 / time_threshold
                ]

            if n_keep:
                indices = sorted(
                    indices, key=lambda i: -archive_scores[i[0], i[1], i[2]])
                indices = indices[:n_keep]

            assert len(indices), 'No solutions fulfill the specified requirements'

            archive_scores[:] = np.NaN
            if self.verbose:
                if n_keep:
                    log(f'Reevaluating {n_keep} best individuals in archive '
                        f'(and throwing away the rest)')
                else:
                    log('Reevaluating all individuals in archive')
                if time_threshold:
                    log(f'Time threshold: {time_threshold}ms')
            for i in tqdm(indices, disable=not self.verbose):
                individual = archive_individuals[i[0], i[1], i[2], :].tolist()
                if benchmarking_function:
                    archive_scores[i[0], i[1], i[2]] = benchmark(
                        individual, benchmarking_function)[0]
                else:
                    archive_scores[i[0], i[1], i[2]] = evaluate(individual)[0]

        def log_archive(file_name):
            indices = list(np.argwhere(np.isfinite(archive_scores)))
            indices = sorted(indices,
                             key=lambda i: -archive_scores[i[0], i[1], i[2]])

            with open(os.path.join(get_log_dir(), 'archive_logs', file_name),
                      'w') as f:
                f.write('niche; time; placement\n')
                for i in indices:
                    niche = tuple(i)
                    time = 1 / archive_scores[i[0], i[1], i[2]]
                    placement = archive_individuals[i[0], i[1], i[2]].tolist()

                    f.write(f'{niche}; {time}; {placement}\n')

        def run_optimization(steps, benchmarking_function=None, start_step=0):
            nonlocal archive_individuals, archive_scores

            if self.verbose:
                if benchmarking_function:
                    log('Optimizing with benchmarking...')
                else:
                    log('Optimizing with simulator...')

            step_size = 1 if benchmarking_function else self.n_threads

            for i in tqdm(range(0, steps, step_size),
                          disable=not self.verbose):
                init_number = min(max(0, self.initial_size - i),
                                  self.n_threads)

                if self.include_trivial_solutions and i == 0:
                    candidates = create_candidates(init_number,
                                                   create_trivial=True,
                                                   create_random=True)
                else:
                    candidates = create_candidates(init_number,
                                                   create_random=True)
                if init_number > 0:
                    candidates += create_candidates(
                        self.n_threads - init_number,
                        selectable_candidates=candidates[:])
                else:
                    candidates += create_candidates(self.n_threads - init_number)

                if benchmarking_function:
                    eval_results = [
                        benchmark(candidates[0], benchmarking_function)
                    ]
                elif self.n_threads == 1:
                    eval_results = [evaluate(candidates[0])]
                else:
                    fn_args = zip(((create_description(c), c)
                                   for c in candidates), repeat(net_string),
                                  repeat(groups), repeat(device_graph),
                                  repeat(self.pipeline_batches),
                                  repeat(self.batches),
                                  repeat(self.simulator_comp_penalty),
                                  repeat(self.simulator_comm_penalty),
                                  repeat(self.device_memory_utilization))

                    eval_results = self.worker_pool.starmap(_evaluate, fn_args)

                for result in eval_results:
                    score, description, individual = result

                    previous_elite_score = archive_scores[description[0],
                                                          description[1],
                                                          description[2]]
                    if np.isnan(previous_elite_score) or previous_elite_score < score:
                        archive_scores[description[0], description[1],
                                       description[2]] = score
                        archive_individuals[description[0], description[1],
                                            description[2], :] = individual

                if self.verbose and (i + 1) % self.verbose < step_size:
                    best_time = 1 / np.nanmax(archive_scores)
                    log(f'[{i + 1}/{steps}] Best time: {best_time:.4f}ms')

                if self.score_save_period and (i % self.score_save_period == 0
                                               or steps - i < step_size):
                    best_time = 1 / np.nanmax(archive_scores)
                    with open(os.path.join(get_log_dir(), 'time_history.csv'),
                              'a') as f:
                        f.write(f'{i + start_step + 1}, {best_time}\n')

                if self.archive_log_period and (i + 1) % self.archive_log_period < step_size:
                    log_archive(f'step_{i + start_step + 1:06}.csv')

        if self.score_save_period:
            with open(os.path.join(get_log_dir(), 'time_history.csv'),
                      'w') as f:
                f.write('step, time\n')

        run_optimization(self.steps)

        if self.worker_pool:
            self.worker_pool.close()

        if self.archive_log_period is not None:
            log_archive('1_simulation_finished.csv')

        if self.benchmarking_steps > 0 or self.benchmark_before_selection:
            reevaluate_archive(self.benchmarking_function,
                               n_keep=self.benchmarking_n_keep,
                               time_threshold=self.benchmarking_time_threshold)

            if self.archive_log_period is not None:
                log_archive('2_reevaluated.csv')

        if self.benchmarking_steps > 0:
            run_optimization(self.benchmarking_steps,
                             self.benchmarking_function, self.steps)
            log_archive('3_benchmarking_finished.csv')

        if self.show_score_plot:
            if self.verbose:
                log('Plotting archive scores...', end='')
            graph = ComputationGraph()
            graph.load_from_string(net_string)
            _, max_jumps = graph.get_number_of_jumps(return_max_jumps=True)
            plot_map_elites_archive(archive_scores,
                                    n_devices,
                                    max_jumps,
                                    self.plot_axes,
                                    save_path=os.path.join(
                                        get_log_dir(), 'archive_plot.pdf'))
            if self.verbose:
                log('Done')

        if self.plot_animation:
            if not self.archive_log_period and self.verbose:
                log('self.plot_animation was set to True, but archive logging was not enabled. '
                    'Skipping animation plot.')
            else:
                if self.verbose:
                    log('Plotting archive animation...', end='')
                # Recompute max_jumps here so the animation also works when
                # show_score_plot is disabled and the value was never set above.
                graph = ComputationGraph()
                graph.load_from_string(net_string)
                _, max_jumps = graph.get_number_of_jumps(return_max_jumps=True)
                paths = glob(
                    os.path.join(get_log_dir(), 'archive_logs', 'step_*.csv'))
                plot_archive_animation(
                    paths,
                    (os.path.join(get_log_dir(), 'archive_animation.mp4'),
                     os.path.join(get_log_dir(), 'archive_animation.gif')),
                    self.dimension_sizes,
                    n_devices=n_devices,
                    max_jumps=max_jumps,
                    axes=self.plot_axes,
                    fps=self.animation_fps)
                if self.verbose:
                    log('Done')

        if return_full_archive:
            return archive_scores, archive_individuals

        best_index = np.nanargmax(archive_scores)
        best_individual = archive_individuals.reshape(
            (-1, len(groups)))[best_index]

        if self.verbose:
            log(f'Best individual: {best_individual.tolist()}')

        solution = json.dumps(apply_placement(net_string,
                                              best_individual.tolist(),
                                              groups),
                              indent=4)

        with open(os.path.join(get_log_dir(), 'me_solution.json'), 'w') as f:
            f.write(solution)

        return solution
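
The final lookup relies on np.nanargmax returning a flat (row-major) index into the 3-D score array, which lines up with the individuals array once it is reshaped to (-1, len(groups)). A toy check of that correspondence:

import numpy as np

dimension_sizes, genome_len = (2, 2, 2), 3
archive_scores = np.full(dimension_sizes, np.nan)
archive_individuals = np.zeros(dimension_sizes + (genome_len,), dtype=int)

archive_scores[1, 0, 1] = 0.05
archive_individuals[1, 0, 1, :] = [2, 2, 1]

best_index = np.nanargmax(archive_scores)          # flat index, C order
best_individual = archive_individuals.reshape((-1, genome_len))[best_index]
print(best_individual.tolist())  # [2, 2, 1]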
Example #11
def benchmark_with_placement(model_type, placement='cuda:0', batches=50, drop_batches=1, lr=0.01, verbose=False,
                             device_map=None, gpu_memory_limit=None, return_memory_overflow=False,
                             drop_last=True):
    if verbose:
        print('Starting benchmark...')

    if model_type.lower() in ['resnet', 'resnet50', 'resnet-50']:
        model_type = 'resnet50'
    elif model_type.lower() in ['inception', 'inception_v3', 'inceptionv3']:
        model_type = 'inception'
    elif model_type.lower() in ['alexnet', 'alex', 'alex_v2']:
        model_type = 'alexnet'

    model, criterion, optimizer, input_device, output_device = load_model_with_placement(model_type, placement, lr=lr,
                                                                                         device_map=device_map)

    model.train()
    batch_times = []

    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    if model_type in ['resnet50', 'alexnet']:
        dataset = torchvision.datasets.FakeData(transform=preprocess, size=500)
    elif model_type == 'inception':
        dataset = torchvision.datasets.FakeData(transform=preprocess, image_size=(3, 299, 299), size=500)

    train_loader = torch.utils.data.DataLoader(
        dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=drop_last
    )

    b = 0
    while b < batches + drop_batches:
        for data in train_loader:
            if verbose:
                print(f'Batch {b + 1}/{batches + drop_batches}', end='')

            memory_exceeded = False
            memory_overflow = 0

            if gpu_memory_limit:
                for i in range(torch.cuda.device_count()):
                    torch.cuda.reset_max_memory_allocated(device=torch.device(f'cuda:{i}'))

            try:
                torch.cuda.synchronize()
                data = data[0].to(input_device), data[1].to(output_device)

                start = time.time()
                if model_type == 'inception':
                    train_single_batch_inception(model, data, criterion, optimizer)
                else:
                    train_single_batch(model, data, criterion, optimizer)
                torch.cuda.synchronize()
                end = time.time()

                if gpu_memory_limit:
                    for i in range(torch.cuda.device_count()):
                        if isinstance(gpu_memory_limit, int):
                            max_memory_usage = torch.cuda.max_memory_allocated(torch.device(f'cuda:{i}'))
                            memory_exceeded = memory_exceeded or max_memory_usage > (gpu_memory_limit * 10**9)
                            memory_overflow += max(max_memory_usage / 10**9 - gpu_memory_limit, 0)
            except RuntimeError as e:
                if 'out of memory' in str(e):
                    memory_exceeded = True
                    memory_overflow = -1
                else:
                    raise e

            if not memory_exceeded:
                batch_times.append((end - start) * 1000)

            if verbose:
                if memory_exceeded:
                    log('Memory exceeded')
                else:
                    log(f' {batch_times[-1]}ms')

            if memory_exceeded:
                if return_memory_overflow:
                    return -1, memory_overflow
                return -1

            b += 1
            if b >= batches + drop_batches:
                break

    del model, criterion, optimizer, input_device, output_device

    if return_memory_overflow:
        return batch_times[drop_batches:], memory_overflow
    return batch_times[drop_batches:]
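
The timing pattern above (synchronize, time a step, synchronize again) is what makes the wall-clock numbers meaningful on GPU, since CUDA kernels launch asynchronously. A stripped-down sketch of the same pattern, assuming a CUDA device is available and the model, batch, criterion, and optimizer are already set up:

import time
import torch

def time_training_step(model, batch, criterion, optimizer):
    # Drain any pending kernels so the timer starts from an idle device.
    torch.cuda.synchronize()
    start = time.time()

    inputs, targets = batch
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()

    # Wait for this step's kernels to finish before reading the clock.
    torch.cuda.synchronize()
    return (time.time() - start) * 1000  # milliseconds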