Example #1
def make_device_groups(worker_count, devices, shared, cpu_mode):
    if not devices or cpu_mode:
        print('Warning: no devices detected, running in CPU mode')
        devices = [0]

    if shared or cpu_mode:
        return [devices for _ in range(worker_count)]

    device_per_worker = len(devices) // worker_count
    remaining_devices = len(devices) % worker_count

    if device_per_worker == 0:
        raise RuntimeError(
            f'Not enough devices (devices: {len(devices)}) < (workers: {worker_count}). '
            'Use --device-sharing or --cpu to bypass this error'
        )

    groups = []
    for wid in range(worker_count):
        groups.append(devices[device_per_worker * wid: device_per_worker * (wid + 1)])

    if remaining_devices > 0:
        warning('Some devices were not assigned to any worker')

    return groups
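
A minimal usage sketch, assuming make_device_groups above (and the warning helper it calls) is in scope; the values are illustrative only:

# Four devices split across two workers: each worker gets an exclusive pair.
groups = make_device_groups(worker_count=2, devices=[0, 1, 2, 3],
                            shared=False, cpu_mode=False)
print(groups)   # [[0, 1], [2, 3]]

# With device sharing enabled, every worker sees the full device list.
shared_groups = make_device_groups(worker_count=3, devices=[0, 1],
                                   shared=True, cpu_mode=False)
print(shared_groups)   # [[0, 1], [0, 1], [0, 1]]
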
Example #2
    def init(self, params=None, override=False, **kwargs):
        """instantiate the underlying optimizer

        Raises
        ------
        MissingParameters
            if an hyper parameter is missing
        """
        if params is not None:
            params = list(params)
            assert isinstance(params, (list, tuple))

        if self._optimizer and not override:
            warning(
                'Optimizer is already set, use override=True to force re-initialization'
            )
            return self

        # add missing hyper parameters
        self.hyper_parameters.add_parameters(**kwargs)

        if params is None:
            params = self._model_parameters

        if params is None:
            raise MissingArgument('Missing Model parameters!')

        self._optimizer = self._wrap_optimizer(
            self.optimizer_builder(
                params, **self.hyper_parameters.parameters(strict=True)))

        return self
Example #3
    def observe(self, hpo):
        debug('observe')
        new_results = 0

        m = self.pop_result()
        while m is not None:
            actioned = True
            if m.mtype == RESULT_ITEM:
                info(f'HPO {self.experiment} observed {m.message[0]["uid"]}')
                try:
                    hpo.observe(m.message[0], m.message[1])
                    new_results += 1
                except TrialDoesNotExist as e:
                    warning(f'Could not observe trial: {e}')
                    actioned = False

            elif m.mtype == WORKER_JOIN:
                self.worker_count += 1

            elif m.mtype == WORKER_LEFT:
                self.worker_count -= 1

            else:
                debug(f'Received: {m}')

            if actioned:
                self.future_client.mark_actioned(RESULT_QUEUE, m)

            m = self.pop_result()
        return new_results
Example #4
    def save(self, task):
        if self.uid is None:
            raise BadCheckpoint('No uid was given cannot save state')

        was_saved = False
        state = state_dict(task)
        state['rng'] = get_rng_states()

        # Has enough time passed since the last save?
        now = datetime.utcnow()
        elapsed = now - self.last_save
        should_save = elapsed.total_seconds() > self.time_buffer

        # Is it the best model we have seen so far
        is_best = True
        if self.keep_best is not None:
            is_best = self.keep_best(task.metrics.value())

        if state:
            # The current model is not the best, and the previous state (which is the
            # best so far) was not saved under a separate name, so rename it before it
            # gets overridden by the current state.
            if not is_best and self.best_name is None:
                info(f'Saving best ({self.keep_best.metric}: {self.keep_best.best})')
                self.best_name = self.new_best_name()

                was_pending = self.save_pending()
                if not was_pending:
                    self.storage.rename(self.uid, self.best_name)

            if should_save:
                was_saved = self.storage.save(self.uid, state)
                self.save_pending()
                self.pending = None
                self.last_save = datetime.utcnow()
            else:
                self.save_pending()
                self.pending = (is_best, state)

            # We have a new best, and the previous best was saved under a different
            # filename, so both the best state and the latest state must be updated.
            if is_best and self.best_name is not None:
                info(f'New best ({self.keep_best.metric}: {self.keep_best.best})')

                self.storage.remove(self.best_name)
                self.best_name = self.new_best_name()

                was_pending = self.save_pending()
                if not was_pending:
                    self.storage.copyfile(self.uid, self.best_name)

        else:
            warning('The state dictionary was empty!')

        if was_saved:
            info('Checkpoint saved')
            return

        info('Skipped Checkpoint')
Example #5
    def __init__(self,
                 name=None,
                 *,
                 params=None,
                 optimizer=None,
                 half=False,
                 loss_scale=1,
                 dynamic_loss_scale=False,
                 scale_window=1000,
                 scale_factor=2,
                 min_loss_scale=None,
                 max_loss_scale=2.**24,
                 **kwargs):
        self._optimizer = None

        if params is not None:
            params = list(params)
            assert isinstance(params, (list, tuple))

        self._model_parameters = params
        self._half_parameters(half, loss_scale, dynamic_loss_scale,
                              scale_window, scale_factor, min_loss_scale,
                              max_loss_scale)

        # Track defined hyper parameters
        self.hyper_parameters = HyperParameters(space={})

        if optimizer:
            warning('Using custom optimizer')
            if isinstance(optimizer, type):
                self.optimizer_builder = optimizer

                if hasattr(optimizer, 'get_space'):
                    self.hyper_parameters.space = optimizer.get_space()
            else:
                self._optimizer = self._wrap_optimizer(optimizer)

                if hasattr(self._optimizer, 'get_space'):
                    self.hyper_parameters.space = self._optimizer.get_space()

        elif name:
            # load a registered olympus optimizer
            self.optimizer_builder = registered_optimizers.get(name.lower())

            if not self.optimizer_builder:
                raise RegisteredOptimizerNotFound(name)

            if hasattr(self.optimizer_builder, 'get_space'):
                self.hyper_parameters.space = self.optimizer_builder.get_space()

        else:
            raise MissingArgument('optimizer or name needs to be set')

        # All additional args are hyper parameters
        self.hyper_parameters.add_parameters(**kwargs)
Example #6
def register_hpo(name, factory, override=False):
    global registered_optimizer

    if name in registered_optimizer:
        warning(f'{name} was already registered, use override=True to replace it')

        if not override:
            return

    registered_optimizer[name] = factory
Example #7
def register_initialization(name, factory, override=False):
    global registered_initialization

    if name in registered_initialization:
        warning(f'{name} was already registered, use override=True to replace it')

        if not override:
            return

    registered_initialization[name] = factory
Example #8
def register_dataset(name, factory, override=False):
    global registered_datasets

    if name in registered_datasets:
        warning(f'{name} was already registered, use override=True to replace it')

        if not override:
            return

    registered_datasets[name] = factory
Example #9
def register_adversary(name, factory, override=False):
    global registered_adversary

    if name in registered_adversary:
        warning(f'{name} was already registered, use override=True to replace it')

        if not override:
            return

    registered_adversary[name] = factory
Example #10
def register_environment(name, factory, override=False):
    """Register a new environment backend"""
    global registered_environment

    if name in registered_environment:
        warning(f'{name} was already registered, use override=True to replace it')

        if not override:
            return

    registered_environment[name] = factory
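
Examples #6 through #10 all follow the same registry pattern; below is a self-contained sketch of that pattern, with illustrative names (register, registry, DummyEnv) and the stdlib logger standing in for the project's warning helper:

from logging import warning  # stand-in for the project's own logging helper

registry = {}  # illustrative module-level registry


def register(name, factory, override=False):
    # Same duplicate-handling logic as the register_* helpers above.
    if name in registry:
        warning(f'{name} was already registered, use override=True to replace it')

        if not override:
            return

    registry[name] = factory


class DummyEnv:
    pass


register('dummy', DummyEnv)
register('dummy', DummyEnv)                 # warns and keeps the existing entry
register('dummy', DummyEnv, override=True)  # warns, then replaces the entry
env_cls = registry.get('dummy')             # lookup side, as used in Example #5
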
Example #11
    def init(self, override=False, **kwargs):
        if self._initializer and not override:
            warning(
                'Initializer is already set, use override=True to force re-initialization'
            )
            return self

        self.hyper_parameters.add_parameters(**kwargs)
        self._initializer = self.initializer_ctor(
            **self.hyper_parameters.parameters(strict=True))

        return self
Example #12
    def __init__(self, dataset, device):
        """
        Args:
            dataset (torch.utils.data.Dataset): dataset to cache
            device  (torch.device): device where cached samples will be stored
        """
        warning('DatasetCache must only be used with small datasets')

        if device.type == 'cuda':
            warning(
                'Warning: pin_memory must be set to \'False\' when caching to a cuda device'
            )

        self.dataset = dataset
        self.device = device
        self.cache = {}
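
The excerpt shows only the constructor; a plausible __getitem__ for such a cache (an assumption, not the library's actual code) would fetch each sample once and serve it from device memory afterwards:

from torch.utils.data import Dataset


class DatasetCacheSketch(Dataset):
    """Illustrative only; assumes each sample is a (tensor, label) pair."""

    def __init__(self, dataset, device):
        self.dataset = dataset
        self.device = device
        self.cache = {}

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        if index not in self.cache:
            x, y = self.dataset[index]
            # Fetch once, move to the target device, then serve from the cache.
            self.cache[index] = (x.to(self.device), y)
        return self.cache[index]
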
Example #13
    def init(self, optimizer=None, override=False, **kwargs):
        """Initialize the LR schedule with the given hyper parameters"""
        if self._schedule:
            warning('LRSchedule is already set, use override=True to force re-initialization')

            if not override:
                return self._schedule

        if optimizer is None:
            optimizer = self._optimizer

        if optimizer is None:
            raise MissingArgument('Missing optimizer argument!')

        self.hyper_parameters.add_parameters(**kwargs)
        self._schedule = self._schedule_builder(
            optimizer,
            **self.hyper_parameters.parameters(strict=True))

        return self
Example #14
    def add_aspect(obj_type, aspect):
        if obj_type in Resumable._aspects:
            warning(f'Overriding the aspect of {obj_type}')

        Resumable._aspects[obj_type] = aspect
Example #15
    def load_state_dict(self, state_dict):
        """Load a state dictionary to resume a previous training run"""
        warning(f'This metric {type(self)} does not support resuming')
Example #16
    def state_dict(self):
        """Return a state dictionary used for checkpointing and resuming"""
        warning(f'This metric {type(self)} does not support resuming')
        return {}
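
Examples #15 and #16 are the default, non-resumable hooks; a metric that does support resuming would override both, roughly like this hypothetical RunningSum:

class RunningSum:
    """Hypothetical metric whose state survives a checkpoint/restore cycle."""

    def __init__(self):
        self.total = 0.0
        self.count = 0

    def update(self, value):
        self.total += value
        self.count += 1

    def state_dict(self):
        # Everything needed to resume the metric after a reload.
        return {'total': self.total, 'count': self.count}

    def load_state_dict(self, state_dict):
        self.total = state_dict['total']
        self.count = state_dict['count']
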
Example #17
    def get_space(self) -> Dict[str, str]:
        """Return the dimension space of each parameters"""
        if self._optimizer:
            warning('Optimizer is already set')

        return self.hyper_parameters.missing_parameters()
Example #18
    def get_space(self):
        """Return the missing hyper parameters required to initialize the LR schedule"""
        if self._schedule:
            warning('LRSchedule is already set')

        return self.hyper_parameters.missing_parameters()