Example #1
    def wrap_optimizer(self, optimizer: Any) -> Any:
        """
        This should be used to wrap optimizer objects immediately after they have
        been created. Users should use the output of this wrapper as the new instance
        of their optimizer. For example, if users create their optimizer within
        ``build_estimator()``, they should call ``optimizer = wrap_optimizer(optimizer)``
        prior to passing the optimizer into their Estimator.
        """
        self.optimizer_initialized = True
        if not self.hvd_config.use:
            return optimizer

        check.check_false(
            isinstance(optimizer, str),
            "Please specify an optimizer object instead of using a string name.",
        )

        hvd.require_horovod_type("tensorflow", "EstimatorContext.wrap_optimizer was called.")
        use_compression = self.hvd_config.fp16_compression
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            compression=hvd.compression.Compression.fp16
            if use_compression
            else hvd.compression.Compression.none,
            aggregation_frequency=self.hvd_config.aggregation_frequency,
            average_aggregated_gradients=self.hvd_config.average_aggregated_gradients,
        )
        logging.debug("Initialized optimizer for distributed and optimized parallel training.")
        return optimizer
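
For orientation, a minimal usage sketch, assuming a Determined-style ``EstimatorTrial`` whose context exposes ``wrap_optimizer`` as above; the class name, optimizer choice, and ``model_fn`` body are illustrative and not part of the example:

    import tensorflow as tf
    from determined import estimator


    class MyTrial(estimator.EstimatorTrial):
        # (other required EstimatorTrial methods omitted for brevity)
        def __init__(self, context) -> None:
            self.context = context

        def build_estimator(self) -> tf.estimator.Estimator:
            # Create the optimizer, then wrap it before the Estimator ever sees it.
            optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=1e-3)
            optimizer = self.context.wrap_optimizer(optimizer)

            def model_fn(features, labels, mode):
                loss = tf.reduce_mean(tf.square(features - labels))
                train_op = optimizer.minimize(loss, tf.compat.v1.train.get_global_step())
                return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

            return tf.estimator.Estimator(model_fn=model_fn)
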
Example #2
    def cache_dataset(
        self, dataset_id: str, dataset_version: str, shuffle: bool, skip_shuffle_at_epoch_end: bool,
    ) -> Callable:

        # Perform lazy initialization of storage so that if users are not
        # using data layer, we are not creating unused directories.
        self._configure_storage()

        if self._training:
            # We only check for decorator re-use on the training cacheable, because for
            # EstimatorTrial the validation cacheable may legitimately be called every
            # time validation is performed.
            check.check_false(
                self._decorator_used,
                "Pleas use both `@context.experimental.cache_train_dataset(dataset_name, "
                "dataset_version)` and `@context.experimental.cache_validation_dataset("
                "dataset_name, dataset_version)` exactly once.",
            )
        self._decorator_used = True
        dataset_version += "_train" if self._training else "_val"

        def _wrap(make_dataset_fn: Callable) -> Callable:
            @functools.wraps(make_dataset_fn)
            def _decorated_fn(*args: Any, **kwargs: Any) -> Any:
                @self._storage.cacheable(  # type: ignore
                    dataset_id=dataset_id, dataset_version=dataset_version,
                )
                def make_dataset() -> yogadl.DataRef:
                    return make_dataset_fn(*args, **kwargs)

                logging.info(f"Preparing dataset: {dataset_id}:{dataset_version}.")
                logging.debug(
                    f"Calling make dataset for: {dataset_id}:{dataset_version} "
                    f"with following start_offset: {self._offset}, "
                    f"shuffle: {shuffle} shuffle_seed: {self._shuffle_seed} "
                    f"shard_rank: {self._shard_rank}, world size: {self._num_shards} "
                    f"training: {self._training}."
                )

                stream_from_cache = make_dataset().stream(
                    start_offset=self._offset,
                    shuffle=shuffle,
                    skip_shuffle_at_epoch_end=skip_shuffle_at_epoch_end,
                    shuffle_seed=self._shuffle_seed,
                    shard_rank=self._shard_rank,
                    num_shards=self._num_shards,
                    drop_shard_remainder=self._training,
                )
                self._dataset_length = len(stream_from_cache)
                logging.info(f"Dataset {dataset_id}:{dataset_version} preparation finished.")

                return tensorflow.make_tf_dataset(stream_from_cache)

            return _decorated_fn

        return _wrap
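
By way of illustration, the entry points named in the error message above (``cache_train_dataset`` / ``cache_validation_dataset``) presumably forward to this method. A hedged sketch of how a trial method might decorate its dataset builder, with the dataset id, version, and contents made up for the example:

    import tensorflow as tf


    def build_train_spec(self) -> tf.estimator.TrainSpec:
        # The decorated builder only runs on a cache miss; afterwards the cached
        # dataset is streamed back with sharding and shuffling applied for us.
        @self.context.experimental.cache_train_dataset("toy_data", "1.0", shuffle=True)
        def make_dataset() -> tf.data.Dataset:
            return tf.data.Dataset.range(1000).map(lambda x: (x, x % 10))

        def input_fn() -> tf.data.Dataset:
            return make_dataset().batch(32)

        return tf.estimator.TrainSpec(input_fn)
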
Example #3
    def patch(args: argparse.Namespace) -> None:
        check_false(args.all and args.agent_id)

        if not (args.all or args.agent_id):
            print("Error: must specify exactly one of `--all` or agent_id")
            sys.exit(1)

        if args.agent_id:
            agent_ids = [args.agent_id]
        else:
            r = api.get(args.master, "agents")
            agent_ids = sorted(local_id(a) for a in r.json().keys())

        for agent_id in agent_ids:
            path = "agents/{}/slots".format(agent_id)
            headers = {"Content-Type": "application/merge-patch+json"}
            payload = {"enabled": enabled}

            api.patch(args.master, path, body=payload, headers=headers)
            status = "Disabled" if not enabled else "Enabled"
            print("{} agent {}".format(status, agent_id))
Example #4
    def wrap_optimizer(self, optimizer: Any) -> Any:
        """
        This should be used to wrap optimizer objects immediately after they have
        been created. Users should use the output of this wrapper as the new instance
        of their optimizer. For example, if users create their optimizer within
        ``build_estimator()``, they should call ``optimizer = wrap_optimizer(optimizer)``
        prior to passing the optimizer into their Estimator.
        """
        if not self.env.managed_training:
            return optimizer

        self.optimizer_initialized = True
        if not self.hvd_config.use:
            return optimizer

        check.check_false(
            isinstance(optimizer, str),
            "Please specify an optimizer object instead of using a string name.",
        )

        hvd.require_horovod_type("tensorflow", "EstimatorContext.wrap_optimizer was called.")
        use_compression = self.hvd_config.fp16_compression

        # The signature of our horovod optimizer changed after we rebased onto 0.21.
        hvd_sig = inspect.signature(hvd.DistributedOptimizer)
        horovod_kwargs = {
            "compression": hvd.compression.Compression.fp16
            if use_compression
            else hvd.compression.Compression.none,
            "average_aggregated_gradients": self.hvd_config.average_aggregated_gradients,
        }
        if "aggregation_frequency" in hvd_sig.parameters:
            horovod_kwargs["aggregation_frequency"] = self.hvd_config.aggregation_frequency
        else:
            horovod_kwargs["backward_passes_per_step"] = self.hvd_config.aggregation_frequency

        optimizer = hvd.DistributedOptimizer(optimizer, **horovod_kwargs)
        logging.debug("Initialized optimizer for distributed and optimized parallel training.")
        return optimizer
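
The version check here is plain keyword feature-detection with ``inspect.signature``; the same pattern, reduced to a toy example with made-up function names (not Horovod), looks like this:

    import inspect


    def new_api(optimizer, backward_passes_per_step=1):
        return ("new", backward_passes_per_step)


    def old_api(optimizer, aggregation_frequency=1):
        return ("old", aggregation_frequency)


    def call_with_frequency(fn, optimizer, frequency):
        # Pass the frequency under whichever keyword the target actually accepts.
        params = inspect.signature(fn).parameters
        key = "aggregation_frequency" if "aggregation_frequency" in params else "backward_passes_per_step"
        return fn(optimizer, **{key: frequency})


    assert call_with_frequency(old_api, None, 4) == ("old", 4)
    assert call_with_frequency(new_api, None, 4) == ("new", 4)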