Example #1
    def setup(self, config: Dict):
        trainable = wrap_function(self.__class__._function)
        # We use a filelock here to ensure that the file-writing
        # process is safe across different trainables.
        if self._ssh_identity_file:
            with FileLock(self._ssh_identity_file + ".lock"):
                settings = RayExecutor.create_settings(self._timeout_s,
                                                       self._ssh_identity_file,
                                                       self._ssh_str)
        else:
            settings = RayExecutor.create_settings(self._timeout_s,
                                                   self._ssh_identity_file,
                                                   self._ssh_str)

        self.executor = RayExecutor(settings,
                                    cpus_per_slot=self._num_cpus_per_slot,
                                    use_gpu=self._use_gpu,
                                    num_hosts=self._num_hosts,
                                    num_slots=self._num_slots)

        # We can't put `self` in the lambda closure, so we
        # resolve the variable ahead of time.
        logdir_ = str(self.logdir)

        # Starts the workers as specified by the resources above.
        self.executor.start(
            executable_cls=trainable,
            executable_kwargs={
                "config": config,
                "logger_creator": lambda cfg: logger_creator(cfg, logdir_),
            })
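
The snippet above assumes a `logger_creator` helper defined elsewhere in the file. A minimal sketch of what such a helper could look like (hypothetical, not Ray's actual implementation): give each Horovod worker its own subdirectory under the trial logdir so log files do not collide.

import os
from ray.tune.logger import UnifiedLogger

def logger_creator(config, logdir):
    # Hypothetical helper: one log subdirectory per worker process.
    worker_dir = os.path.join(logdir, "worker_{}".format(os.getpid()))
    os.makedirs(worker_dir, exist_ok=True)
    return UnifiedLogger(config, worker_dir)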
Example #2
File: ray.py Project: cxz/ludwig
 def __init__(self, horovod_kwargs, trainer_kwargs):
     # TODO ray: make this more configurable by allowing YAML overrides of timeout_s, etc.
     setting = RayExecutor.create_settings(timeout_s=30)
     self.executor = RayExecutor(
         setting, **{**get_horovod_kwargs(), **horovod_kwargs})
     self.executor.start(executable_cls=RayRemoteTrainer,
                         executable_kwargs=trainer_kwargs)
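
`get_horovod_kwargs` is not shown in this listing. A hypothetical version that derives executor defaults from the connected Ray cluster could look like this (illustrative only, not Ludwig's actual implementation):

import ray

def get_horovod_kwargs():
    # Hypothetical: one host per Ray node; GPU use inferred from
    # the cluster's declared resources.
    use_gpu = int(ray.cluster_resources().get("GPU", 0)) > 0
    return dict(
        num_hosts=len(ray.nodes()),
        num_slots=1,
        use_gpu=use_gpu,
    )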
Example #3
 def setup(self, model: LightningModule):
     """Creates the RayExecutor object."""
     self._model = model
     settings = RayExecutor.create_settings(timeout_s=30)
     self.executor = RayExecutor(settings,
                                 num_hosts=self.num_hosts,
                                 num_slots=self.num_slots,
                                 use_gpu=self.use_gpu)
     self.executor.start(executable_cls=get_executable_cls())
Example #4
 def setup(self, model):
     self.trainer.use_horovod = True
     settings = RayExecutor.create_settings(timeout_s=30)
     self.executor = RayExecutor(settings,
                                 num_hosts=self.num_hosts,
                                 num_slots=self.num_slots,
                                 use_gpu=self.use_gpu)
     self.trainer.model = model
     self.executor.start(executable_cls=get_executable_cls())
Example #5
 def __init__(self, horovod_kwargs, predictor_kwargs):
     # TODO ray: investigate using Dask for prediction instead of Horovod
     setting = RayExecutor.create_settings(timeout_s=30)
     self.executor = RayExecutor(
         setting, **{
             **get_horovod_kwargs(),
             **horovod_kwargs
         })
     self.executor.start(executable_cls=RemotePredictor,
                         executable_kwargs=predictor_kwargs)
Example #6
def main(num_workers,
         use_gpu,
         timeout_s=30,
         placement_group_timeout_s=100,
         kwargs=None):
    kwargs = kwargs or {}
    if use_gpu:
        kwargs["use_cuda"] = True
    settings = RayExecutor.create_settings(
        timeout_s=timeout_s,
        placement_group_timeout_s=placement_group_timeout_s)
    executor = RayExecutor(settings, use_gpu=use_gpu, num_workers=num_workers)
    executor.start()
    executor.run(train_fn, kwargs=kwargs)
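
None of the examples in this listing define `train_fn`; `executor.run` simply invokes it once on every worker. A minimal sketch of such a function, assuming a PyTorch workload (the model and learning rate here are placeholders):

import torch
import horovod.torch as hvd

def train_fn(use_cuda=False):
    hvd.init()  # every Ray actor initializes Horovod first
    if use_cuda:
        torch.cuda.set_device(hvd.local_rank())
    model = torch.nn.Linear(10, 1)  # placeholder model
    if use_cuda:
        model.cuda()
    # Scale the learning rate by the number of workers.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01 * hvd.size())
    # Average gradients across workers on each step.
    optimizer = hvd.DistributedOptimizer(
        optimizer, named_parameters=model.named_parameters())
    # Start all workers from the same weights.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    # ... the actual training loop would go here ...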
Example #7
    def __init__(self, horovod_kwargs, executable_kwargs):
        # TODO ray: make this more configurable by allowing YAML overrides of timeout_s, etc.
        if RayExecutor is None:
            logger.error(
                "RayLegacyTrainer failed to initialize: RayExecutor is None. Make sure horovod[ray] is installed."
            )
            return
        setting = RayExecutor.create_settings(timeout_s=30)

        self.executor = RayExecutor(
            setting, **{
                **get_horovod_kwargs(),
                **horovod_kwargs
            })
        self.executor.start(executable_cls=HorovodRemoteTrainer,
                            executable_kwargs=executable_kwargs)
Example #8
    def start_executor(self):
        # Ray executor settings
        setting = RayExecutor.create_settings(timeout_s=100)
        num_hosts = 1  # number of machines to use
        num_slots = self.num_slots  # number of workers to use on each machine
        cpus_per_slot = 1  # number of cores to allocate to each worker
        gpus_per_slot = 1  # number of GPUs to allocate to each worker
        use_gpu = gpus_per_slot > 0

        # Start num_hosts * num_slots actors on the cluster
        # https://horovod.readthedocs.io/en/stable/api.html#horovod-ray-api
        executor = RayExecutor(
            setting,
            num_hosts=num_hosts,
            num_slots=num_slots,
            cpus_per_slot=cpus_per_slot,
            gpus_per_slot=gpus_per_slot,
            use_gpu=use_gpu
        )

        # Launch the Ray actors on each machine
        # This will launch `num_slots` actors on each machine
        executor.start()
        return executor
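
A possible call site for the method above (a sketch; `train_fn` stands in for whatever function should be distributed). `executor.run` returns a list with one result per worker.

executor = self.start_executor()
results = executor.run(train_fn)  # one result per worker
executor.shutdown()               # tear the actors down when done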
Example #9
        for batch_idx, data in enumerate(train_loader):
            feature = data[:-1]
            target = data[-1]
            optimizer.zero_grad()
            output = model(*feature)
            loss = F.smooth_l1_loss(output, target)
            loss.backward()
            optimizer.step()
            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} \tLoss: {:.6f}'.format(
                    epoch, loss.item()))

    for epoch in range(1, args.epochs + 1):
        train(epoch)


if __name__ == '__main__':
    # Connect to the Ray cluster (pass address='auto' to attach to an existing one).
    import ray
    import raydp  # needed for raydp.stop_spark() below
    # ray.init(address='auto')
    ray.init()
    torch_ds, num_features = process_data()
    # Start horovod workers on Ray
    from horovod.ray import RayExecutor
    settings = RayExecutor.create_settings(timeout_s=500)
    executor = RayExecutor(settings, num_hosts=1, num_slots=1, cpus_per_slot=1)
    executor.start()
    executor.run(train_fn, args=[torch_ds, num_features])
    raydp.stop_spark()
    ray.shutdown()
Example #10
        # TensorBoard or other metrics-based callbacks.
        hvd.callbacks.MetricAverageCallback(),

        # Horovod: using `lr = 1.0 * hvd.size()` from the very beginning leads to worse final
        # accuracy. Scale the learning rate `lr = 1.0` ---> `lr = 1.0 * hvd.size()` during
        # the first three epochs. See https://arxiv.org/abs/1706.02677 for details.
        hvd.callbacks.LearningRateWarmupCallback(
            warmup_epochs=3, initial_lr=scaled_lr, verbose=1),
    ]

    # Horovod: save checkpoints only on worker 0 to prevent other workers from corrupting them.
    if hvd.rank() == 0:
        callbacks.append(tf.keras.callbacks.ModelCheckpoint(
            './checkpoint-{epoch}.h5'))

    # Horovod: write logs on worker 0.
    verbose = 1 if hvd.rank() == 0 else 0

    # Train the model.
    # Horovod: adjust number of steps based on number of GPUs.
    mnist_model.fit(dataset, steps_per_epoch=500 // hvd.size(),
                    callbacks=callbacks, epochs=num_epochs, verbose=verbose)


ray.init()
settings = RayExecutor.create_settings(timeout_s=30)
executor = RayExecutor(settings, num_hosts=1, num_slots=2, use_gpu=False)
executor.start()
executor.run(train, kwargs=dict(num_epochs=1))
executor.shutdown()
Example #11
import socket
import ray

import horovod.tensorflow.keras as hvd
# import horovod.tensorflow as hvd
from horovod.ray import RayExecutor

# Start the Ray cluster or attach to an existing Ray cluster
ray.init(address="auto")

# Ray executor settings
setting = RayExecutor.create_settings(timeout_s=100)
num_hosts = 1  # number of machines to use
num_slots = 6  # number of workers to use on each machine
cpus_per_slot = 1  # number of cores to allocate to each worker
gpus_per_slot = 1  # number of GPUs to allocate to each worker

# Start num_hosts * num_slots actors on the cluster
# https://horovod.readthedocs.io/en/stable/api.html#horovod-ray-api
executor = RayExecutor(setting,
                       num_hosts=num_hosts,
                       num_slots=num_slots,
                       cpus_per_slot=cpus_per_slot,
                       gpus_per_slot=gpus_per_slot,
                       use_gpu=True)

# Launch the Ray actors on each machine
# This will launch `num_slots` actors on each machine
print("Start executor...", end="", flush=True)
executor.start()
print("OK", flush=True)
Example #12
def main(num_workers, use_gpu, **kwargs):
    settings = RayExecutor.create_settings(timeout_s=30)
    executor = RayExecutor(settings, use_gpu=use_gpu, num_workers=num_workers)
    executor.start()  # the executor must be started before run() can be called
    executor.run(train_fn, kwargs=kwargs)