Пример #1
0
 def configure_ddp(self) -> None:
     """Wrap the model and pass it, with the trainer's optimizers, to the setup helper.

     ``self.model`` is wrapped in ``LightningShardedDataParallel`` and handed
     to ``self._setup_model_and_optimizers`` together with the optimizers
     currently held by the trainer. The returned model is stored on
     ``self._model`` and the returned optimizers are written back to the
     trainer.
     """
     owning_trainer = self.lightning_module.trainer
     wrapped_module = LightningShardedDataParallel(self.model)
     self._model, prepared_optimizers = self._setup_model_and_optimizers(
         model=wrapped_module,
         optimizers=owning_trainer.optimizers,
     )
     # The trainer must see the optimizers returned by the setup helper.
     owning_trainer.optimizers = prepared_optimizers
 def configure_ddp(self) -> None:
     """Set up optimizers, wrap the model, and move the optimizers to the root device.

     The model and optimizers returned by ``self._setup_model_and_optimizers``
     replace ``self.model`` and ``self.optimizers``; the optimizers are then
     passed to ``optimizers_to_device`` with ``self.root_device``.
     """
     # Optimizers are set up here, after the wrapped module has been moved
     # to the device.
     owning_trainer = self.lightning_module.trainer
     self.setup_optimizers(owning_trainer)

     wrapped_module = LightningShardedDataParallel(self.model)
     prepared_model, prepared_optimizers = self._setup_model_and_optimizers(
         model=wrapped_module,
         optimizers=self.optimizers,
     )
     self.model = prepared_model
     self.optimizers = prepared_optimizers

     optimizers_to_device(self.optimizers, self.root_device)
Пример #3
0
 def configure_ddp(self) -> None:
     """Prepare DDP kwargs and optimizers, then wrap the model.

     Steps, in order: call ``self._set_ddp_kwargs()``, set up optimizers
     from ``self.model.trainer``, run the ``LightningShardedDataParallel``
     wrapped model and the current optimizers through
     ``self._setup_model_and_optimizers``, store both results on ``self``,
     and finally pass the optimizers to ``optimizers_to_device`` with
     ``self.root_device``.
     """
     self._set_ddp_kwargs()
     self.setup_optimizers(self.model.trainer)

     wrapped_module = LightningShardedDataParallel(self.model)
     configured = self._setup_model_and_optimizers(
         model=wrapped_module,
         optimizers=self.optimizers,
     )
     self.model, self.optimizers = configured

     optimizers_to_device(self.optimizers, self.root_device)
Пример #4
0
 def configure_ddp(self) -> None:
     """Wrap the optimizers, then build the ``ShardedDataParallel`` model.

     The model is constructed from the ``LightningShardedDataParallel``
     wrapped ``self.model``, the trainer's optimizers, and any extra keyword
     arguments held in ``self._ddp_kwargs``. ``require_backward_grad_sync``
     is then set to ``False`` on the resulting model.
     """
     self._wrap_optimizers()

     inner_module = LightningShardedDataParallel(self.model)
     # Read after ``_wrap_optimizers()`` has run.
     trainer_optimizers = self.lightning_module.trainer.optimizers
     self._model = ShardedDataParallel(
         inner_module,
         sharded_optimizer=trainer_optimizers,
         **self._ddp_kwargs,
     )
     self._model.require_backward_grad_sync = False
Пример #5
0
 def configure_ddp(self):
     """Wrap the optimizers, then build the ``ShardedDataParallel`` model.

     ``reduce_buffer_size`` is ``self._REDUCE_BUFFER_SIZE_DEFAULT`` when
     ``self.num_nodes`` is greater than one and ``0`` otherwise.
     ``require_backward_grad_sync`` is set to ``False`` on the resulting
     model.
     """
     self._wrap_optimizers()

     inner_module = LightningShardedDataParallel(self.model)
     # Read after ``_wrap_optimizers()`` has run.
     trainer_optimizers = self.lightning_module.trainer.optimizers

     # For multi-node training, enabling bucketing will improve performance.
     if self.num_nodes > 1:
         buffer_size = self._REDUCE_BUFFER_SIZE_DEFAULT
     else:
         buffer_size = 0

     self._model = ShardedDataParallel(
         inner_module,
         sharded_optimizer=trainer_optimizers,
         reduce_buffer_size=buffer_size,
     )
     self._model.require_backward_grad_sync = False
Пример #6
0
    def configure_ddp(self) -> None:
        """Fill in a default ``reduce_buffer_size`` and wrap the model.

        If ``self._ddp_kwargs`` has no ``"reduce_buffer_size"`` entry, one is
        added: ``self._REDUCE_BUFFER_SIZE_DEFAULT`` when ``self.num_nodes`` is
        greater than one, else ``0``. An existing entry is left untouched.
        The ``LightningShardedDataParallel`` wrapped model and the trainer's
        optimizers are then passed to ``self._setup_model_and_optimizers``,
        and its results replace ``self.model`` and ``self.optimizers``.
        """
        owning_trainer = self.lightning_module.trainer

        ddp_kwargs = self._ddp_kwargs
        if "reduce_buffer_size" not in ddp_kwargs:
            # For multi-node training, enabling bucketing will improve performance.
            if self.num_nodes > 1:
                buffer_size = self._REDUCE_BUFFER_SIZE_DEFAULT
            else:
                buffer_size = 0
            ddp_kwargs["reduce_buffer_size"] = buffer_size

        wrapped_module = LightningShardedDataParallel(self.model)
        self.model, self.optimizers = self._setup_model_and_optimizers(
            model=wrapped_module,
            optimizers=owning_trainer.optimizers,
        )
Пример #7
0
 def configure_ddp(self):
     """Wrap the optimizers, then build the ``ShardedDataParallel`` model.

     The model is constructed from the ``LightningShardedDataParallel``
     wrapped ``self.model`` and the trainer's optimizers, and stored on
     ``self._model``.
     """
     self._wrap_optimizers()

     inner_module = LightningShardedDataParallel(self.model)
     # Read after ``_wrap_optimizers()`` has run.
     trainer_optimizers = self.lightning_module.trainer.optimizers
     self._model = ShardedDataParallel(inner_module, sharded_optimizer=trainer_optimizers)
Пример #8
0
 def configure_ddp(self, model: LightningModule, device_ids: List[int]):
     """Wrap the optimizers for ``model`` and return it wrapped for sharded DDP.

     Args:
         model: Module to wrap; its ``trainer.optimizers`` are passed as
             ``sharded_optimizer``.
         device_ids: Not used by this implementation.

     Returns:
         A ``LightningShardedDataParallel`` instance built around ``model``.
     """
     self._wrap_optimizers(model)
     # Read after ``_wrap_optimizers(model)`` has run.
     trainer_optimizers = model.trainer.optimizers
     sharded_model = LightningShardedDataParallel(model, sharded_optimizer=trainer_optimizers)
     return sharded_model
Пример #9
0
 def configure_ddp(self) -> None:
     """Wrap the model and pass it, with the current optimizers, to the setup helper.

     ``self.model`` is wrapped in ``LightningShardedDataParallel`` and handed
     to ``self._setup_model_and_optimizers`` together with
     ``self.optimizers``; the returned pair replaces ``self.model`` and
     ``self.optimizers``.
     """
     wrapped_module = LightningShardedDataParallel(self.model)
     current_optimizers = self.optimizers
     self.model, self.optimizers = self._setup_model_and_optimizers(
         model=wrapped_module,
         optimizers=current_optimizers,
     )