def configure_ddp(self) -> None:
    trainer = self.lightning_module.trainer
    self._model, optimizers = self._setup_model_and_optimizers(
        model=LightningShardedDataParallel(self.model),
        optimizers=trainer.optimizers,
    )
    trainer.optimizers = optimizers

def configure_ddp(self) -> None:
    # set up optimizers after the wrapped module has been moved to the device
    self.setup_optimizers(self.lightning_module.trainer)
    self.model, self.optimizers = self._setup_model_and_optimizers(
        model=LightningShardedDataParallel(self.model),
        optimizers=self.optimizers,
    )
    optimizers_to_device(self.optimizers, self.root_device)

def configure_ddp(self) -> None:
    self._set_ddp_kwargs()
    self.setup_optimizers(self.model.trainer)
    self.model, self.optimizers = self._setup_model_and_optimizers(
        model=LightningShardedDataParallel(self.model),
        optimizers=self.optimizers,
    )
    optimizers_to_device(self.optimizers, self.root_device)

def configure_ddp(self) -> None:
    self._wrap_optimizers()
    self._model = ShardedDataParallel(
        LightningShardedDataParallel(self.model),
        sharded_optimizer=self.lightning_module.trainer.optimizers,
        **self._ddp_kwargs
    )
    setattr(self._model, "require_backward_grad_sync", False)

def configure_ddp(self):
    self._wrap_optimizers()
    self._model = ShardedDataParallel(
        LightningShardedDataParallel(self.model),
        sharded_optimizer=self.lightning_module.trainer.optimizers,
        # For multi-node training, enabling bucketing will improve performance.
        reduce_buffer_size=self._REDUCE_BUFFER_SIZE_DEFAULT if self.num_nodes > 1 else 0,
    )
    setattr(self._model, "require_backward_grad_sync", False)

def configure_ddp(self) -> None: trainer = self.lightning_module.trainer if "reduce_buffer_size" not in self._ddp_kwargs: # For multi-node training, enabling bucketing will improve performance. self._ddp_kwargs[ "reduce_buffer_size"] = self._REDUCE_BUFFER_SIZE_DEFAULT if self.num_nodes > 1 else 0 self.model, self.optimizers = self._setup_model_and_optimizers( model=LightningShardedDataParallel(self.model), optimizers=trainer.optimizers, )
def configure_ddp(self):
    self._wrap_optimizers()
    self._model = ShardedDataParallel(
        LightningShardedDataParallel(self.model),
        sharded_optimizer=self.lightning_module.trainer.optimizers,
    )

def configure_ddp(self, model: LightningModule, device_ids: List[int]):
    self._wrap_optimizers(model)
    return LightningShardedDataParallel(
        model, sharded_optimizer=model.trainer.optimizers
    )

def configure_ddp(self) -> None:
    self.model, self.optimizers = self._setup_model_and_optimizers(
        model=LightningShardedDataParallel(self.model),
        optimizers=self.optimizers,
    )

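# For context, a minimal usage sketch (not part of the snippets above): the
# configure_ddp variants shown here belong to PyTorch Lightning's fairscale-based
# sharded-DDP strategy, which a user enables through the Trainer rather than by
# calling configure_ddp directly. The exact flag value depends on the Lightning
# version; "ddp_sharded" is assumed here, and MyLightningModule is a hypothetical
# LightningModule subclass.
import pytorch_lightning as pl

model = MyLightningModule()
trainer = pl.Trainer(accelerator="gpu", devices=4, strategy="ddp_sharded")
# configure_ddp() runs during strategy setup inside fit(), wrapping the module
# in LightningShardedDataParallel / fairscale's ShardedDataParallel and hooking
# up the sharded optimizers.
trainer.fit(model)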