def __init__(
    self,
    *,
    train_loop_per_worker: Union[Callable[[], None], Callable[[Dict], None]],
    train_loop_config: Optional[Dict] = None,
    backend_config: Optional[BackendConfig] = None,
    scaling_config: Optional[ScalingConfig] = None,
    run_config: Optional[RunConfig] = None,
    datasets: Optional[Dict[str, GenDataset]] = None,
    preprocessor: Optional[Preprocessor] = None,
    resume_from_checkpoint: Optional[Checkpoint] = None,
):
    """Construct the trainer from a per-worker training loop.

    Args:
        train_loop_per_worker: Function executed on each worker; it may
            accept either zero arguments or a single config dict.
        train_loop_config: Optional dict forwarded to the training loop.
        backend_config: Backend setup; a default ``BackendConfig()`` is
            used when ``None``.
        scaling_config: Worker scaling options, passed to the base class.
        run_config: Run-level options, passed to the base class.
        datasets: Optional mapping of dataset name to dataset.
        preprocessor: Optional preprocessor, passed to the base class.
        resume_from_checkpoint: Optional checkpoint to resume from.
    """
    # Lazily start Ray if the caller has not already done so.
    if not ray.is_initialized():
        ray.init()

    self.train_loop_per_worker = train_loop_per_worker
    self.train_loop_config = train_loop_config

    # Substitute a default backend configuration only when none was given.
    if backend_config is None:
        backend_config = BackendConfig()
    self.backend_config = backend_config

    super(DataParallelTrainer, self).__init__(
        scaling_config=scaling_config,
        run_config=run_config,
        datasets=datasets,
        preprocessor=preprocessor,
        resume_from_checkpoint=resume_from_checkpoint,
    )
def __init__(
    self,
    train_loop_per_worker: Union[Callable[[], None], Callable[[Dict], None]],
    train_loop_config: Optional[Dict] = None,
    backend_config: Optional[BackendConfig] = None,
    scaling_config: Optional[ScalingConfig] = None,
    run_config: Optional[RunConfig] = None,
    datasets: Optional[Dict[str, GenDataset]] = None,
    preprocessor: Optional[Preprocessor] = None,
    resume_from_checkpoint: Optional[Checkpoint] = None,
):
    """Construct and validate the trainer.

    Args:
        train_loop_per_worker: Function executed on each worker; it must
            accept either zero arguments or a single config dict.
        train_loop_config: Optional dict forwarded to the training loop.
        backend_config: Backend setup; a default ``BackendConfig()`` is
            used when ``None``.
        scaling_config: Worker scaling options; must contain a positive
            ``num_workers`` entry.
        run_config: Run-level options, passed to the base class.
        datasets: Optional mapping of dataset name to dataset.
        preprocessor: Optional preprocessor, passed to the base class.
        resume_from_checkpoint: Optional checkpoint to resume from.

    Raises:
        ValueError: If ``num_workers`` is missing or non-positive, or if
            ``train_loop_per_worker`` accepts more than one argument.
    """
    # Lazily start Ray if the caller has not already done so.
    if not ray.is_initialized():
        ray.init()

    self.train_loop_per_worker = train_loop_per_worker
    self.train_loop_config = train_loop_config

    super(DataParallelTrainer, self).__init__(
        scaling_config=scaling_config,
        run_config=run_config,
        datasets=datasets,
        preprocessor=preprocessor,
        resume_from_checkpoint=resume_from_checkpoint,
    )

    # Warn (informationally) when the cluster has GPUs but the trainer
    # was not configured to use them — a likely misconfiguration.
    if (
        not self.scaling_config.get("use_gpu", False)
        and "GPU" in ray.available_resources()
    ):
        logger.info(
            "GPUs are detected in your Ray cluster, but GPU "
            "training is not enabled for this trainer. To enable "
            "GPU training, make sure to set `use_gpu` to True "
            "in your scaling config."
        )

    if "num_workers" not in self.scaling_config:
        raise ValueError("You must specify the 'num_workers' in scaling_config.")

    if self.scaling_config["num_workers"] <= 0:
        raise ValueError(
            "'num_workers' in `scaling_config` must be a positive "
            f"integer. Received {self.scaling_config['num_workers']}"
        )

    # The training loop is invoked with either no arguments or exactly
    # one (the config dict), so reject signatures taking more.
    num_params = len(inspect.signature(self.train_loop_per_worker).parameters)
    if num_params > 1:
        raise ValueError(
            "train_loop_per_worker should take in 0 or 1 arguments, "
            f"but it accepts {num_params} arguments instead."
        )

    # BUG FIX: compare against None instead of relying on truthiness;
    # a falsy-but-valid user-supplied config must not be replaced by a
    # default BackendConfig(). This also matches the sibling
    # `__init__` variants, which use an explicit `is not None` check.
    backend_config = (
        backend_config if backend_config is not None else BackendConfig()
    )
    self.backend_config = backend_config
def __init__(
    self,
    train_loop_per_worker: Union[Callable[[], None], Callable[[Dict], None]],
    *,
    train_loop_config: Optional[Dict] = None,
    backend_config: Optional[BackendConfig] = None,
    scaling_config: Optional[ScalingConfig] = None,
    dataset_config: Optional[Dict[str, DatasetConfig]] = None,
    run_config: Optional[RunConfig] = None,
    datasets: Optional[Dict[str, GenDataset]] = None,
    preprocessor: Optional["Preprocessor"] = None,
    resume_from_checkpoint: Optional[Checkpoint] = None,
):
    """Construct the trainer, merging per-dataset ingest configuration.

    Args:
        train_loop_per_worker: Function executed on each worker; it may
            accept either zero arguments or a single config dict.
        train_loop_config: Optional dict forwarded to the training loop.
        backend_config: Backend setup; a default ``BackendConfig()`` is
            used when ``None``.
        scaling_config: Worker scaling options, passed to the base class.
        dataset_config: Per-dataset ingest overrides, merged with the
            existing configuration and validated against ``datasets``.
        run_config: Run-level options, passed to the base class.
        datasets: Optional mapping of dataset name to dataset.
        preprocessor: Optional preprocessor, passed to the base class.
        resume_from_checkpoint: Optional checkpoint to resume from.
    """
    # Lazily start Ray if the caller has not already done so.
    if not ray.is_initialized():
        ray.init()

    self._train_loop_per_worker = train_loop_per_worker
    self._train_loop_config = train_loop_config

    # Substitute a default backend configuration only when none was given.
    if backend_config is None:
        backend_config = BackendConfig()
    self._backend_config = backend_config

    # NOTE(review): `self._dataset_config` is read here before being
    # assigned in this __init__ — presumably it resolves to a class-level
    # default configuration; confirm against the class definition.
    merged = DatasetConfig.merge(self._dataset_config, dataset_config)
    self._dataset_config = DatasetConfig.validated(merged, datasets)

    # Build the ingest spec from the validated dataset configuration.
    self._ingest_spec = DataParallelIngestSpec(
        dataset_config=self._dataset_config,
    )

    super(DataParallelTrainer, self).__init__(
        scaling_config=scaling_config,
        run_config=run_config,
        datasets=datasets,
        preprocessor=preprocessor,
        resume_from_checkpoint=resume_from_checkpoint,
    )