def __init__(
    self,
    backend_executor: Union[BackendExecutor, ActorWrapper],
    backend_config: BackendConfig,
    train_func: Union[Callable[[], T], Callable[[Dict[str, Any]], T]],
    dataset_spec: RayDatasetSpec,
    checkpoint_manager: CheckpointManager,
    checkpoint: Optional[Union[Dict, str, Path, Checkpoint]],
    checkpoint_strategy: Optional[CheckpointConfig],
    run_dir: Optional[Path] = None,
):
    """Store the training collaborators, then call ``_start_training``.

    Args:
        backend_executor: Executor (or actor wrapper) kept on the instance.
        backend_config: Its ``backend_cls`` attribute is called with no
            arguments to build the backend object kept on the instance.
        train_func: Training function; stored and forwarded to
            ``_start_training``.
        dataset_spec: Dataset specification; stored and forwarded to
            ``_start_training``.
        checkpoint_manager: Checkpoint manager kept on the instance.
        checkpoint: Forwarded to ``_start_training`` only; it is not
            stored on the instance by this method.
        checkpoint_strategy: Stored and forwarded to ``_start_training``.
        run_dir: Optional run directory; stored and forwarded to
            ``_start_training``.
    """
    self._backend_executor = backend_executor
    self._backend = backend_config.backend_cls()
    self._train_func = train_func
    self._dataset_spec = dataset_spec
    self._run_dir = run_dir
    self._checkpoint_manager = checkpoint_manager
    self._checkpoint_strategy = checkpoint_strategy

    # Everything _start_training needs, gathered in one place.
    start_kwargs = {
        "train_func": train_func,
        "run_dir": run_dir,
        "dataset_spec": self._dataset_spec,
        "checkpoint": checkpoint,
        "checkpoint_strategy": checkpoint_strategy,
    }
    self._start_training(**start_kwargs)

    # Assigned only after _start_training has returned, so any value it
    # may have written to these two attributes is overwritten here.
    self._final_results = None
    self._finished_training = False
def __init__(
    self,
    backend_config: BackendConfig,
    # TODO(xwjiang): Legacy Ray Train trainer clean up!
    trial_info: Optional[TrialInfo] = None,
    num_workers: int = 1,
    num_cpus_per_worker: float = 1,
    num_gpus_per_worker: float = 0,
    additional_resources_per_worker: Optional[Dict[str, float]] = None,
    max_retries: int = 3,
):
    """Record the executor configuration; the worker group starts inactive.

    Args:
        backend_config: Stored on the instance; its ``backend_cls``
            attribute is called with no arguments to build the backend.
        trial_info: Optional trial information, stored as given.
        num_workers: Stored as given.
        num_cpus_per_worker: Stored as given.
        num_gpus_per_worker: Stored as given.
        additional_resources_per_worker: Stored as given.
        max_retries: Stored as the failure limit. A negative value is
            stored as ``float("inf")`` instead (presumably meaning an
            unlimited number of failures; confirm against the code
            that reads ``_max_failures``).
    """
    # Backend configuration and the backend object built from it.
    self._backend_config = backend_config
    self._backend = backend_config.backend_cls()

    # Per-worker resource settings.
    self._num_workers = num_workers
    self._num_cpus_per_worker = num_cpus_per_worker
    self._num_gpus_per_worker = num_gpus_per_worker
    self._additional_resources_per_worker = additional_resources_per_worker

    # Failure bookkeeping: a negative limit is replaced by infinity.
    self._max_failures = float("inf") if max_retries < 0 else max_retries
    self._num_failures = 0

    # Nothing is set up yet: no hook, no placement group, no shards,
    # and an inactive placeholder in place of a worker group.
    self._initialization_hook = None
    self._placement_group = None
    self._trial_info = trial_info
    self.worker_group = InactiveWorkerGroup()
    self.dataset_shards = None
def __init__(
    self,
    backend_executor_actor: ActorHandle,
    backend_config: BackendConfig,
    train_func: Union[Callable[[], T], Callable[[Dict[str, Any]], T]],
    run_dir: Path,
    dataset: Optional[Union[RayDataset, Dict[str, RayDataset]]],
    checkpoint_manager: CheckpointManager,
    checkpoint: Optional[Union[Dict, str, Path]],
    checkpoint_strategy: Optional[CheckpointStrategy],
):
    """Keep references to the training components and call ``_start_training``.

    Args:
        backend_executor_actor: Actor handle kept on the instance.
        backend_config: Its ``backend_cls`` attribute is called with no
            arguments to build the backend object kept on the instance.
        train_func: Training function; stored and passed to
            ``_start_training``.
        run_dir: Run directory; stored and passed to ``_start_training``.
        dataset: Optional dataset, or dict of datasets keyed by string;
            stored and passed to ``_start_training``.
        checkpoint_manager: Checkpoint manager kept on the instance.
        checkpoint: Passed to ``_start_training`` only; this method does
            not store it on the instance.
        checkpoint_strategy: Stored and passed to ``_start_training``.
    """
    self._backend_executor_actor = backend_executor_actor

    # backend_cls is a zero-argument factory for the backend object.
    make_backend = backend_config.backend_cls
    self._backend = make_backend()

    self._train_func = train_func
    self._dataset = dataset
    self._run_dir = run_dir
    self._checkpoint_manager = checkpoint_manager
    self._checkpoint_strategy = checkpoint_strategy

    self._start_training(
        train_func=train_func,
        run_dir=run_dir,
        dataset=dataset,
        checkpoint=checkpoint,
        checkpoint_strategy=checkpoint_strategy,
    )

    # Assigned after _start_training returns, so anything it may have
    # written to these two attributes is overwritten here.
    self._final_results = None
    self._finished_training = False