def train_rl_ppo_online(num_workers: int, use_gpu: bool = False) -> Result:
    print("Starting online training")
    trainer = RLTrainer(
        run_config=RunConfig(stop={"training_iteration": 5}),
        scaling_config={
            "num_workers": num_workers,
            "use_gpu": use_gpu,
        },
        algorithm="PPO",
        config={
            "env": "CartPole-v0",
            "framework": "tf",
            "evaluation_num_workers": 1,
            "evaluation_interval": 1,
            "evaluation_config": {"input": "sampler"},
        },
    )
    # TODO(krfricke/xwjiang): Enable checkpoint config in RunConfig
    # result = trainer.fit()
    tuner = Tuner(
        trainer,
        _tuner_kwargs={"checkpoint_at_end": True},
    )
    result = tuner.fit()[0]
    return result
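# A minimal driver sketch for the online example above (not part of the original
# snippet): the local `ray.init()` call and the argument values are illustrative
# assumptions.
if __name__ == "__main__":
    import ray

    ray.init()
    result = train_rl_ppo_online(num_workers=2, use_gpu=False)
    print("Online training finished:", result.metrics)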
def test_tuner_with_torch_trainer(self):
    """Test a successful run using torch trainer."""
    shutil.rmtree(
        os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_torch"), ignore_errors=True
    )
    # The following two should be tunable.
    config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": 10}
    scaling_config = {"num_workers": 1, "use_gpu": False}
    trainer = TorchTrainer(
        train_loop_per_worker=linear_train_func,
        train_loop_config=config,
        scaling_config=scaling_config,
    )
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        },
        "train_loop_config": {
            "batch_size": tune.grid_search([4, 8]),
            "epochs": tune.grid_search([5, 10]),
        },
    }
    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(name="test_tuner"),
        param_space=param_space,
        tune_config=TuneConfig(mode="min", metric="loss"),
    )
    results = tuner.fit()
    assert len(results) == 8
def train_rl_bc_offline(path: str, num_workers: int, use_gpu: bool = False) -> Result:
    print("Starting offline training")
    dataset = ray.data.read_json(
        path, parallelism=num_workers, ray_remote_args={"num_cpus": 1}
    )
    trainer = RLTrainer(
        run_config=RunConfig(stop={"training_iteration": 5}),
        scaling_config={
            "num_workers": num_workers,
            "use_gpu": use_gpu,
        },
        datasets={"train": dataset},
        algorithm=BCTrainer,
        config={
            "env": "CartPole-v0",
            "framework": "tf",
            "evaluation_num_workers": 1,
            "evaluation_interval": 1,
            "evaluation_config": {"input": "sampler"},
        },
    )
    # TODO(krfricke/xwjiang): Enable checkpoint config in RunConfig
    # result = trainer.fit()
    tuner = Tuner(
        trainer,
        _tuner_kwargs={"checkpoint_at_end": True},
    )
    result = tuner.fit()[0]
    return result
def __init__(
    self,
    restore_path: Optional[str] = None,
    trainable: Optional[Union[str, Callable, Type[Trainable], Trainer]] = None,
    param_space: Optional[Dict[str, Any]] = None,
    tune_config: Optional[TuneConfig] = None,
    run_config: Optional[RunConfig] = None,
):
    # Restored from Tuner checkpoint.
    if restore_path:
        trainable_ckpt = os.path.join(restore_path, _TRAINABLE_PKL)
        with open(trainable_ckpt, "rb") as fp:
            trainable = pickle.load(fp)

        tuner_ckpt = os.path.join(restore_path, _TUNER_PKL)
        with open(tuner_ckpt, "rb") as fp:
            tuner = pickle.load(fp)
            self.__dict__.update(tuner.__dict__)

        self._is_restored = True
        self._trainable = trainable
        self._experiment_checkpoint_dir = restore_path
        return

    # Start from fresh.
    if not trainable:
        raise TuneError("You need to provide a trainable to tune.")

    self._is_restored = False
    self._trainable = trainable
    self._tune_config = tune_config or TuneConfig()
    self._run_config = run_config or RunConfig()
    self._experiment_checkpoint_dir = self._setup_create_experiment_checkpoint_dir(
        self._run_config
    )

    # Not used for restored Tuner.
    self._param_space = param_space or {}
    self._process_dataset_param()

    # This needs to happen before `tune.run()` is kicked in.
    # Currently Tune does not exit gracefully when run in Ray client mode:
    # if a crash happens, it exits immediately without checkpointing the
    # tuner and trainable. Writing these pickles up front gives us something
    # to restore from.
    tuner_ckpt = os.path.join(self._experiment_checkpoint_dir, _TUNER_PKL)
    with open(tuner_ckpt, "wb") as fp:
        pickle.dump(self, fp)

    trainable_ckpt = os.path.join(self._experiment_checkpoint_dir, _TRAINABLE_PKL)
    with open(trainable_ckpt, "wb") as fp:
        pickle.dump(self._trainable, fp)
def __init__(
    self,
    scaling_config: Optional[ScalingConfig] = None,
    run_config: Optional[RunConfig] = None,
    datasets: Optional[Dict[str, GenDataset]] = None,
    preprocessor: Optional[Preprocessor] = None,
    resume_from_checkpoint: Optional[Checkpoint] = None,
):
    self.scaling_config = scaling_config if scaling_config else {}
    self.run_config = run_config if run_config else RunConfig()
    self.datasets = datasets if datasets else {}
    self.preprocessor = preprocessor
    self.resume_from_checkpoint = resume_from_checkpoint
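# A minimal sketch of how the constructor above is typically exercised. Assumptions:
# the base class is the same `Trainer` subclassed by `DummyTrainer` in the tests
# below, `training_loop` is the only required override, and `NoopTrainer` is a
# hypothetical name used only for illustration.
class NoopTrainer(Trainer):
    def training_loop(self) -> None:
        pass


# Every constructor argument is optional; omitted configs fall back to
# empty dicts or a default RunConfig().
noop = NoopTrainer(
    scaling_config={"num_workers": 1, "use_gpu": False},
    run_config=RunConfig(name="noop_example"),
)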
def generate_offline_data(path: str):
    print(f"Generating offline data for training at {path}")
    trainer = RLTrainer(
        algorithm="PPO",
        run_config=RunConfig(stop={"timesteps_total": 5000}),
        config={
            "env": "CartPole-v0",
            "output": "dataset",
            "output_config": {
                "format": "json",
                "path": path,
                "max_num_samples_per_file": 1,
            },
            "batch_mode": "complete_episodes",
        },
    )
    trainer.fit()
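# A sketch tying the two offline pieces together (not part of the original
# snippets): the `tempfile` output location and the worker count are
# illustrative assumptions.
import tempfile

import ray

ray.init()
offline_path = tempfile.mkdtemp()
# First roll out PPO episodes to JSON files, then train a BC policy on them.
generate_offline_data(offline_path)
result = train_rl_bc_offline(path=offline_path, num_workers=2, use_gpu=False)
print("Offline training finished:", result.metrics)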
def test_tuner_trainer_fail(self):
    trainer = DummyTrainer()
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        }
    }
    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(name="test_tuner_trainer_fail"),
        param_space=param_space,
        tune_config=TuneConfig(mode="max", metric="iteration"),
    )
    results = tuner.fit()
    assert len(results) == 2
    for i in range(2):
        assert results[i].error
def test_tuner_with_xgboost_trainer(self):
    """Test a successful run."""
    shutil.rmtree(
        os.path.join(DEFAULT_RESULTS_DIR, "test_tuner"), ignore_errors=True
    )
    trainer = XGBoostTrainer(
        label_column="target",
        params={},
        # TODO(xwjiang): change when dataset out-of-band ser/des is landed.
        datasets={"train": gen_dataset_func_eager()},
    )
    # prep_v1 = StandardScaler(["worst radius", "worst area"])
    # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        },
        # TODO(xwjiang): Add when https://github.com/ray-project/ray/issues/23363
        # is resolved.
        # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # "datasets": {
        #     "train": tune.choice(
        #         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
        #     ),
        # },
        "params": {
            "objective": "binary:logistic",
            "tree_method": "approx",
            "eval_metric": ["logloss", "error"],
            "eta": tune.loguniform(1e-4, 1e-1),
            "subsample": tune.uniform(0.5, 1.0),
            "max_depth": tune.randint(1, 9),
        },
    }
    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(name="test_tuner"),
        param_space=param_space,
        tune_config=TuneConfig(mode="min", metric="train-error"),
    )
    results = tuner.fit()
    assert not isinstance(results.get_best_result().checkpoint, TrialCheckpoint)
    assert len(results) == 2
def test_tuner_with_xgboost_trainer(self):
    """Test a successful run."""
    shutil.rmtree(
        os.path.join(DEFAULT_RESULTS_DIR, "test_tuner"), ignore_errors=True
    )
    trainer = XGBoostTrainer(
        label_column="target",
        params={},
        datasets={"train": gen_dataset_func_eager()},
    )
    # prep_v1 = StandardScaler(["worst radius", "worst area"])
    # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        },
        # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        "datasets": {
            "train": tune.grid_search(
                [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
            ),
        },
        "params": {
            "objective": "binary:logistic",
            "tree_method": "approx",
            "eval_metric": ["logloss", "error"],
            "eta": tune.loguniform(1e-4, 1e-1),
            "subsample": tune.uniform(0.5, 1.0),
            "max_depth": tune.randint(1, 9),
        },
    }
    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(name="test_tuner"),
        param_space=param_space,
        tune_config=TuneConfig(mode="min", metric="train-error"),
        # Limit the number of trials running at one time,
        # as the unit test only has access to 4 CPUs on Buildkite.
        _tuner_kwargs={"max_concurrent_trials": 1},
    )
    results = tuner.fit()
    assert not isinstance(results.get_best_result().checkpoint, TrialCheckpoint)
    assert len(results) == 4
def train_rl_ppo_online(num_workers: int, use_gpu: bool = False) -> Result:
    print("Starting online training")
    trainer = RLTrainer(
        run_config=RunConfig(stop={"training_iteration": 5}),
        scaling_config={
            "num_workers": num_workers,
            "use_gpu": use_gpu,
        },
        algorithm="PPO",
        config={
            "env": "CartPole-v0",
            "framework": "tf",
            "evaluation_num_workers": 1,
            "evaluation_interval": 1,
            "evaluation_config": {"input": "sampler"},
        },
    )
    result = trainer.fit()
    return result
def test_tuner_trainer_fail(self):
    class DummyTrainer(Trainer):
        def training_loop(self) -> None:
            raise RuntimeError("There is an error in trainer!")

    trainer = DummyTrainer()
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        }
    }
    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(name="test_tuner_trainer_fail"),
        param_space=param_space,
        tune_config=TuneConfig(mode="max", metric="iteration"),
    )
    results = tuner.fit()
    assert len(results) == 2
    for i in range(2):
        assert results[i].error
def test_tuner_with_torch_trainer(self):
    """Test a successful run using torch trainer."""
    shutil.rmtree(
        os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_torch"), ignore_errors=True
    )
    # The following two should be tunable.
    config = {"lr": 1e-2, "hidden_size": 1, "batch_size": 4, "epochs": 10}
    scaling_config = {"num_workers": 1, "use_gpu": False}
    trainer = TorchTrainer(
        train_loop_per_worker=linear_train_func,
        train_loop_config=config,
        scaling_config=scaling_config,
    )
    # prep_v1 = StandardScaler(["worst radius", "worst area"])
    # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        },
        # TODO(xwjiang): Add when https://github.com/ray-project/ray/issues/23363
        # is resolved.
        # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # "datasets": {
        #     "train": tune.choice(
        #         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
        #     ),
        # },
        "train_loop_config": {
            "batch_size": tune.grid_search([4, 8]),
            "epochs": tune.grid_search([5, 10]),
        },
    }
    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(name="test_tuner"),
        param_space=param_space,
        tune_config=TuneConfig(mode="min", metric="loss"),
    )
    results = tuner.fit()
    assert len(results) == 8
def test_tuner_run_config_override(self):
    trainer = DummyTrainer(run_config=RunConfig(stop={"metric": 4}))
    tuner = Tuner(trainer)

    assert tuner._local_tuner._run_config.stop == {"metric": 4}
def test_tuner_with_xgboost_trainer_driver_fail_and_resume(self):
    # So that we have some global checkpointing happening.
    os.environ["TUNE_GLOBAL_CHECKPOINT_S"] = "1"
    shutil.rmtree(
        os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_driver_fail"),
        ignore_errors=True,
    )
    trainer = XGBoostTrainer(
        label_column="target",
        params={},
        # TODO(xwjiang): change when dataset out-of-band ser/des is landed.
        datasets={"train": gen_dataset_func_eager()},
    )
    # prep_v1 = StandardScaler(["worst radius", "worst area"])
    # prep_v2 = StandardScaler(["worst concavity", "worst smoothness"])
    param_space = {
        "scaling_config": {
            "num_workers": tune.grid_search([1, 2]),
        },
        # TODO(xwjiang): Add when https://github.com/ray-project/ray/issues/23363
        # is resolved.
        # "preprocessor": tune.grid_search([prep_v1, prep_v2]),
        # "datasets": {
        #     "train": tune.choice(
        #         [gen_dataset_func(), gen_dataset_func(do_shuffle=True)]
        #     ),
        # },
        "params": {
            "objective": "binary:logistic",
            "tree_method": "approx",
            "eval_metric": ["logloss", "error"],
            "eta": tune.loguniform(1e-4, 1e-1),
            "subsample": tune.uniform(0.5, 1.0),
            "max_depth": tune.randint(1, 9),
        },
    }

    class FailureInjectionCallback(Callback):
        """Inject failure at the configured iteration number."""

        def __init__(self, num_iters=10):
            self.num_iters = num_iters

        def on_step_end(self, iteration, trials, **kwargs):
            if iteration == self.num_iters:
                print(f"Failing after {self.num_iters} iters.")
                raise RuntimeError

    tuner = Tuner(
        trainable=trainer,
        run_config=RunConfig(
            name="test_tuner_driver_fail", callbacks=[FailureInjectionCallback()]
        ),
        param_space=param_space,
        tune_config=TuneConfig(mode="min", metric="train-error"),
    )
    with self.assertRaises(TuneError):
        tuner.fit()

    # Test resume.
    restore_path = os.path.join(DEFAULT_RESULTS_DIR, "test_tuner_driver_fail")
    tuner = Tuner.restore(restore_path)
    # A hack before we figure out RunConfig semantics across resumes.
    tuner._local_tuner._run_config.callbacks = None
    results = tuner.fit()
    assert len(results) == 2