import math
import tempfile

import yaml

# These import paths assume the ml-agents package layout contemporary with
# this test; module locations may differ across ml-agents releases.
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.sampler_class import SamplerManager
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.simple_env_manager import SimpleEnvManager
from mlagents_envs.side_channel.float_properties_channel import FloatPropertiesChannel
from mlagents.trainers.tests.simple_test_envs import Simple1DEnvironment


def _check_environment_trains(env, config, meta_curriculum=None, success_threshold=0.99):
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as dir:
        run_id = "id"
        save_freq = 99999
        seed = 1337
        # Clear StatsReporters so we don't write to file
        StatsReporter.writers.clear()
        trainer_config = yaml.safe_load(config)
        env_manager = SimpleEnvManager(env, FloatPropertiesChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            summaries_dir=dir,
            run_id=run_id,
            model_path=dir,
            keep_checkpoints=1,
            train_model=True,
            load_model=False,
            seed=seed,
            meta_curriculum=meta_curriculum,
            multi_gpu=False,
        )
        tc = TrainerController(
            trainer_factory=trainer_factory,
            summaries_dir=dir,
            model_path=dir,
            run_id=run_id,
            meta_curriculum=meta_curriculum,
            train=True,
            training_seed=seed,
            sampler_manager=SamplerManager(None),
            resampling_interval=None,
            save_freq=save_freq,
        )

        # Begin training
        tc.start_learning(env_manager)
        print(tc._get_measure_vals())
        # success_threshold=None is for tests that only check setup, not reward.
        if success_threshold is not None:
            for mean_reward in tc._get_measure_vals().values():
                assert not math.isnan(mean_reward)
                assert mean_reward > success_threshold
PPO_CONFIG = """
    default:
        trainer: ppo
        batch_size: 16
        beta: 5.0e-3
        buffer_size: 64
        epsilon: 0.2
        hidden_units: 128
        lambd: 0.95
        learning_rate: 5.0e-3
        max_steps: 2500
        memory_size: 256
        normalize: false
        num_epoch: 3
        num_layers: 2
        time_horizon: 64
        sequence_length: 64
        summary_freq: 500
        use_recurrent: false
        reward_signals:
            extrinsic:
                strength: 1.0
                gamma: 0.99
    """


def test_simple():
    env = Simple1DEnvironment()
    _check_environment_trains(env, PPO_CONFIG)
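
# A minimal sketch (not part of the original suite) of the helper's
# setup-only mode: with success_threshold=None, training still runs
# end-to-end but the reward assertions are skipped, so only the trainer
# wiring is under test. The test name here is illustrative.
def test_simple_setup_only():
    env = Simple1DEnvironment()
    _check_environment_trains(env, PPO_CONFIG, success_threshold=None)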