def test_recurrent_sac(use_discrete):
    """Train recurrent SAC (TensorFlow) on the memory environment.

    Discrete actions get a larger step size; the network is given a
    16-unit LSTM with sequence length 16.
    """
    # Discrete control is easier here, so a coarser step still trains.
    step_size = 0.5 if use_discrete else 0.2
    env = MemoryEnvironment(
        [BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
    )
    memory_settings = NetworkSettings.MemorySettings(
        memory_size=16, sequence_length=16
    )
    network_settings = attr.evolve(
        SAC_TF_CONFIG.network_settings, memory=memory_settings
    )
    hyperparams = attr.evolve(
        SAC_TF_CONFIG.hyperparameters,
        batch_size=128,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=5000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
def test_recurrent_poca(action_sizes, is_multiagent):
    """Train recurrent POCA (torch).

    Multi-agent runs only smoke-test that the LSTM does not crash (the
    multi-agent env is not recurrent); single-agent runs actually verify
    training on the memory environment.
    """
    if is_multiagent:
        # Not a recurrent environment — just check that the LSTM doesn't crash.
        env = MultiAgentEnvironment(
            [BRAIN_NAME], action_sizes=action_sizes, num_agents=2
        )
    else:
        # Actually exercise the LSTM on a memory task.
        env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
    network_settings = attr.evolve(
        POCA_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    hyperparams = attr.evolve(
        POCA_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        POCA_TORCH_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=500 if is_multiagent else 6000,
    )
    # Multi-agent is a smoke test only, so no success threshold is enforced.
    check_environment_trains(
        env,
        {BRAIN_NAME: config},
        success_threshold=None if is_multiagent else 0.9,
    )
def test_recurrent_ppo(use_discrete):
    """Train recurrent PPO on the memory environment via config overrides."""
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    overrides = {
        "max_steps": 5000,
        "batch_size": 64,
        "buffer_size": 128,
        "learning_rate": 1e-3,
        "use_recurrent": True,
    }
    config = generate_config(PPO_CONFIG, overrides)
    _check_environment_trains(env, config, success_threshold=0.9)
def test_recurrent_sac(use_discrete):
    """Train recurrent SAC on the memory environment via config overrides."""
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    overrides = {
        "batch_size": 64,
        "use_recurrent": True,
        "max_steps": 3000,
        "learning_rate": 1e-3,
        "buffer_init_steps": 500,
    }
    config = generate_config(SAC_CONFIG, overrides)
    _check_environment_trains(env, config)
def test_recurrent_ppo(use_discrete):
    """Train recurrent PPO on the memory environment with a 16-unit memory."""
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    memory_settings = NetworkSettings.MemorySettings(memory_size=16)
    network_settings = attr.evolve(
        PPO_CONFIG.network_settings, memory=memory_settings
    )
    hyperparams = attr.evolve(
        PPO_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        PPO_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=5000,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
def test_hybrid_recurrent_ppo():
    """Train recurrent PPO (torch) on a hybrid (1 continuous, 1 discrete) memory task."""
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
    memory_settings = NetworkSettings.MemorySettings(memory_size=16)
    network_settings = attr.evolve(
        PPO_TORCH_CONFIG.network_settings, memory=memory_settings
    )
    hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=512,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=3000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
def test_hybrid_recurrent_sac():
    """Train recurrent SAC (torch) on a hybrid (1 continuous, 1 discrete) memory task."""
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
    memory_settings = NetworkSettings.MemorySettings(
        memory_size=16, sequence_length=16
    )
    network_settings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings, memory=memory_settings
    )
    hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=256,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=4000,
    )
    check_environment_trains(env, {BRAIN_NAME: config})
def test_recurrent_ppo(action_sizes):
    """Train recurrent PPO (TensorFlow) on the memory environment."""
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
    memory_settings = NetworkSettings.MemorySettings(memory_size=16)
    network_settings = attr.evolve(
        PPO_TF_CONFIG.network_settings, memory=memory_settings
    )
    hyperparams = attr.evolve(
        PPO_TF_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        PPO_TF_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=5000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
def test_recurrent_sac(use_discrete):
    """Train recurrent SAC on the memory environment (16-unit memory, seq len 32)."""
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    memory_settings = NetworkSettings.MemorySettings(
        memory_size=16, sequence_length=32
    )
    network_settings = attr.evolve(
        SAC_CONFIG.network_settings, memory=memory_settings
    )
    hyperparams = attr.evolve(
        SAC_CONFIG.hyperparameters,
        batch_size=64,
        learning_rate=1e-3,
        buffer_init_steps=500,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=5000,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})
def test_recurrent_sac(action_sizes):
    """Train recurrent SAC (torch) on the memory environment with a fixed seed."""
    # The pure-discrete case (0, 1) needs a finer step to be learnable.
    step_size = 0.2 if action_sizes == (0, 1) else 0.5
    env = MemoryEnvironment(
        [BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
    )
    memory_settings = NetworkSettings.MemorySettings(
        memory_size=16, sequence_length=16
    )
    network_settings = attr.evolve(
        SAC_TORCH_CONFIG.network_settings, memory=memory_settings
    )
    hyperparams = attr.evolve(
        SAC_TORCH_CONFIG.hyperparameters,
        batch_size=256,
        learning_rate=3e-4,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TORCH_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=4000,
    )
    # Seeded for reproducibility of this flakiness-prone run.
    check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1337)
def test_recurrent_sac(action_sizes):
    """Train recurrent SAC (TensorFlow) on the memory environment."""
    # The pure-discrete case (0, 1) needs a finer step to be learnable.
    step_size = 0.2 if action_sizes == (0, 1) else 0.5
    env = MemoryEnvironment(
        [BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
    )
    memory_settings = NetworkSettings.MemorySettings(memory_size=16)
    network_settings = attr.evolve(
        SAC_TF_CONFIG.network_settings, memory=memory_settings
    )
    hyperparams = attr.evolve(
        SAC_TF_CONFIG.hyperparameters,
        batch_size=128,
        learning_rate=1e-3,
        buffer_init_steps=1000,
        steps_per_update=2,
    )
    config = attr.evolve(
        SAC_TF_CONFIG,
        hyperparameters=hyperparams,
        network_settings=network_settings,
        max_steps=4000,
        framework=FrameworkType.TENSORFLOW,
    )
    _check_environment_trains(env, {BRAIN_NAME: config})