def test_trainer_increment_step(dummy_config):
    """Check that ``PPOTrainer.increment_step`` goes through the policy.

    The policy is replaced by a mock whose ``increment_step`` returns a
    fixed step count. The test expects the trainer to call it with the
    requested increment and to expose the returned count as
    ``trainer.step``.
    """
    expected_step = 10

    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    # Swap in a mock policy so only the trainer's bookkeeping is exercised.
    fake_policy = mock.Mock()
    fake_policy.increment_step = mock.Mock(return_value=expected_step)
    trainer.policy = fake_policy

    trainer.increment_step(5)

    fake_policy.increment_step.assert_called_with(5)
    assert trainer.step == expected_step
def test_trainer_increment_step():
    """Check that ``PPOTrainer.increment_step`` goes through the policy.

    This variant builds the PPO trainer configuration inline rather than
    taking it from a fixture. The policy is replaced by a mock whose
    ``increment_step`` returns a fixed step count. The test expects the
    trainer to call it with the requested increment and to expose the
    returned count as ``trainer.step``.
    """
    expected_step = 10

    reward_signals = {
        "extrinsic": {
            "strength": 1.0,
            "gamma": 0.99,
        }
    }
    trainer_params = {
        "trainer": "ppo",
        "batch_size": 2048,
        "beta": 0.005,
        "buffer_size": 20480,
        "epsilon": 0.2,
        "gamma": 0.995,
        "hidden_units": 512,
        "lambd": 0.95,
        "learning_rate": 0.0003,
        "max_steps": "2e6",
        "memory_size": 256,
        "normalize": True,
        "num_epoch": 3,
        "num_layers": 3,
        "time_horizon": 1000,
        "sequence_length": 64,
        "summary_freq": 3000,
        "use_recurrent": False,
        "use_curiosity": False,
        "curiosity_strength": 0.01,
        "curiosity_enc_size": 128,
        "summary_path": "./summaries/test_trainer_summary",
        "model_path": "./models/test_trainer_models/TestModel",
        "keep_checkpoints": 5,
        "reward_signals": reward_signals,
    }

    brain_params = BrainParameters("test_brain", 1, 1, [], [2], [], 0)
    trainer = PPOTrainer(brain_params, 0, trainer_params, True, False, 0, "0", False)

    # Swap in a mock policy so only the trainer's bookkeeping is exercised.
    fake_policy = mock.Mock()
    fake_policy.increment_step = mock.Mock(return_value=expected_step)
    trainer.policy = fake_policy

    trainer.increment_step(5)

    fake_policy.increment_step.assert_called_with(5)
    assert trainer.step == expected_step
def test_trainer_increment_step(dummy_config):
    """Check that ``PPOTrainer.increment_step`` goes through the policy.

    ``BrainParameters`` is built with explicit keyword arguments. The
    policy is replaced by a mock whose ``increment_step`` returns a fixed
    step count. The test expects the trainer to call it with the requested
    increment and to expose the returned count as ``trainer.step``.
    """
    expected_step = 10

    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)

    # Mock policy: increment_step always reports `expected_step`.
    fake_policy = mock.Mock(increment_step=mock.Mock(return_value=expected_step))
    trainer.policy = fake_policy

    trainer.increment_step(5)

    fake_policy.increment_step.assert_called_with(5)
    assert trainer.step == expected_step
def test_trainer_increment_step(dummy_config):
    """Check the step count after ``PPOTrainer.increment_step(5)``.

    In this variant the trainer is constructed from the brain *name*
    rather than the ``BrainParameters`` object. The policy is replaced by
    a mock whose ``increment_step`` returns the expected step count, and
    the test expects ``trainer.step`` to equal that count afterwards.
    """
    # Hacked from 10 down to 5: per the original author's note, this
    # function is no longer called through the trainer.
    expected_step = 5

    brain_params = BrainParameters(
        brain_name="test_brain",
        vector_observation_space_size=1,
        camera_resolutions=[],
        vector_action_space_size=[2],
        vector_action_descriptions=[],
        vector_action_space_type=0,
    )
    trainer = PPOTrainer(
        brain_params.brain_name, 0, dummy_config, True, False, 0, "0", False
    )

    fake_policy = mock.Mock()
    fake_policy.increment_step = mock.Mock(return_value=expected_step)
    trainer.policy = fake_policy

    trainer.increment_step(5)

    # NOTE(review): this direct call is itself an `increment_step(5)` on the
    # mock, so the `assert_called_with(5)` below passes whether or not the
    # trainer invoked the policy. Looks like leftover debugging; confirm
    # before removing.
    direct_result = trainer.policy.increment_step(5)
    print(direct_result)

    fake_policy.increment_step.assert_called_with(5)
    assert trainer.step == expected_step