def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:
    """Check the extrinsic reward provider in three reward configurations.

    The same buffer is evaluated three times:

    1. with only the individual environment rewards,
    2. after group rewards and groupmate rewards have been written to it,
    3. with a fresh provider that has ``add_groupmate_rewards`` enabled.
    """
    buffer = create_agent_buffer(behavior_spec, 1000, reward)
    signal_settings = RewardSignalSettings()
    provider = ExtrinsicRewardProvider(behavior_spec, signal_settings)

    # Phase 1: individual rewards only -- the output equals the input reward.
    individual_only = provider.evaluate(buffer)
    assert (individual_only == reward).all()

    # Phase 2: group rewards. The result should be twice the environment
    # reward; the groupmate rewards stored below must not be counted yet.
    buffer[BufferKey.GROUP_REWARD] = buffer[BufferKey.ENVIRONMENT_REWARDS]
    groupmate_field = buffer[BufferKey.GROUPMATE_REWARDS]
    # Two groupmates per experience, each receiving the same reward.
    groupmate_field.set(
        [np.ones(1, dtype=np.float32) * reward] * 2
        for _ in range(buffer.num_experiences)
    )
    with_group = provider.evaluate(buffer)
    assert (with_group == 2 * reward).all()

    # Phase 3: groupmate rewards enabled on a new provider. Expected total is
    # individual reward + 2 * groupmate reward + group reward.
    groupmate_provider = ExtrinsicRewardProvider(behavior_spec, signal_settings)
    groupmate_provider.add_groupmate_rewards = True
    with_groupmates = groupmate_provider.evaluate(buffer)
    assert (with_groupmates == 4 * reward).all()
def test_construction(behavior_spec: BehaviorSpec) -> None:
    """Check that a freshly built provider reports its gamma and its name.

    The gamma set on the settings object must be readable from the provider,
    and the provider's ``name`` must be ``"Extrinsic"``.
    """
    expected_gamma = 0.2

    signal_settings = RewardSignalSettings()
    signal_settings.gamma = expected_gamma

    provider = ExtrinsicRewardProvider(behavior_spec, signal_settings)

    assert provider.gamma == expected_gamma
    assert provider.name == "Extrinsic"
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:
    """Check that evaluating a buffer returns the reward it was built with.

    A 1000-step buffer is created with ``reward`` and passed through an
    extrinsic reward provider using default settings; every generated reward
    must equal ``reward``.
    """
    buffer = create_agent_buffer(behavior_spec, 1000, reward)
    signal_settings = RewardSignalSettings()
    provider = ExtrinsicRewardProvider(behavior_spec, signal_settings)

    produced = provider.evaluate(buffer)
    matches = produced == reward
    assert matches.all()