def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None:
    """
    Make sure two policies have the same output for the same input.
    """
    decision_step, _ = mb.create_steps_from_brainparams(policy1.brain, num_agents=1)
    run_out1 = policy1.evaluate(decision_step, list(decision_step.agent_id))
    run_out2 = policy2.evaluate(decision_step, list(decision_step.agent_id))

    np.testing.assert_array_equal(run_out2["log_probs"], run_out1["log_probs"])
def test_policy_evaluate(dummy_config, rnn, visual, discrete):
    # Test evaluate
    tf.reset_default_graph()
    policy = create_policy_mock(
        dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    decision_step, terminal_step = mb.create_steps_from_brainparams(
        policy.brain, num_agents=NUM_AGENTS
    )

    run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
    if discrete:
        assert run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
    else:
        assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE[0])