def create_bc_trainer(dummy_config, is_discrete=False):
    """Build a BCTrainer plus a mocked Unity env for behavioral cloning tests."""
    mock_env = mock.Mock()
    if is_discrete:
        mock_brain = mb.create_mock_pushblock_brain()
        mock_braininfo = mb.create_mock_braininfo(num_agents=12,
                                                  num_vector_observations=70)
    else:
        mock_brain = mb.create_mock_3dball_brain()
        mock_braininfo = mb.create_mock_braininfo(num_agents=12,
                                                  num_vector_observations=8)
    mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
    env = mock_env()

    trainer_parameters = dummy_config
    trainer_parameters["summary_path"] = "tmp"
    trainer_parameters["model_path"] = "tmp"
    trainer_parameters["demo_path"] = (
        os.path.dirname(os.path.abspath(__file__)) + "/test.demo")
    trainer = BCTrainer(mock_brain,
                        trainer_parameters,
                        training=True,
                        load=False,
                        seed=0,
                        run_id=0)
    trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy,
                                                       100)
    return trainer, env
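
# Usage sketch (not part of the original listing): a hedged example of how a
# test might consume create_bc_trainer, assuming a pytest fixture named
# dummy_config supplies the trainer hyperparameters; it mirrors the inline
# test_bc_trainer later in this listing.
def test_bc_trainer_update(dummy_config):
    trainer, env = create_bc_trainer(dummy_config)
    # A single update over the simulated demonstration buffer should record a
    # cloning loss entry in the trainer stats.
    trainer.update_policy()
    assert len(trainer.stats["Losses/Cloning Loss"]) > 0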
Example #2
def create_ppo_policy_mock(
    mock_env, dummy_config, reward_signal_config, use_rnn, use_discrete, use_visual
):
    """Build a PPOPolicy plus a mocked Unity env for reward signal tests."""
    if not use_visual:
        mock_brain = mb.create_mock_brainparams(
            vector_action_space_type="discrete" if use_discrete else "continuous",
            vector_action_space_size=DISCRETE_ACTION_SPACE
            if use_discrete
            else VECTOR_ACTION_SPACE,
            vector_observation_space_size=VECTOR_OBS_SPACE,
        )
        mock_braininfo = mb.create_mock_braininfo(
            num_agents=NUM_AGENTS,
            num_vector_observations=VECTOR_OBS_SPACE,
            num_vector_acts=sum(
                DISCRETE_ACTION_SPACE if use_discrete else VECTOR_ACTION_SPACE
            ),
            discrete=use_discrete,
        )
    else:
        mock_brain = mb.create_mock_brainparams(
            vector_action_space_type="discrete" if use_discrete else "continuous",
            vector_action_space_size=DISCRETE_ACTION_SPACE
            if use_discrete
            else VECTOR_ACTION_SPACE,
            vector_observation_space_size=0,
            number_visual_observations=1,
        )
        mock_braininfo = mb.create_mock_braininfo(
            num_agents=NUM_AGENTS,
            num_vis_observations=1,
            num_vector_acts=sum(
                DISCRETE_ACTION_SPACE if use_discrete else VECTOR_ACTION_SPACE
            ),
            discrete=use_discrete,
        )
    mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
    env = mock_env()

    trainer_parameters = dummy_config
    model_path = env.brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    trainer_parameters["reward_signals"].update(reward_signal_config)
    trainer_parameters["use_recurrent"] = use_rnn
    policy = PPOPolicy(0, mock_brain, trainer_parameters, False, False)
    return env, policy
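
# Usage sketch (not part of the original listing): how create_ppo_policy_mock is
# typically driven from a test. The UnityEnvironment patch target and the
# dummy_config / reward_signal_config fixtures are assumptions here, not taken
# from the code above.
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_create_ppo_policy_mock(mock_env, dummy_config, reward_signal_config):
    env, policy = create_ppo_policy_mock(
        mock_env, dummy_config, reward_signal_config,
        use_rnn=False, use_discrete=True, use_visual=False,
    )
    # The helper returns the instantiated mocked environment together with a
    # PPOPolicy wired with the requested reward signals.
    assert policy is not None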
Example #3
def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    trainer = mock.Mock()
    processor = AgentProcessor(
        trainer,
        policy,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
        "action": [0.1, 0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1, 0.1],
        "log_probs": [0.1, 0.1],
    }
    mock_braininfo = mb.create_mock_braininfo(
        num_agents=2,
        num_vector_observations=8,
        num_vector_acts=2,
        num_vis_observations=num_vis_obs,
    )
    for _ in range(5):
        processor.add_experiences(mock_braininfo, mock_braininfo,
                                  fake_action_outputs)

    # Assert that two trajectories have been added to the Trainer
    assert len(trainer.process_trajectory.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = trainer.process_trajectory.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0
def test_rl_trainer(add_policy_outputs, add_rewards_outputs, num_vis_obs):
    trainer = create_rl_trainer()
    trainer.policy = create_mock_policy()
    fake_action_outputs = {
        "action": [0.1, 0.1],
        "value_heads": {},
        "entropy": np.array([1.0]),
        "learning_rate": 1.0,
    }
    mock_braininfo = mb.create_mock_braininfo(
        num_agents=2,
        num_vector_observations=8,
        num_vector_acts=2,
        num_vis_observations=num_vis_obs,
    )
    trainer.add_experiences(
        create_mock_all_brain_info(mock_braininfo),
        create_mock_all_brain_info(mock_braininfo),
        fake_action_outputs,
    )

    # Remove one of the agents
    next_mock_braininfo = mb.create_mock_braininfo(
        num_agents=1,
        num_vector_observations=8,
        num_vector_acts=2,
        num_vis_observations=num_vis_obs,
    )
    brain_info = trainer.construct_curr_info(next_mock_braininfo)

    # assert construct_curr_info worked properly
    assert len(brain_info.agents) == 1
    assert len(brain_info.visual_observations) == num_vis_obs
    assert len(brain_info.vector_observations) == 1
    assert len(brain_info.previous_vector_actions) == 1

    # Test end episode
    trainer.end_episode()
    for agent_id in trainer.episode_steps:
        assert trainer.episode_steps[agent_id] == 0
        assert len(trainer.training_buffer[agent_id]["action"]) == 0
    for rewards in trainer.collected_rewards.values():
        for agent_id in rewards:
            assert rewards[agent_id] == 0
Example #5
def create_ppo_policy_with_bc_mock(mock_env, mock_brain, dummy_config, use_rnn,
                                   demo_file):
    """Build a PPOPolicy with BC pretraining against a mocked Unity env."""
    mock_braininfo = mb.create_mock_braininfo(num_agents=12,
                                              num_vector_observations=8)
    mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
    env = mock_env()

    trainer_parameters = dummy_config
    model_path = env.brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    trainer_parameters["use_recurrent"] = use_rnn
    trainer_parameters["pretraining"]["demo_path"] = (
        os.path.dirname(os.path.abspath(__file__)) + "/" + demo_file)
    policy = PPOPolicy(0, mock_brain, trainer_parameters, False, False)
    return env, policy
def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
    tqueue = mock.Mock()
    name_behavior_id = "test_brain_name"
    processor = AgentProcessor(
        policy,
        name_behavior_id,
        max_trajectory_length=5,
        stats_reporter=StatsReporter("testcat"),
    )

    fake_action_outputs = {
        "action": [0.1, 0.1],
        "entropy": np.array([1.0], dtype=np.float32),
        "learning_rate": 1.0,
        "pre_action": [0.1, 0.1],
        "log_probs": [0.1, 0.1],
    }
    mock_braininfo = mb.create_mock_braininfo(
        num_agents=2,
        num_vector_observations=8,
        num_vector_acts=2,
        num_vis_observations=num_vis_obs,
    )
    fake_action_info = ActionInfo(
        action=[0.1, 0.1],
        value=[0.1, 0.1],
        outputs=fake_action_outputs,
        agents=mock_braininfo.agents,
    )
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(mock_braininfo, ActionInfo([], [], {}, []))
    for _ in range(5):
        processor.add_experiences(mock_braininfo, fake_action_info)

    # Assert that two trajectories have been put onto the trajectory queue
    assert len(tqueue.put.call_args_list) == 2

    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0
Example #7
def create_policy_with_bc_mock(mock_env, mock_brain, trainer_config, use_rnn,
                               demo_file):
    """Build a PPO or SAC policy with behavioral cloning against a mocked env."""
    mock_braininfo = mb.create_mock_braininfo(num_agents=12,
                                              num_vector_observations=8)
    mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
    env = mock_env()

    model_path = env.external_brain_names[0]
    trainer_config["model_path"] = model_path
    trainer_config["keep_checkpoints"] = 3
    trainer_config["use_recurrent"] = use_rnn
    trainer_config["behavioral_cloning"]["demo_path"] = (
        os.path.dirname(os.path.abspath(__file__)) + "/" + demo_file)

    if trainer_config["trainer"] == "ppo":
        policy = PPOPolicy(0, mock_brain, trainer_config, False, False)
    else:
        policy = SACPolicy(0, mock_brain, trainer_config, False, False)
    return env, policy
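
# Usage sketch (not part of the original listing): exercising
# create_policy_with_bc_mock. The patch target, the dummy_config fixture, and
# the policy.bc_module attribute are assumptions based on how the upstream
# ml-agents tests use this helper; test.demo is the demo file referenced
# elsewhere in this listing.
@mock.patch("mlagents.envs.environment.UnityEnvironment")
def test_bc_module_constructed(mock_env, dummy_config):
    mock_brain = mb.create_mock_3dball_brain()
    env, policy = create_policy_with_bc_mock(
        mock_env, mock_brain, dummy_config, use_rnn=False, demo_file="test.demo"
    )
    # The behavioral cloning module should have been attached to the policy.
    assert policy.bc_module is not None
    env.close()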
Example #8
def test_bc_trainer(mock_env, dummy_config):
    mock_brain = mb.create_mock_3dball_brain()
    mock_braininfo = mb.create_mock_braininfo(num_agents=12,
                                              num_vector_observations=8)
    mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
    env = mock_env()

    trainer_parameters = dummy_config
    trainer_parameters["summary_path"] = "tmp"
    trainer_parameters["model_path"] = "tmp"
    trainer_parameters["demo_path"] = (
        os.path.dirname(os.path.abspath(__file__)) + "/test.demo")
    trainer = BCTrainer(mock_brain,
                        trainer_parameters,
                        training=True,
                        load=False,
                        seed=0,
                        run_id=0)
    trainer.demonstration_buffer = mb.simulate_rollout(env, trainer.policy,
                                                       100)
    trainer.update_policy()
    assert len(trainer.stats["Losses/Cloning Loss"]) > 0
    trainer.increment_step(1)
    assert trainer.step == 1