def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(" ")
            model = PPOModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
            ]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.action_masks: np.ones([2, 2]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example No. 2
def init_unity_env(env_path, show_visuals=True):
    worker_id = 0
    done = False

    while not done:
        if worker_id > 64:
            sys.exit()
        try:
            env = UnityEnvironment(env_path,
                                   worker_id=worker_id,
                                   no_graphics=not show_visuals)
            done = True
        except mlagents.envs.exception.UnityWorkerInUseException:
            worker_id += 1

    env.reset(train_mode=True)
    brain_name = list(env.brains.keys())[0]

    state_space = env.brains[brain_name].vector_observation_space_size
    action_space = env.brains[brain_name].vector_action_space_size

    n_agents = env._n_agents[brain_name]

    multiagent = n_agents > 1

    return env, state_space, action_space, n_agents, multiagent, brain_name
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(" ")
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True,
                             m_size=memory_size)
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
                model.memory_out,
            ]
            feed_dict = {
                model.batch_size: 1,
                model.sequence_length: 2,
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.epsilon: np.array([[0, 1]]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example No. 4
def do_rollout(env: UnityEnvironment, brain_name):
    """ Builds a path by running through an environment using a provided function to select actions. """
    obs, rewards, actions, human_obs = [], [], [], []

    curr_info = env.reset(train_mode=False)[brain_name]
    # Primary environment loop
    while not env.global_done:
        ob = curr_info
        action = 2 * np.random.rand() - 1
        obs.append(ob)
        actions.append(action)
        # Step the environment once; the returned BrainInfo carries the reward and done flag
        new_info = env.step(vector_action=action)[brain_name]
        rewards.append(new_info.rewards[0])
        # UnityEnvironment.step returns no gym-style info dict; use the text
        # observation (if any) as the human-readable observation.
        human_obs.append(new_info.text_observations[0] if new_info.text_observations else None)
        curr_info = new_info
        if new_info.local_done[0]:
            break
    # Build path dictionary
    path = {
        "obs": np.array(obs),
        "original_rewards": np.array(rewards),
        "actions": np.array(actions),
        "human_obs": np.array(human_obs)
    }
    return path
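A brief usage sketch for the rollout helper above; the build path and brain name are placeholders, not part of the original snippet:

env = UnityEnvironment(file_name="./builds/3DBall", worker_id=0)  # placeholder path
path = do_rollout(env, brain_name="3DBallBrain")                  # placeholder brain name
print(path["obs"].shape, path["original_rewards"].sum())
env.close()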
Example No. 5
    def _init_env(self, replay_config, sac_config, model_root_path):
        if self.config['build_path'] is None or self.config['build_path'] == '':
            self.env = UnityEnvironment()
        else:
            self.env = UnityEnvironment(file_name=self.config['build_path'],
                                        no_graphics=True,
                                        base_port=self.config['build_port'],
                                        args=['--scene', self.config['scene']])

        self.logger.info(f'{self.config["build_path"]} initialized')

        self.default_brain_name = self.env.brain_names[0]

        brain_params = self.env.brains[self.default_brain_name]
        state_dim = brain_params.vector_observation_space_size * brain_params.num_stacked_vector_observations
        action_dim = brain_params.vector_action_space_size[0]

        custom_sac_model = importlib.import_module(self.config['sac'])
        shutil.copyfile(f'{self.config["sac"]}.py',
                        f'{model_root_path}/{self.config["sac"]}.py')

        self.sac = SAC_DS_with_Replay_Base(
            state_dim=state_dim,
            action_dim=action_dim,
            model_root_path=model_root_path,
            model=custom_sac_model,
            use_rnn=self.config['use_rnn'],
            replay_config=replay_config,
            burn_in_step=self.config['burn_in_step'],
            n_step=self.config['n_step'],
            **sac_config)
Example No. 6
def test_ppo_get_value_estimates(mock_communicator, mock_launcher,
                                 dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain_infos = env.reset()
    brain_info = brain_infos[env.external_brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.external_brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, env.brains[env.external_brain_names[0]],
                       trainer_parameters, False, False)
    run_out = policy.get_value_estimates(brain_info, 0, done=False)
    for key, val in run_out.items():
        assert type(key) is str
        assert type(val) is float

    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val == 0.0

    # Check if we ignore terminal states properly
    policy.reward_signals["extrinsic"].use_terminal_states = False
    run_out = policy.get_value_estimates(brain_info, 0, done=True)
    for key, val in run_out.items():
        assert type(key) is str
        assert val != 0.0

    env.close()
def test_close(mock_communicator, mock_launcher):
    comm = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(" ")
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
Example No. 8
    def setup_connection_with_unity(self, build_scene):
        # Connect to Unity and get environment
        self.env = UnityEnvironment(file_name=build_scene, worker_id=0, seed=1)

        # Reset the environment
        self.env_info = self.env.reset(train_mode=True)

        # Set the default brain to work with
        self.default_brain = "Robot"
def test_step(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain = env.brains["RealFakeBrain"]
    brain_info = env.step()
    brain_info = env.step([0] * brain.vector_action_space_size[0] *
                          len(brain_info["RealFakeBrain"].agents))
    with pytest.raises(UnityActionException):
        env.step([0])
    brain_info = env.step([-1] * brain.vector_action_space_size[0] *
                          len(brain_info["RealFakeBrain"].agents))
    env.close()
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
    assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)
    assert isinstance(brain_info["RealFakeBrain"].vector_observations,
                      np.ndarray)
    assert (len(brain_info["RealFakeBrain"].visual_observations) ==
            brain.number_visual_observations)
    assert len(brain_info["RealFakeBrain"].vector_observations) == len(
        brain_info["RealFakeBrain"].agents)
    assert (len(brain_info["RealFakeBrain"].vector_observations[0]) ==
            brain.vector_observation_space_size *
            brain.num_stacked_vector_observations)

    print("\n\n\n\n\n\n\n" + str(brain_info["RealFakeBrain"].local_done))
    assert not brain_info["RealFakeBrain"].local_done[0]
    assert brain_info["RealFakeBrain"].local_done[2]
Example No. 10
def test_int_channel():
    sender = IntChannel()
    receiver = IntChannel()
    sender.send_int(5)
    sender.send_int(6)
    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_type: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_type: receiver}, data)
    assert receiver.list_int[0] == 5
    assert receiver.list_int[1] == 6
Example No. 11
def get_unity_envs():
    # check the python environment
    print("Python version: ", sys.version)
    if (sys.version_info[0] < 3):
        raise Exception("ERROR: ML-Agents Toolkit requires Python 3")

    # set the unity environment
    env = UnityEnvironment(file_name=UNITY_PATH, base_port=5005)
    brain = env.brain_names[0]
    env.reset(train_mode=True)[brain]

    return env, brain
Example No. 12
def _start_env(file_name="", seed=0, worker_id=0):

    log2logger("Starting Environment: " + str(worker_id))

    if file_name == "":
        env = UnityEnvironment(file_name=None, seed=seed)
    else:
        env = UnityEnvironment(file_name=file_name,
                               seed=seed,
                               worker_id=worker_id)

    return env
Example No. 13
    def _init_env(self):
        if self.config['build_path'] is None or self.config['build_path'] == '':
            self.env = UnityEnvironment()
        else:
            self.env = UnityEnvironment(file_name=self.config['build_path'],
                                        no_graphics=self._train_mode,
                                        base_port=self.config['build_port'],
                                        args=['--scene', self.config['scene']])

        self.logger.info(f'{self.config["build_path"]} initialized')

        self.default_brain_name = self.env.brain_names[0]
Example No. 14
    def __init__(self, file_name=None, worker_id=0, base_port=5005, seed=0, docker_training=False, no_graphics=False,
                 timeout_wait=30, train_mode=True, **kwargs):
        """
        Args:
            file_name (Optional[str]): Name of Unity environment binary.
            base_port (int): Port number to connect to Unity environment. `worker_id` increments on top of this.
            worker_id (int): Number to add to `base_port`. Used for asynchronous agent scenarios.
            docker_training (bool): Informs this class whether the process is being run within a container.
                Default: False.
            no_graphics (bool): Whether to run the Unity simulator in no-graphics mode. Default: False.
            timeout_wait (int): Time (in seconds) to wait for connection from environment.
            train_mode (bool): Whether to run in training mode, speeding up the simulation. Default: True.
        """
        # First create the UnityMLAgentsEnvironment to get state and action spaces, then create RLgraph Environment
        # instance.
        self.mlagents_env = UnityEnvironment(
            file_name, worker_id, base_port, seed, docker_training, no_graphics
        )
        all_brain_info = self.mlagents_env.reset()
        # Get all possible information from AllBrainInfo.
        # TODO: Which scene do we pick?
        self.scene_key = next(iter(all_brain_info))
        first_brain_info = all_brain_info[self.scene_key]
        num_environments = len(first_brain_info.agents)

        state_space = {}
        if len(first_brain_info.vector_observations[0]) > 0:
            state_space["vector"] = get_space_from_op(first_brain_info.vector_observations[0])
            # TODO: This is a hack.
            if state_space["vector"].dtype == np.float64:
                state_space["vector"].dtype = np.float32
        if len(first_brain_info.visual_observations) > 0:
            state_space["visual"] = get_space_from_op(first_brain_info.visual_observations[0])
        if first_brain_info.text_observations[0]:
            state_space["text"] = get_space_from_op(first_brain_info.text_observations[0])

        if len(state_space) == 1:
            self.state_key = next(iter(state_space))
            state_space = state_space[self.state_key]
        else:
            self.state_key = None
            state_space = Dict(state_space)
        action_space = get_space_from_op(first_brain_info.action_masks[0])
        if action_space.dtype == np.float64:
            action_space.dtype = np.float32

        super(MLAgentsEnv, self).__init__(
            num_environments=num_environments, state_space=state_space, action_space=action_space, **kwargs
        )

        # Caches the last observation we made (after stepping or resetting).
        self.last_state = []
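The docstring above notes that worker_id is added on top of base_port. A hedged sketch of launching two parallel instances (file name and port values are assumptions, not taken from the original code):

# Hypothetical sketch: two instances side by side, communicating on ports 5005 and 5006.
env_a = MLAgentsEnv(file_name="./builds/3DBall", worker_id=0, base_port=5005)  # port 5005 + 0
env_b = MLAgentsEnv(file_name="./builds/3DBall", worker_id=1, base_port=5005)  # port 5005 + 1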
Example No. 15
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain_infos = env.reset()
    brain_info = brain_infos[env.external_brain_names[0]]

    trainer_parameters = dummy_config
    model_path = env.external_brain_names[0]
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, env.brains[env.external_brain_names[0]],
                       trainer_parameters, False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out["action"].shape == (3, 2)
    env.close()
Example No. 16
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    brain = env.brains["RealFakeBrain"]
    brain_info = env.reset()
    env.close()
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
    assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)
    assert isinstance(brain_info["RealFakeBrain"].vector_observations,
                      np.ndarray)
    assert (len(brain_info["RealFakeBrain"].visual_observations) ==
            brain.number_visual_observations)
    assert len(brain_info["RealFakeBrain"].vector_observations) == len(
        brain_info["RealFakeBrain"].agents)
    assert (len(brain_info["RealFakeBrain"].vector_observations[0]) ==
            brain.vector_observation_space_size)
Example No. 17
    def __init__(self,
                 env_file='data/Reacher.exe',
                 no_graphics=True,
                 mlagents=False):
        if mlagents:
            from mlagents.envs.environment import UnityEnvironment
        else:
            from unityagents import UnityEnvironment
        self.env = UnityEnvironment(file_name=env_file,
                                    no_graphics=no_graphics)
        self.brain_name = self.env.brain_names[0]
        brain = self.env.brains[self.brain_name]
        self.action_size = brain.vector_action_space_size
        if type(self.action_size) != int:
            self.action_size = self.action_size[0]
        env_info = self.env.reset(train_mode=True)[self.brain_name]
        self.state_size = env_info.vector_observations.shape[1]
        self.num_agents = len(env_info.agents)
Example No. 18
def test_raw_bytes():
    sender = RawBytesChannel()
    receiver = RawBytesChannel()

    sender.send_raw_data("foo".encode("ascii"))
    sender.send_raw_data("bar".encode("ascii"))

    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_type: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_type: receiver}, data)

    messages = receiver.get_and_clear_received_messages()
    assert len(messages) == 2
    assert messages[0].decode("ascii") == "foo"
    assert messages[1].decode("ascii") == "bar"

    messages = receiver.get_and_clear_received_messages()
    assert len(messages) == 0
Example No. 19
class UnityEnv():
    """Unity Reacher Environment Wrapper
        https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Examples.md

    """
    def __init__(self,
                 env_file='data/Reacher.exe',
                 no_graphics=True,
                 mlagents=False):
        if mlagents:
            from mlagents.envs.environment import UnityEnvironment
        else:
            from unityagents import UnityEnvironment
        self.env = UnityEnvironment(file_name=env_file,
                                    no_graphics=no_graphics)
        self.brain_name = self.env.brain_names[0]
        brain = self.env.brains[self.brain_name]
        self.action_size = brain.vector_action_space_size
        if type(self.action_size) != int:
            self.action_size = self.action_size[0]
        env_info = self.env.reset(train_mode=True)[self.brain_name]
        self.state_size = env_info.vector_observations.shape[1]
        self.num_agents = len(env_info.agents)

    def reset(self, train=True):
        env_info = self.env.reset(train_mode=train)[self.brain_name]
        return env_info.vector_observations

    def close(self):
        self.env.close()

    def step(self, actions):
        actions = np.clip(actions, -1, 1)
        env_info = self.env.step(actions)[self.brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done
        return next_states, np.array(rewards), np.array(dones)

    @property
    def action_shape(self):
        return (self.num_agents, self.action_size)
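A minimal usage sketch for the Reacher wrapper above, assuming a local build at the default path; the random policy is only illustrative:

env = UnityEnv(env_file='data/Reacher.exe', no_graphics=True)
states = env.reset(train=True)                         # shape: (num_agents, state_size)
actions = np.random.uniform(-1, 1, env.action_shape)   # one continuous action per agent
next_states, rewards, dones = env.step(actions)
env.close()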
Example No. 20
def test_reset(mock_communicator, mock_launcher):
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    spec = env.get_agent_group_spec("RealFakeBrain")
    env.reset()
    batched_step_result = env.get_step_result("RealFakeBrain")
    env.close()
    assert isinstance(batched_step_result, BatchedStepResult)
    assert len(spec.observation_shapes) == len(batched_step_result.obs)
    n_agents = batched_step_result.n_agents()
    for shape, obs in zip(spec.observation_shapes, batched_step_result.obs):
        assert (n_agents, ) + shape == obs.shape
Example No. 21
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(" ")
    env.reset()
    brain_name = env.get_agent_groups()[0]
    brain_info = step_result_to_brain_info(
        env.get_step_result(brain_name), env.get_agent_group_spec(brain_name))
    brain_params = group_spec_to_brain_parameters(
        brain_name, env.get_agent_group_spec(brain_name))

    trainer_parameters = dummy_config
    model_path = brain_name
    trainer_parameters["model_path"] = model_path
    trainer_parameters["keep_checkpoints"] = 3
    policy = PPOPolicy(0, brain_params, trainer_parameters, False, False)
    run_out = policy.evaluate(brain_info)
    assert run_out["action"].shape == (3, 2)
    env.close()
def test_cc_bc_model(mock_communicator, mock_launcher):
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0
            )
            env = UnityEnvironment(" ")
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            feed_dict = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
Example No. 23
def test_float_properties():
    sender = FloatPropertiesChannel()
    receiver = FloatPropertiesChannel()

    sender.set_property("prop1", 1.0)

    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_type: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_type: receiver}, data)

    val = receiver.get_property("prop1")
    assert val == 1.0
    val = receiver.get_property("prop2")
    assert val is None
    sender.set_property("prop2", 2.0)

    data = UnityEnvironment._generate_side_channel_data(
        {sender.channel_type: sender})
    UnityEnvironment._parse_side_channel_message(
        {receiver.channel_type: receiver}, data)

    val = receiver.get_property("prop1")
    assert val == 1.0
    val = receiver.get_property("prop2")
    assert val == 2.0
    assert len(receiver.list_properties()) == 2
    assert "prop1" in receiver.list_properties()
    assert "prop2" in receiver.list_properties()
    val = sender.get_property("prop1")
    assert val == 1.0

    assert receiver.get_property_dict_copy() == {"prop1": 1.0, "prop2": 2.0}
    assert receiver.get_property_dict_copy() == sender.get_property_dict_copy()
Example No. 24
def init_unity_env(env_path, show_visuals=True):

    # Find a worker_id < 64 that's not in use
    worker_id = 0
    done = False

    while not done:
        if worker_id > 64:
            sys.exit()
        try:
            env = UnityEnvironment(env_path,
                                   worker_id=worker_id,
                                   no_graphics=not show_visuals)
            done = True
        except mlagents.envs.exception.UnityWorkerInUseException:
            worker_id += 1

    # Get state and action space, as well as multiagent and multibrain info from environment
    env.reset(train_mode=not show_visuals)
    # brain_name = list(env.brains.keys())[0]
    brain_names = list(env.brains.keys())

    if len(brain_names) > 1:
        multibrain = True
        n_agents = env._n_agents[brain_names[0]] + env._n_agents[
            brain_names[1]]
    else:
        multibrain = False
        n_agents = env._n_agents[brain_names[0]]

    # WalkerVis is a version of the Walker environment with one brain 'WalkerVis'
    # having visual observations, whereas 'Walker' brain does not.
    # The visual observations are used for recording episodes
    state_space = env.brains[brain_names[0]].vector_observation_space_size
    action_space = env.brains[brain_names[0]].vector_action_space_size

    multiagent = n_agents > 1

    return env, state_space, action_space, n_agents, multiagent, brain_names, multibrain
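A short, hedged sketch of calling this multi-brain variant and unpacking its return value; './builds/Walker' is a placeholder path:

env, state_space, action_space, n_agents, multiagent, brain_names, multibrain = \
    init_unity_env('./builds/Walker', show_visuals=False)  # placeholder path
print(state_space, action_space, n_agents, multibrain)
env.close()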
Example No. 25
    def create_unity_environment(worker_id: int) -> UnityEnvironment:
        env_seed = seed
        if not env_seed:
            env_seed = seed_pool[worker_id % len(seed_pool)]
        return UnityEnvironment(
            file_name=env_path,
            worker_id=worker_id,
            seed=env_seed,
            docker_training=docker_training,
            no_graphics=no_graphics,
            base_port=start_port,
            args=env_args,
        )
Example No. 26
def run(train_mode, load_model, env_name):
    env = UnityEnvironment(file_name=env_name)
    default_brain = env.brain_names[0]

    agent = DDPGAgent(state_size, action_size, train_mode, load_model)
    rewards = deque(maxlen=print_interval)
    success_cnt = 0
    step = 0

    for episode in range(run_episode + test_episode):
        if episode == run_episode:
            train_mode = False

        env_info = env.reset(train_mode=train_mode)[default_brain]
        state = env_info.vector_observations[0]
        episode_rewards = 0
        done = False

        while not done:
            step += 1

            action = agent.get_action([state])[0]
            #print(action)
            env_info = env.step(action)[default_brain]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]

            episode_rewards += reward

            if train_mode:
                agent.append_sample(state, action, reward, next_state, done)

            state = next_state

            if episode > start_train_episode and train_mode:
                agent.train_model()

        success_cnt = success_cnt + 1 if reward == 1 else success_cnt
        rewards.append(episode_rewards)
        agent.save_samples(episode)

        if episode % print_interval == 0 and episode != 0:
            print("step: {} / episode: {} / reward: {:.3f} / success_cnt: {}".
                  format(step, episode, np.mean(rewards), success_cnt))
            agent.Write_Summray(np.mean(rewards), success_cnt, episode)
            success_cnt = 0

        if train_mode and episode % save_interval == 0 and episode != 0:
            print("model saved")
            agent.save_model()

    env.close()
Example No. 27
    def __init__(self,
                 environment_filename=None,
                 docker_training=False,
                 worker_id=0,
                 retro=True,
                 timeout_wait=30,
                 realtime_mode=False,
                 config=None,
                 greyscale=False):
        """
        Arguments:
          environment_filename: The file path to the Unity executable.  Does not require the extension.
          docker_training: Whether this is running within a docker environment and should use a virtual
            frame buffer (xvfb).
          worker_id: The index of the worker in the case where multiple environments are running.  Each
            environment reserves port (5005 + worker_id) for communication with the Unity executable.
          retro: Resize visual observation to 84x84 (int8) and flattens action space.
          timeout_wait: Time for python interface to wait for environment to connect.
          realtime_mode: Whether to render the environment window image and run environment at realtime.
        """
        self._env = UnityEnvironment(environment_filename,
                                     worker_id,
                                     docker_training=docker_training,
                                     timeout_wait=timeout_wait)

        split_name = self._env.academy_name.split('-v')
        if len(split_name) == 2 and split_name[0] == "ObstacleTower":
            self.name, self.version = split_name
        else:
            raise UnityGymException(
                "Attempting to launch non-Obstacle Tower environment")

        if self.version not in self.ALLOWED_VERSIONS:
            raise UnityGymException(
                "Invalid Obstacle Tower version.  Your build is v" +
                self.version +
                " but only the following versions are compatible with this gym: "
                + str(self.ALLOWED_VERSIONS))

        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._flattener = None
        self._greyscale = greyscale

        # Environment reset parameters
        self._seed = None
        self._floor = None

        self.realtime_mode = realtime_mode
        self.game_over = False  # Hidden flag used by Atari environments to determine if the game is over
        self.retro = retro
        self.config = config

        flatten_branched = self.retro
        uint8_visual = self.retro

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if brain.number_visual_observations == 0:
            raise UnityGymException(
                "Environment provides no visual observations.")

        self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1:
            logger.warning(
                "The environment contains more than one visual observation. "
                "Please note that only the first will be provided in the observation."
            )

        # Check for number of agents in scene.
        initial_info = self._env.reset(
            train_mode=not self.realtime_mode)[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if len(brain.vector_action_space_size) == 1:
            self._action_space = spaces.Discrete(
                brain.vector_action_space_size[0])
        else:
            if flatten_branched:
                self._flattener = ActionFlattener(
                    brain.vector_action_space_size)
                self._action_space = self._flattener.action_space
            else:
                self._action_space = spaces.MultiDiscrete(
                    brain.vector_action_space_size)

        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions

        if self._greyscale:
            depth = 1
        else:
            depth = 3
        image_space_max = 1.0
        image_space_dtype = np.float32
        camera_height = brain.camera_resolutions[0]["height"]
        camera_width = brain.camera_resolutions[0]["width"]
        if self.retro:
            image_space_max = 255
            image_space_dtype = np.uint8
            camera_height = 84
            camera_width = 84

        image_space = spaces.Box(0,
                                 image_space_max,
                                 dtype=image_space_dtype,
                                 shape=(camera_height, camera_width, depth))
        if self.retro:
            self._observation_space = image_space
        else:
            max_float = np.finfo(np.float32).max
            keys_space = spaces.Discrete(5)
            time_remaining_space = spaces.Box(low=0.0,
                                              high=max_float,
                                              shape=(1, ),
                                              dtype=np.float32)
            floor_space = spaces.Discrete(9999)
            self._observation_space = spaces.Tuple(
                (image_space, keys_space, time_remaining_space, floor_space))
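Assuming this __init__ belongs to a gym-style Obstacle Tower wrapper class (the class name is not shown in the fragment; ObstacleTowerEnv is used here only for illustration), construction might look like the following sketch with a placeholder binary path:

# Hedged sketch: the class name and binary path are assumptions, not taken from the fragment above.
env = ObstacleTowerEnv('./ObstacleTower/obstacletower', worker_id=0,
                       retro=True, realtime_mode=False)
# worker_id=0 means the wrapper reserves port 5005 (5005 + worker_id) for Unity communication.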
Example No. 28
class UnityEnv(gym.Env):
    """
    Provides Gym wrapper for Unity Learning Environments.
    Multi-agent environments use lists for object types, as done here:
    https://github.com/openai/multiagent-particle-envs
    """
    def __init__(
        self,
        environment_filename: str,
        worker_id: int = 0,
        use_visual: bool = False,
        uint8_visual: bool = False,
        multiagent: bool = False,
        flatten_branched: bool = False,
        no_graphics: bool = False,
        allow_multiple_visual_obs: bool = False,
    ):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param use_visual: Whether to use visual observation or vector observation.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
            MultiDiscrete.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
        """
        self._env = UnityEnvironment(environment_filename,
                                     worker_id,
                                     no_graphics=no_graphics)

        # Take a single step so that the brain information will be sent over
        if not self._env.brains:
            self._env.step()

        self.name = self._env.academy_name
        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent
        self._flattener = None
        self.game_over = (
            False
        )  # Hidden flag used by Atari environments to determine if the game is over
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        if len(self._env.external_brain_names) <= 0:
            raise UnityGymException(
                "There are not any external brain in the UnityEnvironment")

        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if use_visual and brain.number_visual_observations == 0:
            raise UnityGymException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment.")
        self.use_visual = brain.number_visual_observations >= 1 and use_visual

        if not use_visual and uint8_visual:
            logger.warning(
                "`uint8_visual was set to true, but visual observations are not in use. "
                "This setting will not have any effect.")
        else:
            self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1 and not self._allow_multiple_visual_obs:
            logger.warning(
                "The environment contains more than one visual observation. "
                "You must define allow_multiple_visual_obs=True to received them all. "
                "Otherwise, please note that only the first will be provided in the observation."
            )

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException(
                "There can only be one stacked vector observation in a UnityEnvironment "
                "if it is wrapped in a gym.")

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            if len(brain.vector_action_space_size) == 1:
                self._action_space = spaces.Discrete(
                    brain.vector_action_space_size[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(
                        brain.vector_action_space_size)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(
                        brain.vector_action_space_size)

        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened.")
            high = np.array([1] * brain.vector_action_space_size[0])
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            shape = (
                brain.camera_resolutions[0].height,
                brain.camera_resolutions[0].width,
                brain.camera_resolutions[0].num_channels,
            )
            if uint8_visual:
                self._observation_space = spaces.Box(0,
                                                     255,
                                                     dtype=np.uint8,
                                                     shape=shape)
            else:
                self._observation_space = spaces.Box(0,
                                                     1,
                                                     dtype=np.float32,
                                                     shape=shape)

        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)

    def reset(self):
        """Resets the state of the environment and returns an initial observation.
        In the case of multi-agent environments, this is a list.
        Returns: observation (object/list): the initial observation of the
            space.
        """
        info = self._env.reset()[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self.game_over = False

        if not self._multiagent:
            obs, reward, done, info = self._single_step(info)
        else:
            obs, reward, done, info = self._multi_step(info)
        return obs

    def step(self, action):
        """Run one timestep of the environment's dynamics. When end of
        episode is reached, you are responsible for calling `reset()`
        to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        In the case of multi-agent environments, these are lists.
        Args:
            action (object/list): an action provided by the environment
        Returns:
            observation (object/list): agent's observation of the current environment
            reward (float/list) : amount of reward returned after previous action
            done (boolean/list): whether the episode has ended.
            info (dict): contains auxiliary diagnostic information, including BrainInfo.
        """

        # Use random actions for all other agents in environment.
        if self._multiagent:
            if not isinstance(action, list):
                raise UnityGymException(
                    "The environment was expecting `action` to be a list.")
            if len(action) != self._n_agents:
                raise UnityGymException(
                    "The environment was expecting a list of {} actions.".
                    format(self._n_agents))
            else:
                if self._flattener is not None:
                    # Action space is discrete and flattened - we expect a list of scalars
                    action = [
                        self._flattener.lookup_action(_act) for _act in action
                    ]
                action = np.array(action)
        else:
            if self._flattener is not None:
                # Translate action into list
                action = self._flattener.lookup_action(action)

        info = self._env.step(action)[self.brain_name]
        n_agents = len(info.agents)
        self._check_agents(n_agents)
        self._current_state = info

        if not self._multiagent:
            obs, reward, done, info = self._single_step(info)
            self.game_over = done
        else:
            obs, reward, done, info = self._multi_step(info)
            self.game_over = all(done)
        return obs, reward, done, info

    def _single_step(self, info):
        if self.use_visual:
            visual_obs = info.visual_observations

            if self._allow_multiple_visual_obs:
                visual_obs_list = []
                for obs in visual_obs:
                    visual_obs_list.append(self._preprocess_single(obs[0]))
                self.visual_obs = visual_obs_list
            else:
                self.visual_obs = self._preprocess_single(visual_obs[0][0])

            default_observation = self.visual_obs
        else:
            default_observation = info.vector_observations[0, :]

        return (
            default_observation,
            info.rewards[0],
            info.local_done[0],
            {
                "text_observation": info.text_observations[0],
                "brain_info": info
            },
        )

    def _preprocess_single(self, single_visual_obs):
        if self.uint8_visual:
            return (255.0 * single_visual_obs).astype(np.uint8)
        else:
            return single_visual_obs

    def _multi_step(self, info):
        if self.use_visual:
            self.visual_obs = self._preprocess_multi(info.visual_observations)
            default_observation = self.visual_obs
        else:
            default_observation = info.vector_observations
        return (
            list(default_observation),
            info.rewards,
            info.local_done,
            {
                "text_observation": info.text_observations,
                "brain_info": info
            },
        )

    def _preprocess_multi(self, multiple_visual_obs):
        if self.uint8_visual:
            return [(255.0 * _visual_obs).astype(np.uint8)
                    for _visual_obs in multiple_visual_obs]
        else:
            return multiple_visual_obs

    def render(self, mode="rgb_array"):
        return self.visual_obs

    def close(self):
        """Override _close in your subclass to perform any necessary cleanup.
        Environments will automatically close() themselves when
        garbage collected or when the program exits.
        """
        self._env.close()

    def get_action_meanings(self):
        return self.action_meanings

    def seed(self, seed=None):
        """Sets the seed for this env's random number generator(s).
        Currently not implemented.
        """
        logger.warn("Could not seed environment %s", self.name)
        return

    def _check_agents(self, n_agents):
        if not self._multiagent and n_agents > 1:
            raise UnityGymException(
                "The environment was launched as a single-agent environment, however"
                "there is more than one agent in the scene.")
        elif self._multiagent and n_agents <= 1:
            raise UnityGymException(
                "The environment was launched as a mutli-agent environment, however"
                "there is only one agent in the scene.")
        if self._n_agents is None:
            self._n_agents = n_agents
            logger.info("{} agents within environment.".format(n_agents))
        elif self._n_agents != n_agents:
            raise UnityGymException(
                "The number of agents in the environment has changed since "
                "initialization. This is not supported.")

    @property
    def metadata(self):
        return {"render.modes": ["rgb_array"]}

    @property
    def reward_range(self):
        return -float("inf"), float("inf")

    @property
    def spec(self):
        return None

    @property
    def action_space(self):
        return self._action_space

    @property
    def observation_space(self):
        return self._observation_space

    @property
    def number_agents(self):
        return self._n_agents
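A hedged usage sketch of the gym wrapper above in single-agent, vector-observation mode; the build path is a placeholder:

env = UnityEnv("./builds/3DBall", worker_id=0, use_visual=False)  # placeholder path
obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()           # random policy, purely illustrative
    obs, reward, done, info = env.step(action)
env.close()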
Example No. 29
class MLAgentsEnv(VectorEnv):
    """
    An Environment sitting behind a tcp connection and communicating through this adapter.
    Note: Communication between Unity and Python takes place over an open socket without authentication.
    Ensure that the network where training takes place is secure.
    """
    def __init__(self, file_name=None, worker_id=0, base_port=5005, seed=0, docker_training=False, no_graphics=False,
                 timeout_wait=30, train_mode=True, **kwargs):
        """
        Args:
            file_name (Optional[str]): Name of Unity environment binary.
            base_port (int): Port number to connect to Unity environment. `worker_id` increments on top of this.
            worker_id (int): Number to add to `base_port`. Used for asynchronous agent scenarios.
            docker_training (bool): Informs this class whether the process is being run within a container.
                Default: False.
            no_graphics (bool): Whether to run the Unity simulator in no-graphics mode. Default: False.
            timeout_wait (int): Time (in seconds) to wait for connection from environment.
            train_mode (bool): Whether to run in training mode, speeding up the simulation. Default: True.
        """
        # First create the UnityMLAgentsEnvironment to get state and action spaces, then create RLgraph Environment
        # instance.
        self.mlagents_env = UnityEnvironment(
            file_name, worker_id, base_port, seed, docker_training, no_graphics
        )
        all_brain_info = self.mlagents_env.reset()
        # Get all possible information from AllBrainInfo.
        # TODO: Which scene do we pick?
        self.scene_key = next(iter(all_brain_info))
        first_brain_info = all_brain_info[self.scene_key]
        num_environments = len(first_brain_info.agents)

        state_space = {}
        if len(first_brain_info.vector_observations[0]) > 0:
            state_space["vector"] = get_space_from_op(first_brain_info.vector_observations[0])
            # TODO: This is a hack.
            if state_space["vector"].dtype == np.float64:
                state_space["vector"].dtype = np.float32
        if len(first_brain_info.visual_observations) > 0:
            state_space["visual"] = get_space_from_op(first_brain_info.visual_observations[0])
        if first_brain_info.text_observations[0]:
            state_space["text"] = get_space_from_op(first_brain_info.text_observations[0])

        if len(state_space) == 1:
            self.state_key = next(iter(state_space))
            state_space = state_space[self.state_key]
        else:
            self.state_key = None
            state_space = Dict(state_space)
        brain_params = next(iter(self.mlagents_env.brains.values()))
        if brain_params.vector_action_space_type == "discrete":
            highs = brain_params.vector_action_space_size
            # MultiDiscrete (Tuple(IntBox)).
            if any(h != highs[0] for h in highs):
                action_space = Tuple([IntBox(h) for h in highs])
            # Normal IntBox:
            else:
                action_space = IntBox(
                    low=np.zeros_like(highs, dtype=np.int32),
                    high=np.array(highs, dtype=np.int32),
                    shape=(len(highs),)
                )
        else:
            action_space = get_space_from_op(first_brain_info.action_masks[0])
        if action_space.dtype == np.float64:
            action_space.dtype = np.float32

        super(MLAgentsEnv, self).__init__(
            num_environments=num_environments, state_space=state_space, action_space=action_space, **kwargs
        )

        # Caches the last observation we made (after stepping or resetting).
        self.last_state = None

    def get_env(self):
        return self

    def reset(self, index=0):
        # Reset entire MLAgentsEnv iff global_done is True.
        if self.mlagents_env.global_done is True or self.last_state is None:
            self.reset_all()
        return self.last_state[index]

    def reset_all(self):
        all_brain_info = self.mlagents_env.reset()
        self.last_state = self._get_state_from_brain_info(all_brain_info)
        return self.last_state

    def step(self, actions, text_actions=None, **kwargs):
        # MLAgents Envs don't like tuple-actions.
        if isinstance(actions[0], tuple):
            actions = [list(a) for a in actions]
        all_brain_info = self.mlagents_env.step(
            # TODO: Only support vector actions for now.
            vector_action=actions, memory=None, text_action=text_actions, value=None
        )
        self.last_state = self._get_state_from_brain_info(all_brain_info)
        r = self._get_reward_from_brain_info(all_brain_info)
        t = self._get_terminal_from_brain_info(all_brain_info)
        return self.last_state, r, t, None

    def render(self):
        # TODO: If no_graphics is True, maybe user can render through this method manually?
        pass

    def terminate(self):
        self.mlagents_env.close()

    def terminate_all(self):
        return self.terminate()

    def __str__(self):
        return "MLAgentsEnv(port={}{})".format(
            self.mlagents_env.port, " [loaded]" if self.mlagents_env._loaded else ""
        )

    def _get_state_from_brain_info(self, all_brain_info):
        brain_info = all_brain_info[self.scene_key]
        if self.state_key is None:
            return {"vector": list(brain_info.vector_observations), "visual": list(brain_info.visual_observations),
                    "text": list(brain_info.text_observations)}
        elif self.state_key == "vector":
            return list(brain_info.vector_observations)
        elif self.state_key == "visual":
            return list(brain_info.visual_observations)
        elif self.state_key == "text":
            return list(brain_info.text_observations)

    def _get_reward_from_brain_info(self, all_brain_info):
        brain_info = all_brain_info[self.scene_key]
        return [np.array(r_, dtype=np.float32) for r_ in brain_info.rewards]

    def _get_terminal_from_brain_info(self, all_brain_info):
        brain_info = all_brain_info[self.scene_key]
        return brain_info.local_done
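A hedged sketch of driving the vectorized adapter above; the file name is a placeholder, and the num_environments attribute and action_space.sample() call are assumed to be provided by the RLgraph VectorEnv and Space base classes:

env = MLAgentsEnv(file_name="./builds/3DBall", worker_id=0)        # placeholder path
states = env.reset_all()                                           # one observation per agent
actions = [env.action_space.sample() for _ in range(env.num_environments)]  # assumed base-class attributes
next_states, rewards, terminals, _ = env.step(actions)
env.terminate()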
Example No. 30
    def __init__(
        self,
        environment_filename: str,
        worker_id: int = 0,
        use_visual: bool = False,
        uint8_visual: bool = False,
        multiagent: bool = False,
        flatten_branched: bool = False,
        no_graphics: bool = False,
        allow_multiple_visual_obs: bool = False,
    ):
        """
        Environment initialization
        :param environment_filename: The UnityEnvironment path or file to be wrapped in the gym.
        :param worker_id: Worker number for environment.
        :param use_visual: Whether to use visual observation or vector observation.
        :param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
        :param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
        :param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
            MultiDiscrete.
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        :param allow_multiple_visual_obs: If True, return a list of visual observations instead of only one.
        """
        self._env = UnityEnvironment(environment_filename,
                                     worker_id,
                                     no_graphics=no_graphics)

        # Take a single step so that the brain information will be sent over
        if not self._env.brains:
            self._env.step()

        self.name = self._env.academy_name
        self.visual_obs = None
        self._current_state = None
        self._n_agents = None
        self._multiagent = multiagent
        self._flattener = None
        self.game_over = (
            False
        )  # Hidden flag used by Atari environments to determine if the game is over
        self._allow_multiple_visual_obs = allow_multiple_visual_obs

        # Check brain configuration
        if len(self._env.brains) != 1:
            raise UnityGymException(
                "There can only be one brain in a UnityEnvironment "
                "if it is wrapped in a gym.")
        if len(self._env.external_brain_names) <= 0:
            raise UnityGymException(
                "There are not any external brain in the UnityEnvironment")

        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        if use_visual and brain.number_visual_observations == 0:
            raise UnityGymException(
                "`use_visual` was set to True, however there are no"
                " visual observations as part of this environment.")
        self.use_visual = brain.number_visual_observations >= 1 and use_visual

        if not use_visual and uint8_visual:
            logger.warning(
                "`uint8_visual was set to true, but visual observations are not in use. "
                "This setting will not have any effect.")
        else:
            self.uint8_visual = uint8_visual

        if brain.number_visual_observations > 1 and not self._allow_multiple_visual_obs:
            logger.warning(
                "The environment contains more than one visual observation. "
                "You must define allow_multiple_visual_obs=True to received them all. "
                "Otherwise, please note that only the first will be provided in the observation."
            )

        if brain.num_stacked_vector_observations != 1:
            raise UnityGymException(
                "There can only be one stacked vector observation in a UnityEnvironment "
                "if it is wrapped in a gym.")

        # Check for number of agents in scene.
        initial_info = self._env.reset()[self.brain_name]
        self._check_agents(len(initial_info.agents))

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            if len(brain.vector_action_space_size) == 1:
                self._action_space = spaces.Discrete(
                    brain.vector_action_space_size[0])
            else:
                if flatten_branched:
                    self._flattener = ActionFlattener(
                        brain.vector_action_space_size)
                    self._action_space = self._flattener.action_space
                else:
                    self._action_space = spaces.MultiDiscrete(
                        brain.vector_action_space_size)

        else:
            if flatten_branched:
                logger.warning(
                    "The environment has a non-discrete action space. It will "
                    "not be flattened.")
            high = np.array([1] * brain.vector_action_space_size[0])
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            shape = (
                brain.camera_resolutions[0].height,
                brain.camera_resolutions[0].width,
                brain.camera_resolutions[0].num_channels,
            )
            if uint8_visual:
                self._observation_space = spaces.Box(0,
                                                     255,
                                                     dtype=np.uint8,
                                                     shape=shape)
            else:
                self._observation_space = spaces.Box(0,
                                                     1,
                                                     dtype=np.float32,
                                                     shape=shape)

        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)