Example No. 1
def test_initialization():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                with pytest.raises(UnityActionException):
                    env.step([0])
                assert env.brain_names[0] == 'RealFakeBrain'
                env.close()
class BananaEnvironment:
    def __init__(self, file_name=None, **kwargs):
        self.__env = UnityEnvironment(file_name=file_name,
                                      seed=1234)  # create environment
        self.__brain_name = self.__env.brain_names[0]
        self.__env.reset()
        self.__state_dim = self.__env.brains[
            self.__brain_name].vector_observation_space_size
        self.__action_dim = self.__env.brains[
            self.__brain_name].vector_action_space_size

    def step(self, action):
        env_info = self.__env.step(action)[self.__brain_name]  # step
        next_state = env_info.vector_observations[0][
            np.newaxis]  # get the next state
        reward = env_info.rewards[0]  # get the reward
        done = env_info.local_done[0]  # see if episode has finished
        return next_state, reward, done

    def reset(self, train_mode=True):
        env_info = self.__env.reset(train_mode=train_mode)[self.__brain_name]
        state = env_info.vector_observations[0][np.newaxis]
        return state

    def get_state_dim(self):
        return self.__state_dim

    def get_action_dim(self):
        return self.__action_dim

    def close(self):
        self.__env.close()
def test(unity_environment: str, checkpoint: str, seed: int = 42):

    env = UnityEnvironment(file_name=unity_environment, worker_id=42)

    print("start testing")
    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    env_info = env.reset(train_mode=False)[brain_name]

    agent = Agent(state_size=len(env_info.vector_observations[0]),
                  action_size=brain.vector_action_space_size,
                  seed=seed)

    agent.qnetwork_local.load_state_dict(torch.load(checkpoint))

    score = 0
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations[0]

    for j in range(200):
        action = agent.act(state)
        env_info = env.step(action)[brain_name]
        state = env_info.vector_observations[0]
        score += env_info.rewards[0]
        done = env_info.local_done[0]

        if done:
            print(f"Score: {score}")
            env.close()
            return score
Example No. 4
class UnityEnv():
    def __init__(self,
                 env_file='data/Tennis_Windows_x86_64/Tennis.exe',
                 no_graphics=True):
        self.env = UnityEnvironment(file_name=env_file,
                                    no_graphics=no_graphics)
        self.brain_name = self.env.brain_names[0]
        brain = self.env.brains[self.brain_name]
        self.action_size = brain.vector_action_space_size
        if type(self.action_size) != int:
            self.action_size = self.action_size[0]
        env_info = self.env.reset(train_mode=True)[self.brain_name]
        self.state_size = env_info.vector_observations.shape[1]
        self.num_agents = len(env_info.agents)

    def reset(self, train=True):
        env_info = self.env.reset(train_mode=train)[self.brain_name]
        # combine both agent state
        return env_info.vector_observations

    def close(self):
        self.env.close()

    def step(self, actions):
        actions = np.clip(actions, -1, 1)
        env_info = self.env.step(actions)[self.brain_name]
        # combine both agent state
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = np.array(env_info.local_done).astype(np.float32)
        return next_states, np.array(rewards), dones

    @property
    def action_shape(self):
        return (self.num_agents, self.action_size)
Example No. 5
    def test(self, params):
        model_path = "navigation_{}.pth"

        env = UnityEnvironment(file_name="Reacher.x86_64")
        brain_name = env.brain_names[0]
        brain = env.brains[brain_name]

        env_info = env.reset(train_mode=False)[brain_name]

        num_states = len(env_info.vector_observations[0])
        num_actions = brain.vector_action_space_size

        agent = Agent(params, num_states, num_actions, None)
        agent.load_model(model_path)

        state = env_info.vector_observations[0]
        score = 0
        while True:
            action = agent.get_action(state)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            score += reward
            state = next_state
            if done:
                break

        print("test score: {}".format(score))

        env.close()
Example No. 6
class TennisEnv:
    def __init__(self):
        #self.env = UnityEnvironment(file_name="Tennis_Windows_x86_64/Tennis.exe")
        self.env = UnityEnvironment(file_name="Tennis_Linux/Tennis.x86_64")

        # get the default brain
        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        states, env_info = self.reset(True)
        # number of agents
        self.num_agents = len(env_info.agents)
        print('Number of agents:', self.num_agents)
        # size of each action
        self.action_size = self.brain.vector_action_space_size
        print('Size of each action:', self.action_size)
        # examine the state space
        self.state_size = states.shape[-1]
        print('There are {} agents. Each observes a state with length: {}'.format(2, self.state_size))
        print('The state for the first agent looks like:', states[0, :])
        print('The state for the second agent looks like:', states[1, :])

    def reset(self, train_mode=True):
        env_info = self.env.reset(train_mode=train_mode)[self.brain_name]
        states = env_info.vector_observations
        return states, env_info

    def step(self, actions):
        env_info = self.env.step(actions)[self.brain_name]  # send all actions to the environment
        next_states = env_info.vector_observations
        rewards = env_info.rewards  # get reward (for each agent)
        dones = env_info.local_done
        return next_states, rewards, dones, env_info

    def close(self):
        self.env.close()
class UnityEnvWrapper:
    """ This class provides gym-like wrapper around the unity environment """
    def __init__(self, env_file: str = 'Banana_Linux_NoVis/Banana.x86_64'):
        self._env = UnityEnvironment(file_name=env_file)
        self._brain_name = self._env.brain_names[0]
        self._brain = self._env.brains[self._brain_name]

        env_info = self._env.reset(train_mode=True)[self._brain_name]
        state = env_info.vector_observations[0]

        self.state_space_dim = len(state)
        self.action_space_size = self._brain.vector_action_space_size

    def reset(self, train_mode: bool = False):
        env_info = self._env.reset(train_mode)[self._brain_name]
        state = env_info.vector_observations[0]
        return state

    def step(self, action):
        env_info = self._env.step(action)[
            self._brain_name]  # send the action to the environment
        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]  # get the reward
        done = env_info.local_done[0]  # see if episode has finished
        return next_state, reward, done, None

    def close(self):
        self._env.close()
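
A minimal usage sketch for the wrapper above, assuming the default Banana build is present at its relative path and that action_space_size is a plain int for this discrete-action task (both assumptions, not stated in the snippet):

import numpy as np

env = UnityEnvWrapper()                      # uses the default Banana_Linux_NoVis build
state = env.reset(train_mode=False)
for _ in range(10):
    action = np.random.randint(env.action_space_size)   # random discrete action
    state, reward, done, _ = env.step(action)
    if done:
        break
env.close()
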
class ProjectEnv():
    def __init__(self, env_file_name):
        self.env = UnityEnvironment(file_name=env_file_name)
        self.brain_name = self.env.brain_names[0]

        env_info = self.env.reset(train_mode=True)[self.brain_name]

        self.action_size = self.env.brains[
            self.brain_name].vector_action_space_size
        self.state_size = len(env_info.vector_observations[0])

    def reset(self, train_mode=True):
        env_info = self.env.reset(train_mode)[self.brain_name]
        next_state = env_info.vector_observations[0]

        return next_state

    def step(self, action):
        env_info = self.env.step(action)[self.brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]

        return next_state, reward, done, env_info

    def close(self):
        self.env.close()
Example No. 9
class EnvWrapper:
    """A wrapper for the unity environment which implements functionalies similar to openai gym

    Params
    ======
        path(string): relative/absolute path to env executable
    """
    def __init__(self, path, no_graphics=True):
        self.env = UnityEnvironment(file_name=path, no_graphics=no_graphics)
        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        self.env_info = None
        self.reset()

        self.action_space = self.brain.vector_action_space_size
        self.observation_space = self.env_info.vector_observations.shape[1]

    def step(self, actions):
        self.env_info = self.env.step(actions)[self.brain_name]
        next_state = self.env_info.vector_observations
        reward = self.env_info.rewards
        done = self.env_info.local_done
        return next_state, reward, done, None

    def reset(self):
        self.env_info = self.env.reset(train_mode=True)[self.brain_name]
        return self.env_info.vector_observations
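
A minimal usage sketch under stated assumptions: the Reacher path is hypothetical, the environment is continuous-control, and action_space is a plain int; EnvWrapper defines no close() of its own, so the inner env is closed directly:

import numpy as np

env = EnvWrapper('Reacher_Linux/Reacher.x86_64', no_graphics=True)   # hypothetical path
states = env.reset()                                   # (num_agents, observation_space)
actions = np.clip(np.random.randn(states.shape[0], env.action_space), -1, 1)
next_states, rewards, dones, _ = env.step(actions)
env.env.close()                                        # close the underlying UnityEnvironment
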
Example No. 10
class BananaMazeEnv(EnvInterface):
    def __init__(self,
                 env_binary='../bin/unity_banana_maze/Banana.x86_64',
                 train_mode=True):
        self.env = UnityEnvironment(file_name=env_binary)

        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        self.train_mode = train_mode
        self.info = self.env.reset(train_mode=self.train_mode)[self.brain_name]

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode)[self.brain_name]
        state = env_info.vector_observations[0]
        return state

    def step(self, action):
        env_info = self.env.step(action)[self.brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        return (next_state, reward, int(done))

    def close(self):
        self.env.close()

    @property
    def state_size(self):
        state = self.info.vector_observations[0]
        return len(state)

    @property
    def action_size(self):
        action_size = self.brain.vector_action_space_size
        return action_size
class UnityMLVectorMultiAgent():
    """Multi-agent UnityML environment with vector observations."""
    def __init__(self, evaluation_only=False, seed=0):
        """Load platform specific file and initialize the environment."""
        os = platform.system()
        if os == 'Darwin':
            file_name = 'Tennis.app'
        elif os == 'Linux':
            file_name = 'Tennis_Linux/Tennis.x86_64'
        self.env = UnityEnvironment(file_name='unity_envs/' + file_name,
                                    seed=seed)
        self.brain_name = self.env.brain_names[0]
        self.evaluation_only = evaluation_only

    def reset(self):
        """Reset the environment."""
        info = self.env.reset(
            train_mode=not self.evaluation_only)[self.brain_name]
        state = info.vector_observations
        return state

    def step(self, action):
        """Take a step in the environment."""
        info = self.env.step(action)[self.brain_name]
        state = info.vector_observations
        reward = info.rewards
        done = info.local_done
        return state, reward, done
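
A minimal usage sketch; two tennis agents with 2-dimensional continuous actions is an assumption, and the inner env is closed directly because the class defines no close():

import numpy as np

env = UnityMLVectorMultiAgent(evaluation_only=True, seed=0)
states = env.reset()                                 # one observation row per agent
actions = np.clip(np.random.randn(2, 2), -1, 1)      # assumed shape: (num_agents, action_size)
states, rewards, dones = env.step(actions)
env.env.close()
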
Example No. 12
class UnityMultiAgentEnvWrapper:
    """ This class provides gym-like wrapper around the unity environment with multiple agents """

    def __init__(self, env_file: str, train_mode: bool = False):
        self._env = UnityEnvironment(file_name=env_file)
        self._train_mode = train_mode
        self._brain_name = self._env.brain_names[0]
        self._brain = self._env.brains[self._brain_name]

        env_info = self._env.reset(train_mode=True)[self._brain_name]

        self.num_agents = len(env_info.agents)
        self.state_space_dim = env_info.vector_observations.shape[1]
        self.action_space_dim = self._brain.vector_action_space_size

    def reset(self):
        env_info = self._env.reset(self._train_mode)[self._brain_name]
        state = env_info.vector_observations
        return state

    def step(self, action):
        env_info = self._env.step(action)[self._brain_name]  # send the action to the environment
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done
        return next_states, rewards, dones, None

    def close(self):
        self._env.close()
Example No. 13
class Reacher:
    def __init__(self):
        self.env = UnityEnvironment(file_name="files/Reacher.app")
        log.info("Reacher environment set up")

    def reset(self, train_mode=True) -> np.ndarray:
        """Reset the environment

        :param train_mode boolean to indicate whether to start environment in train mode
        :return the initial state
        """
        env_info = self.env.reset(train_mode=train_mode)["ReacherBrain"]
        log.info("Reacher environment reset with train_mode=%s", train_mode)
        return env_info.vector_observations

    def step(self, action: np.ndarray) -> StepResult:
        """Take the action in the environment and collect the relevant information to return from the environment"""
        log.debug("taking step")
        assert action.shape == (20, 4)
        action_clipped = np.clip(action, -1, 1)
        env_info = self.env.step(action_clipped)["ReacherBrain"]
        # check the assumption that the end of an interaction is determined by a number of steps, hence all are done
        # at the same time
        if np.any(env_info.local_done):
            assert np.all(env_info.local_done)
        return StepResult(done=env_info.local_done,
                          rewards=env_info.rewards,
                          next_state=env_info.vector_observations)
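
A minimal usage sketch; the 20 parallel arms and 4-dimensional actions follow directly from the assert in step(), while closing the inner env is an assumption since the class defines no close():

import numpy as np

reacher = Reacher()
states = reacher.reset(train_mode=True)             # shape (20, state_dim)
result = reacher.step(np.random.randn(20, 4))       # StepResult(done, rewards, next_state)
print(sum(result.rewards))
reacher.env.close()
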
Example No. 14
class BananaEnv(gym.Env):
    def __init__(self, filename):
        super(BananaEnv, self).__init__()
        self.unity_env = UnityEnvironment(file_name=filename)
        self.brain_name = self.unity_env.brain_names[0]
        self.brain = self.unity_env.brains[self.brain_name]
        action_size = int(self.brain.vector_action_space_size)
        self.action_space = gym.spaces.Discrete(action_size)
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(37, ))

    def reset(self, train_mode=True):
        env_info = self.unity_env.reset(train_mode=train_mode)[self.brain_name]
        state = env_info.vector_observations[0]
        return state

    def step(self, action):
        env_info = self.unity_env.step(action)[self.brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        return next_state, reward, done, env_info

    def close(self):
        return self.unity_env.close()

    def render(self, mode='human'):
        pass
Example No. 15
class CollabCompeteEnv:
    def __init__(self, mode='train'):
        if mode != 'train':
            print('[Mode] Setting to Test Mode')
            self.train = False
        else:
            print('[Mode] Setting to Train Mode')
            self.train = True
        self.base_env = UnityEnvironment(file_name='Tennis.app')
        self.brain_name = self.base_env.brain_names[0]
        self.brain = self.base_env.brains[self.brain_name]
        self.action_size = self.brain.vector_action_space_size

    def reset(self):
        self.env_info = self.base_env.reset(
            train_mode=self.train)[self.brain_name]
        return self.get_state()

    def get_state(self):
        return self.env_info.vector_observations

    def step(self, action):
        self.env_info = self.base_env.step(action)[
            self.brain_name]  # send the action to the environment
        next_states = self.get_state()
        rewards = self.env_info.rewards
        dones = self.env_info.local_done
        return next_states, rewards, dones, None

    def close(self):
        self.base_env.close()
Example No. 16
class Environment():
    """Learning Environment."""

    def __init__(self, file_name="environments/Tennis.app", no_graphics=True):
        """Initialize parameters and build model.
        Params
        ======
            file_name (string): unity environment file
            no_graphics (boolean): Start environment with graphics
        """
        self.env = UnityEnvironment(file_name=file_name, no_graphics=no_graphics)
        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        self.reset()
        self.action_space_size = self.brain.vector_action_space_size
        self.state_size = len(self.info.vector_observations[0])
        self.num_agents = len(self.info.agents)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.env.close()

    def reset(self, train_mode=False):
        self.info = self.env.reset(train_mode=train_mode)[self.brain_name]
        return self.info

    def step(self, action):
        self.info = self.env.step(action)[self.brain_name]
        return self.info
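
A minimal sketch of the context-manager interface; the Tennis path and the assumption that action_space_size is a plain int are illustrative, not taken from this snippet:

import numpy as np

with Environment(file_name="environments/Tennis.app", no_graphics=True) as env:
    env.reset(train_mode=True)
    actions = np.clip(np.random.randn(env.num_agents, env.action_space_size), -1, 1)
    info = env.step(actions)
    print(info.rewards, info.local_done)   # the environment is closed automatically on exit
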
Example No. 17
class TennisEnv:
    def __init__(self, env_path):
        self.env = UnityEnvironment(file_name=env_path)
        self.brain_name = self.env.brain_names[0]
        brain = self.env.brains[self.brain_name]
        self.action_size = brain.vector_action_space_size
        self.state_size = brain.vector_observation_space_size

    def reset(self, train_mode):
        env_info = self.env.reset(train_mode)[self.brain_name]
        return env_info.vector_observations

    def step(self, action):
        env_info = self.env.step(action)[self.brain_name]
        return env_info.vector_observations, env_info.rewards, env_info.local_done

    def close(self):
        self.env.close()

    def run_episode(self, agent1, agent2, fast=False):
        states = self.reset(train_mode=fast)
        score1 = 0
        score2 = 0
        done = False
        while not done:
            action1 = agent1.compute_action(states[0], epsilon=0)
            action2 = agent2.compute_action(states[1], epsilon=0)
            states, rewards, dones = self.step([action1, action2])
            score1 += rewards[0]
            score2 += rewards[1]
            if (rewards[0] != 0 or rewards[1] != 0) and not fast:
                print(f"Scores: {score1:.2f}, {score2:.2f}")
        return max(score1, score2)
Example No. 18
def run_agent(num_episodes=1):
    env = UnityEnvironment(file_name="env/Reacher20.app")

    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    env_info = env.reset(train_mode=True)[brain_name]

    action_size = brain.vector_action_space_size
    state_size = env_info.vector_observations.shape[1]
    num_agents = len(env_info.agents)

    agent = Agent(state_size=state_size, action_size=action_size, random_seed=2)
    agent.actor_local.load_state_dict(torch.load("model/checkpoint_actor.pth", map_location='cpu'))
    agent.critic_local.load_state_dict(torch.load("model/checkpoint_critic.pth", map_location='cpu'))

    for i in range(num_episodes):
        scores = np.zeros(num_agents)
        env_info = env.reset(train_mode=False)[brain_name]
        states = env_info.vector_observations
        while True:
            actions = agent.act(states)
            env_info = env.step(actions)[brain_name]
            next_states = env_info.vector_observations
            scores += env_info.rewards
            states = next_states
            if np.any(env_info.local_done):
                break
        print(f"{i + 1} episode, averaged score: {np.mean(scores)}")
Example No. 19
def run(env_file, model_file, num_episodes=5):
    env = UnityEnvironment(file_name=env_file)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=False)[brain_name]
    state = env_info.vector_observations[0]
    state_shape = state.shape
    agent = Agent(state_shape=state_shape, action_size=action_size, seed=0)

    agent.qnetwork_local.load_state_dict(torch.load(model_file))

    for i in range(num_episodes):
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        while True:
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            score += reward
            state = next_state
            if done:
                break

        print("Score: {}".format(score))
    env.close()
Example No. 20
class UnityEnvironmentWrapper(EnvInterface):
    def __init__(self, env_binary='../bin/tennis/Tennis.x86_64', train_mode=True):
        self.env = UnityEnvironment(file_name=env_binary)

        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        self.train_mode = train_mode
        self.info = self.env.reset(train_mode=self.train_mode)[self.brain_name]
        self.num_agents = len(self.info.agents)

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode)[self.brain_name]
        states = env_info.vector_observations
        return states

    def step(self, actions):
        env_info = self.env.step(actions)[self.brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done
        return (next_states, rewards, np.array(dones) * 1)

    def close(self):
        self.env.close()

    @property
    def state_size(self):
        state = self.info.vector_observations[0]
        return len(state)

    @property
    def action_size(self):
        action_size = self.brain.vector_action_space_size
        return action_size
Example No. 21
class EnvUnityMLAgents:
    def __init__(self, file_name, train_mode=True):
        self.env = UnityEnvironment(file_name=file_name)
        self.brain_name = self.env.brain_names[0]
        self.train_mode = train_mode

        env_info = self.env.reset(train_mode=self.train_mode)[self.brain_name]
        self.num_agents = len(env_info.agents)
        brain = self.env.brains[self.brain_name]
        self.action_size = brain.vector_action_space_size
        states = env_info.vector_observations
        self.state_size = states.shape[1]

        print('Number of agents:', self.num_agents)
        print('Size of each action:', self.action_size)
        print('There are {} agents. Each observes a state with length: {}'.
              format(states.shape[0], self.state_size))
        print('The state for the first agent looks like:', states[0])

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode)[self.brain_name]
        rewards = env_info.rewards
        next_states = env_info.vector_observations
        dones = env_info.local_done
        return rewards, next_states, dones

    def step(self, actions):
        env_info = self.env.step(actions)[self.brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done
        return rewards, next_states, dones

    def close(self):
        self.env.close()
Example No. 22
class VisualEnvironment():
    def __init__(self, env_file, state_stack=4, train=True):
        self.state_stack = state_stack
        self.env_file = env_file

        self.env = UnityEnvironment(file_name=env_file)
        self.brain_name = self.env.brain_names[0]
        brain = self.env.brains[self.brain_name]
        self.action_size = brain.vector_action_space_size

    def step(self, action):
        env_info = self.env.step(action)[self.brain_name]
        self.states_history.append(env_info.visual_observations[0].transpose(
            [0, 3, 1, 2]))
        next_state = np.array(self.states_history).transpose([1, 2, 0, 3, 4])
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        return (next_state, reward, done)

    def sample(self):
        return np.random.randint(0, self.action_size)

    # batch_size, channels (RGB), depth (state_stack), height, width
    def reset(self, train=True):
        self.states_history = deque(maxlen=self.state_stack)
        for _ in range(self.state_stack):
            self.states_history.append(np.zeros((1, 3, 84, 84)))
        # Reset environment
        env_info = self.env.reset(train_mode=train)[self.brain_name]
        self.states_history.append(env_info.visual_observations[0].transpose(
            [0, 3, 1, 2]))
        return np.array(self.states_history).transpose([1, 2, 0, 3, 4])

    def close(self):
        self.env.close()
Example No. 23
class UnityEnvironmentTask:
    def __init__(self, file_name, train_mode=True):
        self.train_mode = train_mode
        self.env = UnityEnvironment(file_name=file_name)
        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        self.env_info = self.env.reset(
            train_mode=self.train_mode)[self.brain_name]
        self.num_agents = len(self.env_info.agents)
        self.action_dim = self.brain.vector_action_space_size
        print('Brain name:', self.brain_name)
        print('Number of agents:', self.num_agents)
        print('Size of each action:', self.action_dim)

    def reset(self):
        env_info = self.env.reset(train_mode=self.train_mode)[self.brain_name]
        return env_info.vector_observations

    def step(self, actions):
        env_info = self.env.step(np.clip(actions, -1, 1))[self.brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        terminals = env_info.local_done
        if np.any(terminals):
            next_states = self.reset()
        return next_states, np.asarray(rewards, dtype=np.float32), np.asarray(
            terminals, dtype=np.float32), env_info

    def close(self):
        self.env.close()
Example No. 24
class Environment():

    def __init__(self, path, seed=0):
        self.env = UnityEnvironment(file_name=path, seed=seed)
        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]
        # self.state_size = self.brain.vector_observation_space_size # bug, returns 8 :.(
        self.action_size = self.brain.vector_action_space_size
        info = self.env.reset(train_mode=True)[self.brain_name]
        self.state_size = len(info.vector_observations[0])
        self.num_agents = len(info.agents)

    def close(self):
        self.env.close()

    def reset(self, train=True):
        info = self.env.reset(train_mode=train)[self.brain_name]
        return info.vector_observations

    def step(self, action):
        info = self.env.step(action)[self.brain_name]
        state = info.vector_observations
        reward = info.rewards
        done = info.local_done
        return state, reward, done, info
Example No. 25
def main():
    env_name = file_name = "Environments/Banana_Linux/Banana.x86_64"
    train_mode = True  # Whether to run the environment in training or inference mode
    env = UnityEnvironment(file_name=env_name, no_graphics=False)
    # env = UnityEnvironment(file_name="/data/Banana_Linux_NoVis/Banana.x86_64")
    # Set the default brain to work with
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=True)[brain_name]
    # Action and Observation spaces
    nA = brain.vector_action_space_size
    nS = env_info.vector_observations.shape[1]
    print('Observation Space {}, Action Space {}'.format(nS, nA))
    seed = 7
    agent = Priority_DQN(nS, nA, seed, UPDATE_EVERY, BATCH_SIZE, BUFFER_SIZE,
                         MIN_BUFFER_SIZE, LR, GAMMA, TAU, CLIP_NORM, ALPHA)
    agent.qnetwork_local.load_state_dict(torch.load('checkpoint.pth'))
    # scores = train(agent,env,brain_name)
    for i in range(1):
        # UnityEnvironment has no gym-style reset()/render(); use the brain dict API instead
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        for j in range(500):
            action = agent.act(state)
            env_info = env.step(action)[brain_name]
            state = env_info.vector_observations[0]
            score += env_info.rewards[0]
            done = env_info.local_done[0]
            # frame capture via env.render()/plt.savefig is not available for UnityEnvironment
            if done:
                break
        print('Score: {}'.format(score))
    # plot the scores
    # plot(scores)  # only meaningful once the train(...) call above is re-enabled
    env.close()
Example No. 26
def process(args):
    env = UnityEnvironment(file_name="Banana.app")
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    state = env_info.vector_observations[0]  # get the current state
    score = 0  # initialize the score
    action_size = brain.vector_action_space_size
    state_size = len(state)

    agent = Agent(state_size, action_size, 1, args.model_path)

    while True:
        action = agent.act(state, 0.0)  # select an action
        env_info = env.step(action)[
            brain_name]  # send the action to the environment
        next_state = env_info.vector_observations[0]  # get the next state
        reward = env_info.rewards[0]  # get the reward
        done = env_info.local_done[0]  # see if episode has finished
        score += reward  # update the score
        state = next_state  # roll over the state to next time step
        if done:  # exit loop if episode finished
            break

    print("Score: {}".format(score))
class BananaWrapper:
    """Banana Unity Environment Wrapper.
    """

    def __init__(self, file_name: Path):
        self.env = UnityEnvironment(file_name)
        # Get the default brain
        self.brain_name = self.env.brain_names[0]
        self.brain = self.env.brains[self.brain_name]

    def reset(self):
        env_info = self.env.reset(train_mode=True)[self.brain_name]
        state = env_info.vector_observations[0]
        return state

    def step(self, action):
        env_info = self.env.step(action)[self.brain_name]
        next_state = env_info.vector_observations[0]
        reward = env_info.rewards[0]
        done = env_info.local_done[0]
        return next_state, reward, done

    def close(self):
        self.env.close()

    @property
    def action_size(self):
        return self.brain.vector_action_space_size

    @property
    def observation_size(self):
        return self.brain.vector_observation_space_size
Example No. 28
class UnityMultiAgent():
    """Multi-agent UnityML environment."""
    def __init__(self,
                 evaluation_only=False,
                 seed=0,
                 file_name='Tennis_Linux_NoVis/Tennis.x86_64'):
        """Load env file (platform specific, see README) and initialize the environment."""
        self.env = UnityEnvironment(file_name=file_name, seed=seed)
        self.brain_name = self.env.brain_names[0]
        self.evaluation_only = evaluation_only

    def reset(self):
        """Reset the environment."""
        info = self.env.reset(
            train_mode=not self.evaluation_only)[self.brain_name]
        state = info.vector_observations
        return state

    def step(self, action):
        """Take a step in the environment."""
        info = self.env.step(action)[self.brain_name]
        state = info.vector_observations
        reward = info.rewards
        done = info.local_done
        return state, reward, done
Example No. 29
class BananaEnvWrapper(object):
    """Wraps the Udacity environment into an object behaving like an Atari env."""

    blank_state = torch.zeros(1, 37, dtype=torch.uint8)

    def __init__(self, train_mode=True, device='cuda'):
        self.train_mode = train_mode
        self.device = device
        self.unity_env = UnityEnvironment(
            file_name="/home/philipp/udacity/deep-reinforcement-learning/p1_navigation/Banana_Linux/Banana.x86_64")

        # get the default brain
        self.brain_name = self.unity_env.brain_names[0]
        brain = self.unity_env.brains[self.brain_name]

        # reset the environment
        env_info = self.unity_env.reset(train_mode=self.train_mode)[self.brain_name]

        # number of agents in the environment
        print('Number of agents:', len(env_info.agents))

        # number of actions
        self.action_space = brain.vector_action_space_size
        print('Number of actions:', self.action_space)

        # examine the state space
        state = env_info.vector_observations[0]
        print('States look like:', state)
        self.state_size = len(state)
        print('States have length:', self.state_size)

        self.score = 0
        self.episode = 0

    def eval(self):
        self.train_mode = False

    def train(self):
        self.train_mode = True

    def reset(self):
        # print("Score: %d, epsiode: %d" % (self.score, self.episode))
        self.episode += 1
        self.score = 0
        env_info = self.unity_env.reset(train_mode=self.train_mode)[self.brain_name]
        return env_info.vector_observations[0]  # Return current state

    def step(self, action):
        env_info = self.unity_env.step(action)[self.brain_name]
        state = env_info.vector_observations[0]  # get the current state
        reward = env_info.rewards[0]  # get the reward
        done = env_info.local_done[0]  # see if episode has finished
        self.score += reward  # update the score
        return state, reward, done

    def close(self):
        self.unity_env.close()
Example No. 30
def play():
    env = UnityEnvironment(file_name='./Tennis.app')

    # get the default brain
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=False)[brain_name]

    # number of agents
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)

    # size of each action
    action_size = brain.vector_action_space_size
    print('Size of each action:', action_size)

    # examine the state space
    states = env_info.vector_observations
    state_size = states.shape[1]
    print('There are {} agents. Each observes a state with length: {}'.format(
        states.shape[0], state_size))

    # create agent
    maddpg_agent = MADDPG(state_size=state_size,
                          action_size=action_size,
                          seed=0)

    # load weights
    for i, agent in enumerate(maddpg_agent.maddpg_agent):
        agent.policy_local.load_state_dict(
            torch.load('models/checkpoint_actor_{}.pth'.format(i)))

    # reverse weights so agent 1 is on the left instead
    # for i, agent in enumerate(reversed(maddpg_agent.maddpg_agent)):
    #     agent.policy_local.load_state_dict(torch.load('models/checkpoint_actor_{}.pth'.format(i)))

    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    states = env_info.vector_observations  # get the current state (for each agent)
    scores = np.zeros(num_agents)  # initialize the score (for each agent)
    while True:
        actions = maddpg_agent.act(
            states, add_noise=False)  # select an action (for each agent)
        env_info = env.step(actions)[
            brain_name]  # send all actions to the environment
        next_states = env_info.vector_observations  # get next state (for each agent)
        rewards = env_info.rewards  # get reward (for each agent)
        dones = env_info.local_done  # see if episode finished
        scores += rewards  # update the score (for each agent)
        states = next_states  # roll over states to next time step
        if np.any(dones):  # exit loop if episode finished
            break

    print('Agent 0 score this episode: {}'.format(scores[0]))
    print('Agent 1 score this episode: {}'.format(scores[1]))

    env.close()
def train_agent(env: UnityEnvironment,
                brain_name: str,
                agent: Agent,
                n_episodes: int,
                max_steps: int = 1500) -> list:
    """
    Trains the agent for n episodes
    :param env:
    :param brain_name:
    :param agent:
    :param n_episodes: number of episodes to train
    :param max_steps: max amount of steps
    :return: returns an array containing the score of every episode
    """
    scores = []
    # store the last 100 scores into a queue to check if the agent reached the goal
    scores_window = deque(maxlen=100)

    for i_episode in range(1, n_episodes + 1):
        # reset the environment
        env_info = env.reset(train_mode=True)[brain_name]
        agent.reset()
        state = env_info.vector_observations[0]
        score = 0

        # the environment will end the episode after n steps, thus no manual termination of the episode is needed
        for a in range(max_steps):
            action = agent.act(state, add_noise=False)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]
            reward = env_info.rewards[0]
            done = env_info.local_done[0]
            score += reward

            agent.step((state, action, reward, next_state, done))

            state = next_state
            if done:
                break

        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score

        # print('\rEpisode {}\tavg Score: {:.2f}'.format(i_episode, np.mean(scores_window)), end="")
        if i_episode % 10 == 0:
            print(
                f"""Episode {i_episode}: Average Score: {np.mean(scores_window):.2f}"""
            )

        if np.mean(scores_window) >= 30.0:
            print(
                '\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'
                .format(i_episode, np.mean(scores_window)))
            torch.save(agent.actor_local.state_dict(), 'checkpoint-actor.pth')
            torch.save(agent.critic_local.state_dict(),
                       'checkpoint-critic.pth')
            break
    return scores
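
A hedged wiring sketch for train_agent; the executable name and the Agent constructor signature are assumptions borrowed from other examples in this collection, not from this snippet:

env = UnityEnvironment(file_name="Reacher.x86_64")        # hypothetical build name
brain_name = env.brain_names[0]
env_info = env.reset(train_mode=True)[brain_name]
agent = Agent(state_size=env_info.vector_observations.shape[1],   # assumed Agent signature
              action_size=env.brains[brain_name].vector_action_space_size,
              random_seed=2)
scores = train_agent(env, brain_name, agent, n_episodes=300)
env.close()
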
Example No. 32
def test_step():
    with mock.patch('subprocess.Popen'):
        with mock.patch('socket.socket') as mock_socket:
            with mock.patch('glob.glob') as mock_glob:
                mock_glob.return_value = ['FakeLaunchPath']
                mock_socket.return_value.accept.return_value = (mock_socket, 0)
                mock_socket.recv.return_value.decode.return_value = dummy_start
                env = UnityEnvironment(' ')
                brain = env.brains['RealFakeBrain']
                mock_socket.recv.side_effect = dummy_reset
                brain_info = env.reset()
                mock_socket.recv.side_effect = dummy_step
                brain_info = env.step([0] * brain.vector_action_space_size * len(brain_info['RealFakeBrain'].agents))
                with pytest.raises(UnityActionException):
                    env.step([0])
                brain_info = env.step([0] * brain.vector_action_space_size * len(brain_info['RealFakeBrain'].agents))
                with pytest.raises(UnityActionException):
                    env.step([0] * brain.vector_action_space_size * len(brain_info['RealFakeBrain'].agents))
                env.close()
                assert env.global_done
                assert isinstance(brain_info, dict)
                assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
                assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
                assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
                assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
                assert brain_info['RealFakeBrain'].vector_observations.shape[0] == \
                       len(brain_info['RealFakeBrain'].agents)
                assert brain_info['RealFakeBrain'].vector_observations.shape[1] == \
                       brain.vector_observation_space_size * brain.num_stacked_vector_observations
                assert not brain_info['RealFakeBrain'].local_done[0]
                assert brain_info['RealFakeBrain'].local_done[2]
class TrainerController(object):
    def __init__(self, env_path, run_id, save_freq, curriculum_file, fast_simulation, load, train,
                 worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path,
                 no_graphics):
        """
        :param env_path: Location to the environment executable to be loaded.
        :param run_id: The sub-directory name for model and summary statistics
        :param save_freq: Frequency at which to save model
        :param curriculum_file: Curriculum json file for environment
        :param fast_simulation: Whether to run the game at training speed
        :param load: Whether to load the model or randomly initialize
        :param train: Whether to train model, or only run inference
        :param worker_id: Number to add to communication port (5005). Used for multi-environment
        :param keep_checkpoints: How many model checkpoints to keep
        :param lesson: Start learning from this lesson
        :param seed: Random seed used for training.
        :param docker_target_name: Name of docker volume that will contain all data.
        :param trainer_config_path: Fully qualified path to location of trainer configuration file
        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
        """
        self.trainer_config_path = trainer_config_path
        if env_path is not None:
            env_path = (env_path.strip()
                        .replace('.app', '')
                        .replace('.exe', '')
                        .replace('.x86_64', '')
                        .replace('.x86', ''))  # Strip out executable extensions if passed
        # Recognize and use docker volume if one is passed as an argument
        if docker_target_name == '':
            self.docker_training = False
            self.model_path = './models/{run_id}'.format(run_id=run_id)
            self.curriculum_file = curriculum_file
            self.summaries_dir = './summaries'
        else:
            self.docker_training = True
            self.model_path = '/{docker_target_name}/models/{run_id}'.format(
                docker_target_name=docker_target_name,
                run_id=run_id)
            if env_path is not None:
                env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
                                                                     env_name=env_path)
            if curriculum_file is None:
                self.curriculum_file = None
            else:
                self.curriculum_file = '/{docker_target_name}/{curriculum_file}'.format(
                    docker_target_name=docker_target_name,
                    curriculum_file=curriculum_file)
            self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name)
        self.logger = logging.getLogger("unityagents")
        self.run_id = run_id
        self.save_freq = save_freq
        self.lesson = lesson
        self.fast_simulation = fast_simulation
        self.load_model = load
        self.train_model = train
        self.worker_id = worker_id
        self.keep_checkpoints = keep_checkpoints
        self.trainers = {}
        if seed == -1:
            seed = np.random.randint(0, 999999)
        self.seed = seed
        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
        self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
                                    curriculum=self.curriculum_file, seed=self.seed,
                                    docker_training=self.docker_training,
                                    no_graphics=no_graphics)
        if env_path is None:
            self.env_name = 'editor_'+self.env.academy_name
        else:
            self.env_name = os.path.basename(os.path.normpath(env_path))  # Extract out name of environment

    def _get_progress(self):
        if self.curriculum_file is not None:
            progress = 0
            if self.env.curriculum.measure_type == "progress":
                for brain_name in self.env.external_brain_names:
                    progress += self.trainers[brain_name].get_step / self.trainers[brain_name].get_max_steps
                return progress / len(self.env.external_brain_names)
            elif self.env.curriculum.measure_type == "reward":
                for brain_name in self.env.external_brain_names:
                    progress += self.trainers[brain_name].get_last_reward
                return progress
            else:
                return None
        else:
            return None

    def _process_graph(self):
        nodes = []
        scopes = []
        for brain_name in self.trainers.keys():
            if self.trainers[brain_name].graph_scope is not None:
                scope = self.trainers[brain_name].graph_scope + '/'
                if scope == '/':
                    scope = ''
                scopes += [scope]
                if self.trainers[brain_name].parameters["trainer"] == "imitation":
                    nodes += [scope + x for x in ["action"]]
                else:
                    nodes += [scope + x for x in ["action", "value_estimate", "action_probs"]]
                if self.trainers[brain_name].parameters["use_recurrent"]:
                    nodes += [scope + x for x in ["recurrent_out", "memory_size"]]
        if len(scopes) > 1:
            self.logger.info("List of available scopes :")
            for scope in scopes:
                self.logger.info("\t" + scope)
        self.logger.info("List of nodes to export :")
        for n in nodes:
            self.logger.info("\t" + n)
        return nodes

    def _save_model(self, sess, saver, steps=0):
        """
        Saves current model to checkpoint folder.
        :param sess: Current Tensorflow session.
        :param steps: Current number of steps in training process.
        :param saver: Tensorflow saver for session.
        """
        last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
        saver.save(sess, last_checkpoint)
        tf.train.write_graph(sess.graph_def, self.model_path, 'raw_graph_def.pb', as_text=False)
        self.logger.info("Saved Model")

    def _export_graph(self):
        """
        Exports latest saved model to .bytes format for Unity embedding.
        """
        target_nodes = ','.join(self._process_graph())
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        freeze_graph.freeze_graph(input_graph=self.model_path + '/raw_graph_def.pb',
                                  input_binary=True,
                                  input_checkpoint=ckpt.model_checkpoint_path,
                                  output_node_names=target_nodes,
                                  output_graph=self.model_path + '/' + self.env_name + "_" + self.run_id + '.bytes',
                                  clear_devices=True, initializer_nodes="", input_saver="",
                                  restore_op_name="save/restore_all", filename_tensor_name="save/Const:0")

    def _initialize_trainers(self, trainer_config, sess):
        trainer_parameters_dict = {}
        self.trainers = {}
        for brain_name in self.env.external_brain_names:
            trainer_parameters = trainer_config['default'].copy()
            if len(self.env.external_brain_names) > 1:
                graph_scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
                trainer_parameters['graph_scope'] = graph_scope
                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                    basedir=self.summaries_dir,
                    name=str(self.run_id) + '_' + graph_scope)
            else:
                trainer_parameters['graph_scope'] = ''
                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                    basedir=self.summaries_dir,
                    name=str(self.run_id))
            if brain_name in trainer_config:
                _brain_key = brain_name
                while not isinstance(trainer_config[_brain_key], dict):
                    _brain_key = trainer_config[_brain_key]
                for k in trainer_config[_brain_key]:
                    trainer_parameters[k] = trainer_config[_brain_key][k]
            trainer_parameters_dict[brain_name] = trainer_parameters.copy()
        for brain_name in self.env.external_brain_names:
            if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
                self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
                                                                     trainer_parameters_dict[brain_name],
                                                                     self.train_model, self.seed)
            elif trainer_parameters_dict[brain_name]['trainer'] == "ppo":
                self.trainers[brain_name] = PPOTrainer(sess, self.env, brain_name, trainer_parameters_dict[brain_name],
                                                       self.train_model, self.seed)
            else:
                raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                                .format(brain_name))

    def _load_config(self):
        try:
            with open(self.trainer_config_path) as data_file:
                trainer_config = yaml.load(data_file)
                return trainer_config
        except IOError:
            raise UnityEnvironmentException("""Parameter file could not be found here {}.
                                            Will use default Hyper parameters"""
                                            .format(self.trainer_config_path))
        except UnicodeDecodeError:
            raise UnityEnvironmentException("There was an error decoding Trainer Config from this path : {}"
                                            .format(self.trainer_config_path))

    @staticmethod
    def _create_model_path(model_path):
        try:
            if not os.path.exists(model_path):
                os.makedirs(model_path)
        except Exception:
            raise UnityEnvironmentException("The folder {} containing the generated model could not be accessed."
                                            " Please make sure the permissions are set correctly."
                                            .format(model_path))

    def start_learning(self):
        self.env.curriculum.set_lesson_number(self.lesson)
        trainer_config = self._load_config()
        self._create_model_path(self.model_path)

        tf.reset_default_graph()

        with tf.Session() as sess:
            self._initialize_trainers(trainer_config, sess)
            for k, t in self.trainers.items():
                self.logger.info(t)
            init = tf.global_variables_initializer()
            saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
            # Instantiate model parameters
            if self.load_model:
                self.logger.info('Loading Model...')
                ckpt = tf.train.get_checkpoint_state(self.model_path)
                if ckpt is None:
                    self.logger.info('The model {0} could not be found. Make sure you specified the right '
                                     '--run-id'.format(self.model_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(init)
            global_step = 0  # This is only for saving the model
            self.env.curriculum.increment_lesson(self._get_progress())
            curr_info = self.env.reset(train_mode=self.fast_simulation)
            if self.train_model:
                for brain_name, trainer in self.trainers.items():
                    trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)
            try:
                while any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()]) or not self.train_model:
                    if self.env.global_done:
                        self.env.curriculum.increment_lesson(self._get_progress())
                        curr_info = self.env.reset(train_mode=self.fast_simulation)
                        for brain_name, trainer in self.trainers.items():
                            trainer.end_episode()
                    # Decide and take an action
                    take_action_vector, take_action_memories, take_action_text, take_action_outputs = {}, {}, {}, {}
                    for brain_name, trainer in self.trainers.items():
                        (take_action_vector[brain_name],
                         take_action_memories[brain_name],
                         take_action_text[brain_name],
                         take_action_outputs[brain_name]) = trainer.take_action(curr_info)
                    new_info = self.env.step(vector_action=take_action_vector, memory=take_action_memories,
                                             text_action=take_action_text)
                    for brain_name, trainer in self.trainers.items():
                        trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
                        trainer.process_experiences(curr_info, new_info)
                        if trainer.is_ready_update() and self.train_model and trainer.get_step <= trainer.get_max_steps:
                            # Perform gradient descent with experience buffer
                            trainer.update_model()
                        # Write training statistics to Tensorboard.
                        trainer.write_summary(self.env.curriculum.lesson_number)
                        if self.train_model and trainer.get_step <= trainer.get_max_steps:
                            trainer.increment_step_and_update_last_reward()
                    if self.train_model:
                        global_step += 1
                    if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
                        # Save Tensorflow model
                        self._save_model(sess, steps=global_step, saver=saver)
                    curr_info = new_info
                # Final save Tensorflow model
                if global_step != 0 and self.train_model:
                    self._save_model(sess, steps=global_step, saver=saver)
            except KeyboardInterrupt:
                print('--------------------------Now saving model-------------------------')
                if self.train_model:
                    self.logger.info("Learning was interrupted. Please wait while the graph is generated.")
                    self._save_model(sess, steps=global_step, saver=saver)
                pass
        self.env.close()
        if self.train_model:
            self._export_graph()
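
A construction sketch only; the argument values below are placeholders, but the parameter list mirrors the constructor documented above:

controller = TrainerController(env_path='envs/3DBall', run_id='run-0', save_freq=50000,
                               curriculum_file=None, fast_simulation=True, load=False,
                               train=True, worker_id=0, keep_checkpoints=5, lesson=0,
                               seed=-1, docker_target_name='',
                               trainer_config_path='trainer_config.yaml',
                               no_graphics=False)
controller.start_learning()
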
Example No. 34
  per agents to be retrieved at the next step.
- value is an optional input that can be used to send a single float per agent
  to be displayed if an AgentMonitor.cs component is attached to the agent.
If you have more than one brain, use a dict with one action per brain, e.g.
action = {'brain1': [1.0, 2.0], 'brain2': [3.0, 4.0]}
'''
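
A minimal sketch of the multi-brain case described above; the brain names and values are illustrative, and env is the UnityEnvironment already created in this script:

# Illustrative only: with several brains, pass a dict mapping brain name -> actions.
# multi_action = {'brain1': [1.0, 2.0], 'brain2': [3.0, 4.0]}
# env_info_dict = env.step(multi_action)   # returns {brain_name: BrainInfo}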


for epi in range(10):
    # env.global_done could be used to check all
    env_info = env.reset(train_mode=train_mode)[default_brain]
    state = env_info.states[0]
    done = False
    epi_rewards = 0
    while not done:
        if brain.action_space_type == 'discrete':
            action = np.random.randint(
                0, brain.action_space_size, size=(len(env_info.agents)))
        else:
            action = np.random.randn(
                len(env_info.agents), brain.action_space_size)
        env_info = env.step(action)[default_brain]
        state = env_info.states[0]
        epi_rewards += env_info.rewards[0]
        done = env_info.local_done[0]
    print('Total reward for this episode: {}'.format(epi_rewards))


env.close()
print('Environment is closed')
Example No. 35
class UnityEnv:
    '''
    Class for all Envs.
    Standardizes the UnityEnv design to work in Lab.
    Access Agents properties by: Agents - AgentSpace - AEBSpace - EnvSpace - Envs
    '''

    def __init__(self, env_spec, env_space, e=0):
        self.env_spec = env_spec
        self.env_space = env_space
        self.info_space = env_space.info_space
        self.e = e
        util.set_attr(self, self.env_spec)
        self.name = self.env_spec['name']
        self.body_e = None
        self.nanflat_body_e = None  # nanflatten version of bodies
        self.body_num = None

        worker_id = int(f'{os.getpid()}{self.e+int(ps.unique_id())}'[-4:])
        self.u_env = UnityEnvironment(file_name=util.get_env_path(self.name), worker_id=worker_id)
        # spaces for NN auto input/output inference
        logger.warn('Unity environment observation_space and action_space are constructed with invalid range. Use only their shapes.')
        self.observation_spaces = []
        self.action_spaces = []
        for a in range(len(self.u_env.brain_names)):
            observation_shape = (self.get_observable_dim(a)['state'],)
            if self.get_brain(a).state_space_type == 'discrete':
                observation_space = gym.spaces.Box(low=0, high=1, shape=observation_shape, dtype=np.int32)
            else:
                observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=observation_shape, dtype=np.float32)
            self.observation_spaces.append(observation_space)
            if self.is_discrete(a):
                action_space = gym.spaces.Discrete(self.get_action_dim(a))
            else:
                action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)
            self.action_spaces.append(action_space)
        for observation_space, action_space in zip(self.observation_spaces, self.action_spaces):
            set_gym_space_attr(observation_space)
            set_gym_space_attr(action_space)

        # TODO experiment to find out optimal benchmarking max_timestep, set
        # TODO ensure clock_speed from env_spec
        self.clock_speed = 1
        self.clock = Clock(self.clock_speed)
        self.done = False

    def check_u_brain_to_agent(self):
        '''Check the size match between unity brain and agent'''
        u_brain_num = self.u_env.number_brains
        agent_num = len(self.body_e)
        assert u_brain_num == agent_num, f'There must be a Unity brain for each agent. e:{self.e}, brain: {u_brain_num} != agent: {agent_num}.'

    def check_u_agent_to_body(self, env_info_a, a):
        '''Check the size match between unity agent and body'''
        u_agent_num = len(env_info_a.agents)
        body_num = util.count_nonan(self.body_e[a])
        assert u_agent_num == body_num, f'There must be a Unity agent for each body; a:{a}, e:{self.e}, agent_num: {u_agent_num} != body_num: {body_num}.'

    def get_brain(self, a):
        '''Get the unity-equivalent of agent, i.e. brain, to access its info'''
        name_a = self.u_env.brain_names[a]
        brain_a = self.u_env.brains[name_a]
        return brain_a

    def get_env_info(self, env_info_dict, a):
        name_a = self.u_env.brain_names[a]
        env_info_a = env_info_dict[name_a]
        return env_info_a

    @lab_api
    def post_body_init(self):
        '''Run init for components that need bodies to exist first, e.g. memory or architecture.'''
        self.nanflat_body_e = util.nanflatten(self.body_e)
        for idx, body in enumerate(self.nanflat_body_e):
            body.nanflat_e_idx = idx
        self.body_num = len(self.nanflat_body_e)
        self.check_u_brain_to_agent()
        logger.info(util.self_desc(self))

    def is_discrete(self, a):
        '''Check if an agent (brain) is subject to discrete actions'''
        return self.get_brain(a).is_discrete()

    def get_action_dim(self, a):
        '''Get the action dim for an agent (brain) in env'''
        return self.get_brain(a).get_action_dim()

    def get_action_space(self, a):
        return self.action_spaces[a]

    def get_observable_dim(self, a):
        '''Get the observable dim for an agent (brain) in env'''
        return self.get_brain(a).get_observable_dim()

    def get_observable_types(self, a):
        '''Get the observable for an agent (brain) in env'''
        return self.get_brain(a).get_observable_types()

    def get_observation_space(self, a):
        return self.observation_spaces[a]

    @lab_api
    def reset(self):
        self.done = False
        env_info_dict = self.u_env.reset(train_mode=(util.get_lab_mode() != 'dev'), config=self.env_spec.get('unity'))
        _reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
        for (a, b), body in util.ndenumerate_nonan(self.body_e):
            env_info_a = self.get_env_info(env_info_dict, a)
            self.check_u_agent_to_body(env_info_a, a)
            state = env_info_a.states[b]
            state_e[(a, b)] = state
            done_e[(a, b)] = self.done
        return _reward_e, state_e, done_e

    @lab_api
    def step(self, action_e):
        # TODO implement clock_speed: step only if self.clock.to_step()
        if self.done:
            return self.reset()
        action_e = util.nanflatten(action_e)
        env_info_dict = self.u_env.step(action_e)
        reward_e, state_e, done_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
        for (a, b), body in util.ndenumerate_nonan(self.body_e):
            env_info_a = self.get_env_info(env_info_dict, a)
            reward_e[(a, b)] = env_info_a.rewards[b]
            state_e[(a, b)] = env_info_a.states[b]
            done_e[(a, b)] = env_info_a.local_done[b]
        self.done = (util.nonan_all(done_e) or self.clock.get('t') > self.max_timestep)
        return reward_e, state_e, done_e

    @lab_api
    def close(self):
        self.u_env.close()