def patch_gym_spaces(self, u_env):
    '''
    For standardization, use gym spaces to represent observation and action spaces for Unity.
    This method iterates through the multiple brains (multiagent) then constructs and returns lists of observation_spaces and action_spaces

    @param u_env: Unity environment exposing `brain_names`; also mutated to carry the first brain's spaces as singletons.
    @returns (observation_spaces, action_spaces): parallel lists with one gym space per brain.
    '''
    observation_spaces = []
    action_spaces = []
    for a in range(len(u_env.brain_names)):
        brain = self._get_brain(u_env, a)
        observation_shape = (brain.get_observable_dim()['state'],)
        if brain.is_discrete():
            dtype = np.int32
            action_space = spaces.Discrete(brain.get_action_dim())
        else:
            dtype = np.float32
            # Fix: size the Box by the brain's actual action dimension instead of the
            # hard-coded (1,), so multi-dimensional continuous actions are represented
            # fully (matches the sibling variant of this method in this file).
            action_space = spaces.Box(low=0.0, high=1.0, shape=(brain.get_action_dim(),), dtype=dtype)
        # NOTE(review): the observation Box reuses the dtype chosen by the action
        # branch (np.int32 for discrete-action brains) — looks suspicious but is
        # preserved as-is since both variants in this file do it; confirm intent.
        observation_space = spaces.Box(low=0, high=1, shape=observation_shape, dtype=dtype)
        set_gym_space_attr(observation_space)
        set_gym_space_attr(action_space)
        observation_spaces.append(observation_space)
        action_spaces.append(action_space)
    # set for singleton
    u_env.observation_space = observation_spaces[0]
    u_env.action_space = action_spaces[0]
    return observation_spaces, action_spaces
def patch_gym_spaces(self, env):
    r"""
    For standardization, use gym spaces to represent observation and action
    spaces for Unity. Iterates over the env's brains (multiagent) and builds
    one observation space and one action space per brain; the first brain's
    spaces are also attached to the env itself for singleton use.
    :param env: Unity environment exposing `brain_names`
    :return: tuple of (observation_spaces, action_spaces) lists
    """
    observation_spaces, action_spaces = [], []
    for idx, _brain_name in enumerate(env.brain_names):
        brain = self._get_brain(env, idx)
        # TODO: Logging
        utils.describe(brain)
        obs_shape = (brain.get_observable_dim()['state'],)
        act_shape = (brain.get_action_dim(),)
        if brain.is_discrete():
            dtype = np.int32
            act_space = spaces.Discrete(brain.get_action_dim())
        else:
            dtype = np.float32
            act_space = spaces.Box(low=0.0, high=1.0, shape=act_shape, dtype=dtype)
        obs_space = spaces.Box(low=0, high=1, shape=obs_shape, dtype=dtype)
        utils.set_gym_space_attr(obs_space)
        utils.set_gym_space_attr(act_space)
        observation_spaces.append(obs_space)
        action_spaces.append(act_space)
    # set for singleton
    env.observation_space = observation_spaces[0]
    env.action_space = action_spaces[0]
    return observation_spaces, action_spaces