Example #1
    def __init__(self, n=4, num_steps=100, reward_for_remembering=1000):
        """
        :param n: Number of different values the target `X` can take on.
        :param num_steps: For how many steps the agent must remember the value.
        :param reward_for_remembering: Reward for correctly recalling the value at the end.
        """
        super().__init__()
        self.num_steps = num_steps
        self.n = n
        self._action_space = Discrete(self.n + 1)
        self._observation_space = Discrete(self.n + 1)
        self._t = 1
        self._reward_for_remembering = reward_for_remembering

        self._target = None
        self._next_obs = None
Example #2
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    else:
        raise NotImplementedError
Example #3
def convert_gym_space(space):
    if isinstance(space, Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
Example #4
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        # hard-codes an 80x80 pixel observation space with values in [0, 255],
        # ignoring the original Box bounds
        return Box(low=0, high=255, shape=(80, 80))
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
Example #5
    def __init__(self,
                 app_name,
                 time_state=False,
                 idx=0,
                 is_render=False,
                 no_graphics=False,
                 recording=True):
        Serializable.quick_init(self, locals())

        # Unity scene
        self._env = UnityEnvironment(file_name=app_name,
                                     worker_id=idx,
                                     no_graphics=no_graphics)
        self.id = 0

        self.name = app_name
        self.idx = idx
        self.is_render = is_render

        self.time_state = time_state
        self.time_step = 0

        # Check brain configuration
        assert len(self._env.brains) == 1
        self.brain_name = self._env.external_brain_names[0]
        brain = self._env.brains[self.brain_name]

        # Check for number of agents in scene
        initial_info = self._env.reset()[self.brain_name]
        # visual observations are force-disabled here by the trailing "and False"
        self.use_visual = (brain.number_visual_observations == 1) and False
        self.recording = brain.number_visual_observations == 1 and recording

        # Set observation and action spaces
        if brain.vector_action_space_type == "discrete":
            self._action_space = Discrete(1)
        else:
            high = np.array([np.inf] * (brain.vector_action_space_size))
            self._action_space = Box(-high, high)
        # ----------------------------------
        if self.use_visual and False and no_graphics:  # branch disabled by "and False"
            high = np.array([np.inf] * brain.camera_resolutions[0]["height"] *
                            brain.camera_resolutions[0]["width"] * 3)
            self._observation_space = Box(-high, high)
        else:
            if self.time_state:
                high = np.array([np.inf] *
                                (brain.vector_observation_space_size + 1))
            else:
                high = np.array([np.inf] *
                                (brain.vector_observation_space_size))
            self._observation_space = Box(-high, high)

        # video buffer
        self.frames = []
Example #6
def convert_gym_space(space, box_additional_dim=0):
    if isinstance(space, gym.spaces.Box):
        if box_additional_dim != 0:
            low = np.concatenate([space.low, [-np.inf] * box_additional_dim])
            high = np.concatenate([space.high, [np.inf] * box_additional_dim])
            return Box(low=low, high=high)
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
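
The `box_additional_dim` argument pads the converted Box with extra unbounded dimensions (e.g., to append auxiliary features such as a time index to the observation). A minimal sketch of the effect, assuming `gym`, `numpy`, and the rllab `Box` used above are available:

import gym
import numpy as np

space = gym.spaces.Box(low=0.0, high=1.0, shape=(3,))
padded = convert_gym_space(space, box_additional_dim=1)
# padded.low  -> [ 0.,  0.,  0., -inf]
# padded.high -> [ 1.,  1.,  1.,  inf]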
Example #7
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    elif isinstance(space, list):
        # For multiagent envs
        return list(map(convert_gym_space, space))
        # TODO(cathywu) refactor multiagent envs to use gym.spaces.Tuple
        # (may be needed for pickling?)
    else:
        raise NotImplementedError
Example #8
def convert_gym_space(space, n_agents=1):
    if isinstance(space, gym.spaces.Box) or isinstance(space, Box):
        if len(space.shape) > 1:
            assert n_agents == 1, "multi-dimensional inputs for centralized agents not supported"
            return Box(low=np.min(space.low),
                       high=np.max(space.high),
                       shape=space.shape)
        else:
            return Box(low=np.min(space.low),
                       high=np.max(space.high),
                       shape=(space.shape[0] * n_agents, ))
    elif isinstance(space, gym.spaces.Discrete) or isinstance(space, Discrete):
        return Discrete(n=space.n**n_agents)
    else:
        raise NotImplementedError
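
This variant builds a centralized (joint) space for `n_agents` homogeneous agents: Box bounds are broadcast over a concatenated shape, and Discrete spaces are combined into a joint action space of size n**n_agents. A hedged illustration of the resulting sizes, assuming `gym` is imported as in the snippet above:

joint_obs = convert_gym_space(gym.spaces.Box(low=-1.0, high=1.0, shape=(3,)), n_agents=2)
joint_act = convert_gym_space(gym.spaces.Discrete(4), n_agents=2)
# joint_obs.shape -> (6,)   (per-agent observations concatenated)
# joint_act.n     -> 16     (4**2 joint actions)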
Example #9
def convert_gym_space(space):
    """
    Convert a gym.space to an rllab.space
    :param space: (obj:`gym.Space`) The Space object to convert
    :return: converted rllab.Space object
    """
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    elif isinstance(space, gym.spaces.Dict):
        return Dict(space.spaces)
    else:
        raise TypeError
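
The docstring above describes the typical use: converting a wrapped gym environment's spaces into rllab spaces. A minimal usage sketch, assuming `gym` is installed and that `Box`, `Discrete`, and `Product` come from `rllab.spaces` as in these snippets:

import gym

env = gym.make("CartPole-v1")
obs_space = convert_gym_space(env.observation_space)  # gym Box      -> rllab Box
act_space = convert_gym_space(env.action_space)       # gym Discrete -> rllab Discrete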
Example #10
File: gym_env.py  Project: Obarack/rllab
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    # added for robotics environments
    elif isinstance(space, gym.spaces.Dict):
        # flatten the goal-env sub-spaces into a single Box
        b_low = np.concatenate((space.spaces["desired_goal"].low,
                                space.spaces["achieved_goal"].low,
                                space.spaces["observation"].low))
        b_high = np.concatenate((space.spaces["desired_goal"].high,
                                 space.spaces["achieved_goal"].high,
                                 space.spaces["observation"].high))
        return Box(low=b_low, high=b_high)
    # end addition    
    else:
        raise NotImplementedError
Example #11
class OneCharMemory(Env, SupervisedLearningEnv):
    """
    A simple env whose observation is a value `X` on the first time step,
    followed by zeros for a fixed number of steps.

    The reward is 1 at every step where the agent outputs zero, except at the
    final step, where the agent receives a large reward for outputting `X`
    correctly.

    Both the actions and observations are represented as one-hot vectors.
    There are `n` different values that `X` can take on (excluding 0),
    so the one-hot vector's dimension is n+1.
    """
    def __init__(self, n=4, num_steps=100, reward_for_remembering=1000):
        """
        :param n: Number of different values the target `X` can take on.
        :param num_steps: For how many steps the agent must remember the value.
        :param reward_for_remembering: Reward for correctly recalling the value at the end.
        """
        super().__init__()
        self.num_steps = num_steps
        self.n = n
        self._action_space = Discrete(self.n + 1)
        self._observation_space = Discrete(self.n + 1)
        self._t = 1
        self._reward_for_remembering = reward_for_remembering

        self._target = None
        self._next_obs = None

    def step(self, action):
        # flatten() converts the integer observation into a one-hot vector.
        observation = self._get_next_observation()
        self._next_obs = 0

        done = self._t == self.num_steps
        self._t += 1

        if done:
            reward = self._reward_for_remembering * int(
                self._observation_space.unflatten(action) == self._target)
        else:
            reward = int(self._observation_space.unflatten(action) == 0)
        info = {'target': self._target}
        return observation, reward, done, info

    @property
    def action_space(self):
        return self._action_space

    @property
    def horizon(self):
        return self.num_steps

    def reset(self):
        self._target = randint(1, self.n)
        self._next_obs = self._target
        self._t = 1
        return self._get_next_observation()

    def _get_next_observation(self):
        return self._observation_space.flatten(self._next_obs)

    @property
    def observation_space(self):
        return self._observation_space

    def get_batch(self, batch_size):
        targets = np.random.randint(
            low=1,
            high=self.n + 1,  # np.random.randint's high is exclusive: sample in [1, n]
            size=batch_size,
        )
        onehot_targets = special.to_onehot_n(targets, self.feature_dim)
        X = np.zeros((batch_size, self.sequence_length, self.feature_dim))
        X[:, :, 0] = 1  # default every step to the one-hot for 0
        X[:, 0, :] = onehot_targets  # first observation carries the target
        Y = np.zeros((batch_size, self.sequence_length, self.target_dim))
        Y[:, :, 0] = 1  # default every step to the one-hot for 0
        Y[:, -1, :] = onehot_targets  # the final output should be the target
        return X, Y

    @property
    def feature_dim(self):
        return self.n + 1

    @property
    def target_dim(self):
        return self.n + 1

    @property
    def sequence_length(self):
        return self.horizon
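
A minimal rollout sketch for `OneCharMemory`, assuming the rllab `Discrete` space provides the `flatten`/`unflatten` conversions used in `step` above:

env = OneCharMemory(n=4, num_steps=5)
obs = env.reset()                         # one-hot of the sampled target X
done = False
total_reward = 0
while not done:
    action = env.action_space.flatten(0)  # always emit 0 (misses the final-step bonus)
    obs, reward, done, info = env.step(action)
    total_reward += reward
print(total_reward)                       # num_steps - 1 with this policy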