示例#1
0
def make_env(id_or_path: str,
             *,
             max_episode_timesteps: Optional[int] = None) -> gym.Env:
    """Create an environment from an id or path, trying several loaders.

    Loaders are attempted in this order, each one only when the previous one
    raised: `make_po_env` (falls through on `ValueError`), `gym.make` (falls
    through on `gym.error.Error`), and finally `make_gv_env`.

    :param id_or_path:  environment id, or path handed to `make_gv_env`
    :param max_episode_timesteps:  if not None, the env is wrapped in a
        `TimeLimit` with this limit
    :rtype: the (possibly wrapped) environment
    :raises ValueError: through `checkraise`, when the loaded env has no
        `state_space` attribute
    """
    try:
        env = make_po_env(id_or_path)

    except ValueError:

        try:
            print('Loading using gym.make')
            env = gym.make(id_or_path)

        except gym.error.Error:
            # adjacent string literals are concatenated verbatim, so the
            # separating space has to be part of the first literal
            print(f'Environment with id {id_or_path} not found. '
                  'Trying as a GV YAML environment')
            env = make_gv_env(id_or_path)

    checkraise(
        hasattr(env, 'state_space'),
        ValueError,
        f'env {id_or_path} does not have state_space',
    )

    # gym_pomdps environments get an additional FlatPaddingWrapper
    if isinstance(env.unwrapped, gym_pomdps.POMDP):
        env = FlatPaddingWrapper(env)

    if max_episode_timesteps is not None:
        env = TimeLimit(env, max_episode_timesteps)

    return env
示例#2
0
    def __init__(self, space: gym.spaces.Dict):
        """Store `space` after checking that it provides an `agent` entry.

        :param space:  Dict space whose `spaces` must contain the `agent` key
        """
        super().__init__()

        has_agent_key = 'agent' in space.spaces
        checkraise(has_agent_key, KeyError,
                   'space does not contain `agent` key')

        self.space = space
示例#3
0
    def __init__(self, input_space: gym.spaces.Discrete):
        """Record the number of classes of a Discrete input space.

        :param input_space:  must be a `gym.spaces.Discrete`; its `n` is kept
            as the number of classes
        """
        super().__init__()

        is_discrete = isinstance(input_space, gym.spaces.Discrete)
        checkraise(is_discrete, TypeError, 'input_space must be Discrete')

        self.__num_classes = input_space.n
示例#4
0
    def __init__(self, input_space: gym.spaces.Box):
        """Record the size of a one-dimensional Box input space.

        :param input_space:  must be a `gym.spaces.Box` whose shape has
            exactly one axis; that axis length is kept as the output dim
        """
        super().__init__()

        is_flat_box = (isinstance(input_space, gym.spaces.Box)
                       and len(input_space.shape) == 1)
        checkraise(is_flat_box, TypeError, 'input_space must be Box')

        # the shape is known to have a single entry at this point
        (self.__out_dim,) = input_space.shape
示例#5
0
    def __init__(self, env: gym.Env):
        """Wrap `env` and enlarge its Discrete observation space by one.

        :param env:  environment whose `observation_space` must be Discrete
        """
        is_discrete = isinstance(env.observation_space, gym.spaces.Discrete)
        checkraise(is_discrete, ValueError,
                   'env.observation_space must be Discrete')

        super().__init__(env)

        # one more observation value than the wrapped environment exposes
        num_observations = env.observation_space.n + 1
        self.observation_space = gym.spaces.Discrete(num_observations)
示例#6
0
def returns(rewards: np.ndarray, discount: float) -> np.ndarray:
    """Compute discounted sums of rewards along the last axis.

    :param rewards:  (B, T) np.ndarray of rewards
    :param discount:  discount factor
    :rtype: (B,) np.ndarray of empirical returns
    """
    checkraise(rewards.ndim > 1, ValueError, 'invalid rewards.ndim {}',
               rewards.ndim)

    horizon = rewards.shape[-1]
    weights = discounts(horizon, discount)
    # contract the last (time) axis of `rewards` against the discount weights
    return np.einsum('j,...j->...', weights, rewards)
示例#7
0
def discounts_uncached(num_steps: int, discount: float) -> np.ndarray:
    """Build the array of successive powers of the discount factor.

    The result is $[1., \\gamma, \\gamma^2, \\ldots]$ with `num_steps`
    entries.

    :param num_steps:  size of the output array
    :param discount:  discount factor
    :rtype: (N,) np.ndarray of discounts
    """
    steps_ok = num_steps > 0
    checkraise(steps_ok, ValueError, 'invalid `num_steps` {}', num_steps)

    discount_ok = 0.0 <= discount <= 1.0
    checkraise(discount_ok, ValueError, 'invalid `discount` {}', discount)

    exponents = np.arange(num_steps, dtype=float)
    return discount**exponents
示例#8
0
def _check_gv_state_space_keys(space: gym.Space) -> bool:
    """Validate that `space` is a Dict space holding the expected GV keys.

    The required keys are `grid`, `agent_id_grid`, `agent` and `item`,
    checked in that order.

    :param space:  space to validate
    :rtype: True when every check passes
    :raises TypeError: through `checkraise`, if `space` is not a
        `gym.spaces.Dict`
    :raises KeyError: through `checkraise`, if a required key is missing
    """
    checkraise(
        isinstance(space, gym.spaces.Dict),
        TypeError,
        'incorrect state space type',
    )

    for key in ('grid', 'agent_id_grid', 'agent', 'item'):
        checkraise(
            key in space.spaces,
            KeyError,
            f'space does not contain `{key}` key',
        )

    # the signature promises a bool; without this the function returned None
    return True
示例#9
0
def mc_q_estimator(
    rewards: torch.Tensor,
    values: torch.Tensor,  # pylint: disable=unused-argument
    *,
    discount: float,
) -> torch.Tensor:
    """Discounted sum of the current and all later rewards, per timestep.

    :param rewards:  1-dimensional tensor of rewards
    :param values:  unused by this estimator
    :param discount:  discount factor
    :rtype: tensor with one estimate per entry of `rewards`
    """
    checkraise(rewards.ndim == 1, ValueError,
               '`rewards` must have 1 dimension')

    num_steps = rewards.shape[-1]
    steps = torch.arange(num_steps)
    # entry (i, j) is j - i, i.e. how far step j lies after step i
    offsets = steps[None, :] - steps[:, None]
    # keep only the upper triangle, so earlier rewards never contribute
    weights = torch.triu(discount**offsets)
    return weights @ rewards
示例#10
0
    def __init__(
        self,
        space: gym.spaces.Dict,
        embedding: EmbeddingRepresentation,
    ):
        """Store `space` and `embedding` after checking for a `grid` entry.

        :param space:  Dict space whose `spaces` must contain the `grid` key
        :param embedding:  embedding representation kept on the instance
        """
        super().__init__()

        has_grid_key = 'grid' in space.spaces
        checkraise(has_grid_key, KeyError,
                   'space does not contain `grid` key')

        self.space = space
        self.embedding = embedding
示例#11
0
 def torch(self) -> Batch:
     """Return a new Batch with every field passed through `numpy2torch`.

     Only `states`, `observations`, `actions`, `rewards` and `dones` are
     verified to be numpy arrays beforehand; `next_states` and
     `next_observations` are converted without that check.
     """
     checked_names = ('states', 'observations', 'actions', 'rewards',
                      'dones')
     all_numpy = all(
         isinstance(getattr(self, name), np.ndarray)
         for name in checked_names
     )
     checkraise(all_numpy, TypeError, 'Batch is not numpy to begin with??')

     converted_names = (
         'states',
         'observations',
         'actions',
         'rewards',
         'next_states',
         'next_observations',
         'dones',
     )
     converted = {
         name: numpy2torch(getattr(self, name))
         for name in converted_names
     }
     return Batch(**converted)
示例#12
0
    def __init__(self, env: gym.Env, indices: List[int]):
        """Wrap `env`, exposing only the selected observation dimensions.

        The wrapped env's full observation space is kept as `state_space`,
        while `observation_space` becomes a Box built from the `low`/`high`
        bounds at `indices`.

        :param env:  environment whose `observation_space` must be a Box with
            a one-axis shape
        :param indices:  unique, non-negative dimensions to keep, each lower
            than the number of observation dimensions
        """
        full_space = env.observation_space

        is_flat_box = (isinstance(full_space, gym.spaces.Box)
                       and len(full_space.shape) == 1)
        checkraise(is_flat_box, ValueError,
                   'env.observation_space must be flat Box')

        no_duplicates = len(set(indices)) == len(indices)
        checkraise(no_duplicates, ValueError, 'indices must be unique')

        num_dims = full_space.shape[0]
        checkraise(len(indices) <= num_dims, ValueError,
                   'number of indices must not exceed state dimensions')
        checkraise(min(indices) >= 0, ValueError,
                   'indices must be non-negative')
        checkraise(max(indices) < num_dims, ValueError,
                   'indices must be lower than state dimensions')

        super().__init__(env)

        self.indices = indices
        self.state_space = full_space
        self.observation_space = gym.spaces.Box(
            full_space.low[indices],
            full_space.high[indices],
        )

        self.state = None
示例#13
0
 def torch(self) -> Episode:
     """Return a new Episode with every field passed through `numpy2torch`.

     `states` and `observations` may each be either a numpy array or a dict
     whose values are all numpy arrays; `actions` and `rewards` must be numpy
     arrays.
     """

     def is_numpy(data) -> bool:
         # a plain array, or a dict made only of arrays
         if isinstance(data, np.ndarray):
             return True
         return isinstance(data, dict) and all(
             isinstance(value, np.ndarray) for value in data.values())

     all_numpy = (is_numpy(self.states)
                  and is_numpy(self.observations)
                  and isinstance(self.actions, np.ndarray)
                  and isinstance(self.rewards, np.ndarray))
     checkraise(all_numpy, TypeError, 'Episode is not numpy to begin with??')

     return Episode(
         states=numpy2torch(self.states),
         observations=numpy2torch(self.observations),
         actions=numpy2torch(self.actions),
         rewards=numpy2torch(self.rewards),
     )
示例#14
0
    def __init__(
        self,
        space: gym.spaces.Dict,
        embedding: EmbeddingRepresentation,
    ):
        """Build the CNN operating on the grid plus the agent-id grid.

        :param space:  Dict space whose `spaces` must contain the `grid` and
            `agent_id_grid` keys
        :param embedding:  embedding representation; its `dim` determines the
            number of CNN input channels
        """
        super().__init__()

        for key in ('grid', 'agent_id_grid'):
            checkraise(
                key in space.spaces,
                KeyError,
                f'space does not contain `{key}` key',
            )

        self.space = space
        self.embedding = embedding

        # three embedded channels, plus one extra channel for agent_id_grid
        num_channels = 3 * embedding.dim + 1
        self.cnn = gv_cnn(num_channels)
示例#15
0
def discounts(num_steps: int, discount: float) -> np.ndarray:
    """Return the discounts array $[1., \\gamma, \\gamma^2, \\ldots]$.

    The array is served from `discounts_cache` (keyed by `discount`) when the
    cached entry is long enough; otherwise it is recomputed with
    `discounts_uncached` and the cache entry is replaced.

    :param num_steps:  size of the output array
    :param discount:  discount factor
    :rtype: (N,) np.ndarray of discounts
    """
    steps_ok = num_steps > 0
    checkraise(steps_ok, ValueError, 'invalid `num_steps` {}', num_steps)

    discount_ok = 0.0 <= discount <= 1.0
    checkraise(discount_ok, ValueError, 'invalid `discount` {}', discount)

    cached = discounts_cache[discount]

    if cached.size < num_steps:
        # cached entry is too short: compute a longer one and store it
        fresh = discounts_uncached(num_steps, discount)
        discounts_cache[discount] = fresh
        return fresh

    # cached entry is long enough: hand back its leading part
    return cached[:num_steps]
示例#16
0
    def __init__(self, input_space: gym.spaces.Box, dims: Sequence[int]):
        """Build a stack of linear layers, each followed by a ReLU.

        :param input_space:  must be a `gym.spaces.Box` whose shape has
            exactly one axis; that axis length is the first layer's input size
        :param dims:  non-empty sequence of layer output sizes
        """
        super().__init__()

        is_flat_box = (isinstance(input_space, gym.spaces.Box)
                       and len(input_space.shape) == 1)
        checkraise(is_flat_box, TypeError, 'input_space must be Box')
        checkraise(len(dims) > 0, ValueError, 'dims must be non-empty')

        (input_dim,) = input_space.shape
        self.dims = [input_dim, *dims]

        # one (linear, ReLU) pair per consecutive pair of sizes
        layers = []
        for in_dim, out_dim in mitt.pairwise(self.dims):
            layers.append(make_module('linear', 'relu', in_dim, out_dim))
            layers.append(nn.ReLU())

        self.model = nn.Sequential(*layers)
示例#17
0
def filter_models(models: nn.ModuleDict, keys) -> nn.ModuleDict:
    """Select a subset of `models`, recursing into nested dictionaries.

    :param models:  module dictionary to filter
    :param keys:  either a list of keys to keep, or a dict mapping each key
        to keep to the `keys` used to filter the corresponding sub-dictionary
    :rtype: a new `nn.ModuleDict` holding only the requested entries
    :raises NotImplementedError: if `keys` is neither a list nor a dict
    """
    if not isinstance(keys, (list, dict)):
        raise NotImplementedError

    # iterating a dict yields its keys, so one expression serves both cases
    missing_keys = set(keys) - models.keys()
    checkraise(
        len(missing_keys) == 0,
        ValueError,
        'models dictionary does not contains keys {}',
        missing_keys,
    )

    if isinstance(keys, list):
        selected = {key: models[key] for key in keys}
    else:
        selected = {
            key: filter_models(models[key], subkeys)
            for key, subkeys in keys.items()
        }

    return nn.ModuleDict(selected)
示例#18
0
def td0_q_estimator(
    rewards: torch.Tensor,
    values: torch.Tensor,
    *,
    discount: float,
) -> torch.Tensor:
    """One-step estimate: reward plus discounted value of the next step.

    The value following the last step is taken to be zero.

    :param rewards:  1-dimensional tensor of rewards
    :param values:  1-dimensional tensor of values, same shape as `rewards`
    :param discount:  discount factor
    :rtype: tensor with one estimate per entry of `rewards`
    """
    checkraise(rewards.ndim == 1, ValueError,
               '`rewards` must have 1 dimension')
    checkraise(values.ndim == 1, ValueError, '`values` must have 1 dimension')

    same_shape = rewards.shape == values.shape
    checkraise(same_shape, ValueError,
               '`rewards` and `values` must have the same shape')

    # shift values one step towards the past; the wrapped-around entry that
    # lands in the last slot is replaced by zero
    next_values = torch.roll(values, -1)
    next_values[-1] = 0.0
    return rewards + discount * next_values
示例#19
0
def make_schedule(
    name: str,
    *,
    const: Optional[int] = None,
    value_from: Optional[float] = None,
    value_to: Optional[float] = None,
    nsteps: Optional[int] = None,
    halflife: Optional[int] = None,
) -> Schedule:
    """Create a schedule by name, binding the arguments it needs.

    :param name:  one of `constant`, `linear` or `exponential`
    :param const:  required by the `constant` schedule
    :param value_from:  required by the `linear` and `exponential` schedules
    :param value_to:  required by the `linear` schedule
    :param nsteps:  required by the `linear` schedule
    :param halflife:  required by the `exponential` schedule
    :rtype: the schedule function with its keyword arguments pre-bound
    :raises ValueError: if `name` is unknown, or (through `checkraise`) if an
        argument required by the chosen schedule is None
    """
    if name == 'constant':
        checkraise(const is not None, ValueError,
                   f'invalid arguments {const}')
        return functools.partial(constant_schedule, const=const)

    if name == 'linear':
        required = (value_from, value_to, nsteps)
        checkraise(None not in required, ValueError,
                   f'invalid arguments {value_from} {value_to} {nsteps}')
        linear_kwargs = {
            'value_from': value_from,
            'value_to': value_to,
            'nsteps': nsteps,
        }
        return functools.partial(linear_schedule, **linear_kwargs)

    if name == 'exponential':
        required = (value_from, halflife)
        checkraise(None not in required, ValueError,
                   f'invalid arguments {value_from} {halflife}')
        exponential_kwargs = {
            'value_from': value_from,
            'halflife': halflife,
        }
        return functools.partial(exponential_schedule, **exponential_kwargs)

    raise ValueError(f'invalid schedule name {name}')
示例#20
0
def tdn_q_estimator(
    rewards: torch.Tensor,
    values: torch.Tensor,
    *,
    discount: float,
    n: int,
) -> torch.Tensor:
    """n-step estimate: n discounted rewards plus a discounted later value.

    Values that would be read past the end of the sequence are taken to be
    zero.

    :param rewards:  1-dimensional tensor of rewards
    :param values:  1-dimensional tensor of values, same shape as `rewards`
    :param discount:  discount factor
    :param n:  number of reward steps summed before bootstrapping
    :rtype: tensor with one estimate per entry of `rewards`
    """
    checkraise(rewards.ndim == 1, ValueError,
               '`rewards` must have 1 dimension')
    checkraise(values.ndim == 1, ValueError, '`values` must have 1 dimension')

    same_shape = rewards.shape == values.shape
    checkraise(same_shape, ValueError,
               '`rewards` and `values` must have the same shape')

    num_steps = rewards.shape[-1]
    steps = torch.arange(num_steps)
    # entry (i, j) is j - i, i.e. how far step j lies after step i
    offsets = steps[None, :] - steps[:, None]
    # keep only the band 0 <= j - i <= n - 1
    weights = torch.tril(torch.triu(discount**offsets), n - 1)

    # shift values n steps towards the past and zero the wrapped-around tail
    later_values = torch.roll(values, -n)
    later_values[-n:] = 0.0
    return weights @ rewards + (discount**n) * later_values
示例#21
0
def tdlambda_q_estimator(
    rewards: torch.Tensor,
    values: torch.Tensor,
    *,
    discount: float,
    lambda_: float,
) -> torch.Tensor:
    """Lambda-weighted estimate built from rewards and next-step values.

    The value following the last step is taken to be zero.

    :param rewards:  1-dimensional tensor of rewards
    :param values:  1-dimensional tensor of values, same shape as `rewards`
    :param discount:  discount factor
    :param lambda_:  mixing coefficient; the per-step decay is
        `discount * lambda_`
    :rtype: tensor with one estimate per entry of `rewards`
    """
    checkraise(rewards.ndim == 1, ValueError,
               '`rewards` must have 1 dimension')
    checkraise(values.ndim == 1, ValueError, '`values` must have 1 dimension')

    same_shape = rewards.shape == values.shape
    checkraise(same_shape, ValueError,
               '`rewards` and `values` must have the same shape')

    num_steps = rewards.shape[-1]
    steps = torch.arange(num_steps)
    # entry (i, j) is j - i, i.e. how far step j lies after step i
    offsets = steps[None, :] - steps[:, None]
    decay = discount * lambda_
    weights = torch.triu(decay**offsets)

    # shift values one step towards the past; the last slot becomes zero
    next_values = torch.roll(values, -1)
    next_values[-1] = 0.0

    targets = rewards + discount * (1 - lambda_) * next_values
    return weights @ targets
示例#22
0
    def _make_gv_model(self, name: str):
        """Instantiate the GV representation identified by `name`.

        Before a representation is built, `self.space` is checked for the
        keys that representation needs.

        :param name:  one of `agent`, `item`, `grid-cnn`, `grid-fc`,
            `agent-grid-cnn` or `agent-grid-fc`
        :rtype: the newly built representation
        :raises ValueError: if `name` is not one of the names above
        :raises KeyError: through `checkraise`, if a needed key is missing
        """

        def require(*keys: str) -> None:
            # verify, in order, that every needed key is in the space
            for key in keys:
                checkraise(
                    key in self.space.spaces,
                    KeyError,
                    f'space does not contain `{key}` key',
                )

        if name == 'agent':
            require('agent')
            return GV_Agent_Representation(self.space)

        if name == 'item':
            require('item')
            return GV_Item_Representation(self.space, self.embedding)

        if name == 'grid-cnn':
            require('grid')
            return GV_Grid_CNN_Representation(self.space, self.embedding)

        if name == 'grid-fc':
            require('grid')
            return GV_Grid_FC_Representation(self.space, self.embedding)

        if name == 'agent-grid-cnn':
            require('grid', 'agent_id_grid')
            return GV_AgentGrid_CNN_Representation(self.space, self.embedding)

        if name == 'agent-grid-fc':
            require('grid', 'agent_id_grid')
            return GV_AgentGrid_FC_Representation(self.space, self.embedding)

        raise ValueError(f'invalid gv model name {name}')
示例#23
0
def make_po_env(name: str) -> gym.Env:
    """Create a partially observable variant of a known gym environment.

    `name` is matched against `po_env_id_re`; its captures give the type of
    partial observability, the base environment name and the version. The
    base environment is created with `gym.make` and wrapped in an
    `IndexWrapper` keeping only the observation indices of that type.

    :param name:  environment id satisfying `po_env_id_re`
    :rtype: the base environment wrapped in an `IndexWrapper`
    :raises ValueError: if the base environment name is not supported, or
        (through `checkraise`) if `name` does not match the regex or the
        partial observability type is unknown
    """
    m = po_env_id_re.match(name)
    # m[0] is the full name
    # m[1] is the first capture, i.e., the type of partial observability
    # m[2] is the second capture, i.e., the name w/o the version
    # m[3] is the third capture, i.e., the version

    checkraise(m is not None, ValueError,
               f'env name {name} does not satisfy regex')

    assert m is not None  # silly forcing of type checking
    po_type, env_name, version = m[1], m[2], m[3]
    non_po_name = f'{env_name}-v{version}'

    # observation indices kept for each env and partial observability type
    indices_by_env = {
        'CartPole': {
            'pos': [0, 2],  # ignore velocities
            'vel': [1, 3],  # ignore positions
            'full': [0, 1, 2, 3],  # ignore nothing
        },
        'LunarLander': {
            'pos': [0, 1, 4, 6, 7],  # ignore velocities
            'vel': [2, 3, 5, 6, 7],  # ignore positions
            'full': [0, 1, 2, 3, 4, 5, 6, 7],  # ignore nothing
        },
        'Acrobot': {
            'pos': [0, 1, 2, 3],  # ignore velocities
            'vel': [4, 5],  # ignore positions
            'full': [0, 1, 2, 3, 4, 5],  # ignore nothing
        },
    }

    if env_name not in indices_by_env:
        # f-string, so that the offending name actually shows up in the error
        raise ValueError(f'invalid env name {env_name}')

    indices_dict = indices_by_env[env_name]
    checkraise(
        po_type in indices_dict,
        ValueError,
        f'invalid partial observability {po_type}',
    )

    # the base env is only created once the request is known to be valid
    env = gym.make(non_po_name)
    return IndexWrapper(env, indices_dict[po_type])