Example #1
    def __init__(self,
                 *,
                 num_actions: int = 1,
                 num_observations: int = 1,
                 action_dtype=np.int32,
                 obs_dtype=np.int32,
                 obs_shape: Sequence[int] = (),
                 discount_spec: Optional[types.NestedSpec] = None,
                 reward_spec: Optional[types.NestedSpec] = None,
                 **kwargs):
        """Initialize the environment."""
        if reward_spec is None:
            reward_spec = specs.Array((), np.float32)

        if discount_spec is None:
            discount_spec = specs.BoundedArray((), np.float32, 0.0, 1.0)

        actions = specs.DiscreteArray(num_actions, dtype=action_dtype)
        observations = specs.BoundedArray(shape=obs_shape,
                                          dtype=obs_dtype,
                                          minimum=obs_dtype(0),
                                          maximum=obs_dtype(num_observations - 1))

        super().__init__(spec=specs.EnvironmentSpec(observations=observations,
                                                    actions=actions,
                                                    rewards=reward_spec,
                                                    discounts=discount_spec),
                         **kwargs)
Example #2
 def action_spec(self) -> Dict[str, specs.DiscreteArray]:
     action_specs = {}
     for agent in self.possible_agents:
         spec = self._environment.action_spec()
         action_specs[agent] = specs.DiscreteArray(spec["num_actions"],
                                                   np.int64)
     return action_specs
Example #3
def _make_fake_env() -> dm_env.Environment:
  env_spec = specs.EnvironmentSpec(
      observations=specs.Array(shape=(10, 5), dtype=np.float32),
      actions=specs.DiscreteArray(num_values=3),
      rewards=specs.Array(shape=(), dtype=np.float32),
      discounts=specs.BoundedArray(
          shape=(), dtype=np.float32, minimum=0., maximum=1.),
  )
  return fakes.Environment(env_spec, episode_length=10)
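
A minimal usage sketch (not part of the original snippet): running one episode of the fake environment built above. It assumes dm_env, numpy and the fakes/specs imports used by _make_fake_env are available.

env = _make_fake_env()
timestep = env.reset()
while not timestep.last():
  # generate_value() yields an always-valid placeholder action (0 here).
  action = env.action_spec().generate_value()
  timestep = env.step(action)
print(timestep.reward, timestep.discount)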
Example #4
def _convert_to_spec(space: gym.Space,
                     name: Optional[str] = None) -> types.NestedSpec:
    """Converts an OpenAI Gym space to a dm_env spec or nested structure of specs.

  Box, MultiBinary and MultiDiscrete Gym spaces are converted to BoundedArray
  specs. Discrete OpenAI spaces are converted to DiscreteArray specs. Tuple and
  Dict spaces are recursively converted to tuples and dictionaries of specs.

  Args:
    space: The Gym space to convert.
    name: Optional name to apply to all return spec(s).

  Returns:
    A dm_env spec or nested structure of specs, corresponding to the input
    space.
  """
    if isinstance(space, spaces.Discrete):
        return specs.DiscreteArray(num_values=space.n,
                                   dtype=space.dtype,
                                   name=name)

    elif isinstance(space, spaces.Box):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=space.low,
                                  maximum=space.high,
                                  name=name)

    elif isinstance(space, spaces.MultiBinary):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=0.0,
                                  maximum=1.0,
                                  name=name)

    elif isinstance(space, spaces.MultiDiscrete):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=np.zeros(space.shape),
                                  maximum=space.nvec - 1,
                                  name=name)

    elif isinstance(space, spaces.Tuple):
        return tuple(_convert_to_spec(s, name) for s in space.spaces)

    elif isinstance(space, spaces.Dict):
        return {
            key: _convert_to_spec(value, key)
            for key, value in space.spaces.items()
        }

    else:
        raise ValueError('Unexpected gym space: {}'.format(space))
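
A quick way to see what _convert_to_spec returns is to feed it a nested Gym space. The sketch below is illustrative and not part of the original snippet; the space contents are made up, and it assumes gym, numpy and dm_env are installed alongside the imports used above.

import numpy as np
from gym import spaces

observation_space = spaces.Dict({
    'position': spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
    'mode': spaces.Discrete(4),
})

converted = _convert_to_spec(observation_space, name='observation')
# The Dict space becomes a dict of specs keyed like the space:
#   converted['position'] -> BoundedArray(shape=(3,), dtype=float32, name='position')
#   converted['mode']     -> DiscreteArray(num_values=4, name='mode')
assert converted['mode'].num_values == 4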
Example #5
    def __init__(self,
                 *,
                 num_actions: int = 1,
                 num_observations: int = 1,
                 action_dtype=np.int32,
                 obs_dtype=np.int32,
                 reward_dtype=np.float32,
                 obs_shape: Sequence[int] = (),
                 **kwargs):
        """Initialize the environment."""
        actions = specs.DiscreteArray(num_actions, dtype=action_dtype)
        observations = specs.BoundedArray(shape=obs_shape,
                                          dtype=obs_dtype,
                                          minimum=obs_dtype(0),
                                          maximum=obs_dtype(num_observations - 1))
        rewards = specs.Array((), reward_dtype)
        discounts = specs.BoundedArray((), reward_dtype, 0.0, 1.0)

        super().__init__(spec=specs.EnvironmentSpec(observations=observations,
                                                    actions=actions,
                                                    rewards=rewards,
                                                    discounts=discounts),
                         **kwargs)
Example #6
 def action_spec(self) -> specs.DiscreteArray:
     """Returns the action spec."""
     return specs.DiscreteArray(dtype=int,
                                num_values=len(list(Action)),
                                name="action")
Example #7
 def action_spec(self) -> specs.DiscreteArray:
     return specs.DiscreteArray(
         self._environment.game.num_distinct_actions())
Example #8
def discretize_spec(spec, num_actions):
    assert isinstance(spec.actions, specs.BoundedArray)
    return spec._replace(actions=specs.DiscreteArray(num_actions))
Example #9
 def action_spec(self) -> types.NestedSpec:
   return specs.DiscreteArray(
       num_values=self.max_task,
       dtype=np.int32
   )
Example #10
    def init(self, params):
        """Configures TensorFlow, seeds the RNGs and builds the R2D2 agent from params."""

        if not _TF_USE_GPU:
            tf.config.set_visible_devices([], 'GPU')
        tf.config.threading.set_inter_op_parallelism_threads(_TF_NUM_THREADS)
        tf.config.threading.set_intra_op_parallelism_threads(_TF_NUM_THREADS)

        if params.seed:
            agent_seed = params.seed + sum([ord(c) for c in params.name])
            random.seed(agent_seed)
            np.random.seed(agent_seed)
            tf.random.set_seed(agent_seed)

        # Internalize params.
        self._params = params

        self._name = params.name

        # Whether learning stopped.
        self._stop = False

        # Define specs. Everything needs to be single precision by default.
        observation_spec = specs.Array(shape=(params.states.rank, ),
                                       dtype=np.float32,
                                       name='obs')
        action_spec = specs.DiscreteArray(dtype=np.int32,
                                          num_values=params.actions.depth,
                                          name="action")
        reward_spec = specs.Array(shape=(), dtype=np.float32, name='reward')
        discount_spec = specs.BoundedArray(shape=(),
                                           dtype=np.float32,
                                           minimum=0.,
                                           maximum=1.,
                                           name='discount')

        env_spec = specs.EnvironmentSpec(observations=observation_spec,
                                         actions=action_spec,
                                         rewards=reward_spec,
                                         discounts=discount_spec)

        # Logger.
        dir_path = f'{params.exp_path}/logs/{self._name}'
        self._logger = make_default_logger(directory=dir_path,
                                           label=self._name)
        agent_logger = make_default_logger(directory=dir_path,
                                           label=f'{self._name}-learning')

        network = Network(num_actions=env_spec.actions.num_values,
                          rnn_hidden_size=params.rnn_hidden_size,
                          head_layers=params.head_layers)

        self.agent = acme_agent.R2D2(
            environment_spec=env_spec,
            network=network,
            batch_size=params.batch_size,
            samples_per_insert=params.samples_per_insert,
            burn_in_length=params.burn_in_length,
            trace_length=params.trace_length,
            replay_period=params.replay_period,
            min_replay_size=params.min_replay_size,
            max_replay_size=params.max_replay_size,
            discount=params.discount_factor,
            prefetch_size=params.prefetch_size,
            target_update_period=params.target_update_period,
            importance_sampling_exponent=params.importance_sampling_exponent,
            priority_exponent=params.priority_exponent,
            epsilon_init=params.epsilon_init,
            epsilon_final=params.epsilon_final,
            epsilon_schedule_timesteps=params.epsilon_schedule_timesteps,
            learning_rate=params.learning_rate,
            store_lstm_state=params.store_lstm_state,
            max_priority_weight=params.max_priority_weight,
            logger=agent_logger,
            checkpoint=False,
        )

        # Observations counter.
        self._obs_counter = 0
Example #11
 def action_spec(self):
   return specs.DiscreteArray(num_values=self.num_actions)
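
Every example above constructs a specs.DiscreteArray; the short sketch below (an illustration, not taken from any of the snippets) shows how such a spec is typically consumed downstream, using only dm_env and numpy.

import numpy as np
from dm_env import specs

action_spec = specs.DiscreteArray(num_values=5, dtype=np.int32, name='action')

# generate_value() returns a conforming placeholder (the minimum, i.e. 0).
dummy_action = action_spec.generate_value()

# validate() returns the value if it conforms and raises ValueError otherwise.
action_spec.validate(np.int32(3))    # ok: within [0, num_values - 1]
# action_spec.validate(np.int32(7))  # would raise ValueError (out of bounds)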