Example #1
    def __init__(self,
                 *,
                 num_actions: int = 1,
                 num_observations: int = 1,
                 action_dtype=np.int32,
                 obs_dtype=np.int32,
                 obs_shape: Sequence[int] = (),
                 discount_spec: Optional[types.NestedSpec] = None,
                 reward_spec: Optional[types.NestedSpec] = None,
                 **kwargs):
        """Initialize the environment."""
        if reward_spec is None:
            reward_spec = specs.Array((), np.float32)

        if discount_spec is None:
            discount_spec = specs.BoundedArray((), np.float32, 0.0, 1.0)

        actions = specs.DiscreteArray(num_actions, dtype=action_dtype)
        observations = specs.BoundedArray(shape=obs_shape,
                                          dtype=obs_dtype,
                                          minimum=obs_dtype(0),
                                          maximum=obs_dtype(num_observations - 1))

        super().__init__(spec=specs.EnvironmentSpec(observations=observations,
                                                    actions=actions,
                                                    rewards=reward_spec,
                                                    discounts=discount_spec),
                         **kwargs)
Example #2
 def action_spec(self) -> Dict[str, specs.DiscreteArray]:
     action_specs = {}
     for agent in self.possible_agents:
         spec = self._environment.action_spec()
         action_specs[agent] = specs.DiscreteArray(spec["num_actions"],
                                                   np.int64)
     return action_specs
Example #3
def _make_fake_env() -> dm_env.Environment:
  env_spec = specs.EnvironmentSpec(
      observations=specs.Array(shape=(10, 5), dtype=np.float32),
      actions=specs.DiscreteArray(num_values=3),
      rewards=specs.Array(shape=(), dtype=np.float32),
      discounts=specs.BoundedArray(
          shape=(), dtype=np.float32, minimum=0., maximum=1.),
  )
  return fakes.Environment(env_spec, episode_length=10)
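
A minimal usage sketch (not part of the original snippet): running one episode of the fake environment built above. It assumes dm_env, numpy and the fakes/specs imports used by _make_fake_env are available.

env = _make_fake_env()
timestep = env.reset()
while not timestep.last():
  # generate_value() yields an always-valid placeholder action (0 here).
  action = env.action_spec().generate_value()
  timestep = env.step(action)
print(timestep.reward, timestep.discount)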
Example #4
def _convert_to_spec(space: gym.Space,
                     name: Optional[str] = None) -> types.NestedSpec:
    """Converts an OpenAI Gym space to a dm_env spec or nested structure of specs.

  Box, MultiBinary and MultiDiscrete Gym spaces are converted to BoundedArray
  specs. Discrete OpenAI spaces are converted to DiscreteArray specs. Tuple and
  Dict spaces are recursively converted to tuples and dictionaries of specs.

  Args:
    space: The Gym space to convert.
    name: Optional name to apply to all return spec(s).

  Returns:
    A dm_env spec or nested structure of specs, corresponding to the input
    space.
  """
    if isinstance(space, spaces.Discrete):
        return specs.DiscreteArray(num_values=space.n,
                                   dtype=space.dtype,
                                   name=name)

    elif isinstance(space, spaces.Box):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=space.low,
                                  maximum=space.high,
                                  name=name)

    elif isinstance(space, spaces.MultiBinary):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=0.0,
                                  maximum=1.0,
                                  name=name)

    elif isinstance(space, spaces.MultiDiscrete):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=np.zeros(space.shape),
                                  maximum=space.nvec - 1,
                                  name=name)

    elif isinstance(space, spaces.Tuple):
        return tuple(_convert_to_spec(s, name) for s in space.spaces)

    elif isinstance(space, spaces.Dict):
        return {
            key: _convert_to_spec(value, key)
            for key, value in space.spaces.items()
        }

    else:
        raise ValueError('Unexpected gym space: {}'.format(space))
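
A quick way to see what _convert_to_spec returns is to feed it a nested Gym space. The sketch below is illustrative and not part of the original snippet; the space contents are made up, and it assumes gym, numpy and dm_env are installed alongside the imports used above.

import numpy as np
from gym import spaces

observation_space = spaces.Dict({
    'position': spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
    'mode': spaces.Discrete(4),
})

converted = _convert_to_spec(observation_space, name='observation')
# The Dict space becomes a dict of specs keyed like the space:
#   converted['position'] -> BoundedArray(shape=(3,), dtype=float32, name='position')
#   converted['mode']     -> DiscreteArray(num_values=4, name='mode')
assert converted['mode'].num_values == 4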
Example #5
    def __init__(self,
                 *,
                 num_actions: int = 1,
                 num_observations: int = 1,
                 action_dtype=np.int32,
                 obs_dtype=np.int32,
                 reward_dtype=np.float32,
                 obs_shape: Sequence[int] = (),
                 **kwargs):
        """Initialize the environment."""
        actions = specs.DiscreteArray(num_actions, dtype=action_dtype)
        observations = specs.BoundedArray(shape=obs_shape,
                                          dtype=obs_dtype,
                                          minimum=obs_dtype(0),
                                          maximum=obs_dtype(num_observations - 1))
        rewards = specs.Array((), reward_dtype)
        discounts = specs.BoundedArray((), reward_dtype, 0.0, 1.0)

        super().__init__(spec=specs.EnvironmentSpec(observations=observations,
                                                    actions=actions,
                                                    rewards=rewards,
                                                    discounts=discounts),
                         **kwargs)
Example #6
 def action_spec(self) -> specs.DiscreteArray:
     """Returns the action spec."""
     return specs.DiscreteArray(dtype=int,
                                num_values=len(list(Action)),
                                name="action")
Example #7
 def action_spec(self) -> specs.DiscreteArray:
     return specs.DiscreteArray(
         self._environment.game.num_distinct_actions())
Example #8
def discretize_spec(spec, num_actions):
    assert isinstance(spec.actions, specs.BoundedArray)
    return spec._replace(actions=specs.DiscreteArray(num_actions))
Example #9
 def action_spec(self) -> types.NestedSpec:
   return specs.DiscreteArray(
       num_values=self.max_task,
       dtype=np.int32
   )
Example #10
    def init(self, params):
        """Configures TensorFlow, seeds the RNGs and builds the R2D2 agent from params."""

        if not _TF_USE_GPU:
            tf.config.set_visible_devices([], 'GPU')
        tf.config.threading.set_inter_op_parallelism_threads(_TF_NUM_THREADS)
        tf.config.threading.set_intra_op_parallelism_threads(_TF_NUM_THREADS)

        if params.seed:
            agent_seed = params.seed + sum([ord(c) for c in params.name])
            random.seed(agent_seed)
            np.random.seed(agent_seed)
            tf.random.set_seed(agent_seed)

        # Internalize params.
        self._params = params

        self._name = params.name

        # Whether learning stopped.
        self._stop = False

        # Define specs. Everything needs to be single precision by default.
        observation_spec = specs.Array(shape=(params.states.rank, ),
                                       dtype=np.float32,
                                       name='obs')
        action_spec = specs.DiscreteArray(dtype=np.int32,
                                          num_values=params.actions.depth,
                                          name="action")
        reward_spec = specs.Array(shape=(), dtype=np.float32, name='reward')
        discount_spec = specs.BoundedArray(shape=(),
                                           dtype=np.float32,
                                           minimum=0.,
                                           maximum=1.,
                                           name='discount')

        env_spec = specs.EnvironmentSpec(observations=observation_spec,
                                         actions=action_spec,
                                         rewards=reward_spec,
                                         discounts=discount_spec)

        # Logger.
        dir_path = f'{params.exp_path}/logs/{self._name}'
        self._logger = make_default_logger(directory=dir_path,
                                           label=self._name)
        agent_logger = make_default_logger(directory=dir_path,
                                           label=f'{self._name}-learning')

        network = Network(num_actions=env_spec.actions.num_values,
                          rnn_hidden_size=params.rnn_hidden_size,
                          head_layers=params.head_layers)

        self.agent = acme_agent.R2D2(
            environment_spec=env_spec,
            network=network,
            batch_size=params.batch_size,
            samples_per_insert=params.samples_per_insert,
            burn_in_length=params.burn_in_length,
            trace_length=params.trace_length,
            replay_period=params.replay_period,
            min_replay_size=params.min_replay_size,
            max_replay_size=params.max_replay_size,
            discount=params.discount_factor,
            prefetch_size=params.prefetch_size,
            target_update_period=params.target_update_period,
            importance_sampling_exponent=params.importance_sampling_exponent,
            priority_exponent=params.priority_exponent,
            epsilon_init=params.epsilon_init,
            epsilon_final=params.epsilon_final,
            epsilon_schedule_timesteps=params.epsilon_schedule_timesteps,
            learning_rate=params.learning_rate,
            store_lstm_state=params.store_lstm_state,
            max_priority_weight=params.max_priority_weight,
            logger=agent_logger,
            checkpoint=False,
        )

        # Observations counter.
        self._obs_counter = 0
Example #11
 def action_spec(self):
   return specs.DiscreteArray(num_values=self.num_actions)
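
Every example above constructs a specs.DiscreteArray; the short sketch below (an illustration, not taken from any of the snippets) shows how such a spec is typically consumed downstream, using only dm_env and numpy.

import numpy as np
from dm_env import specs

action_spec = specs.DiscreteArray(num_values=5, dtype=np.int32, name='action')

# generate_value() returns a conforming placeholder (the minimum, i.e. 0).
dummy_action = action_spec.generate_value()

# validate() returns the value if it conforms and raises ValueError otherwise.
action_spec.validate(np.int32(3))    # ok: within [0, num_values - 1]
# action_spec.validate(np.int32(7))  # would raise ValueError (out of bounds)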