Example #1
    def _act_impl(self, observation, reward, done):
        """Returns an action from `self.action_space`.

    Args:
      observation: An observation in self.observation_space.
      reward: A scalar value that can be used as a supervising signal.
      done: A boolean indicating whether the episode is over.

    Raises:
      core.EpisodeDoneError if `done` is True.
      core.InvalidObservationError if observation is not in
        `self.observation_space`.
      core.InvalidRewardError if reward is not a scalar or None.
    """
        if done:
            raise core.EpisodeDoneError("Called act on a done episode.")

        if not self.observation_space.contains(observation):
            raise core.InvalidObservationError("Invalid observation: %s" %
                                               observation)

        core.validate_reward(reward)
        # Use `sample_from` so that the randomness comes from the agent's
        # random state rather than the action_space's random_state, which may
        # be changed by other parties.
        return self.sample_from(self.action_space)
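
The examples on this page all open with the same precondition checks before computing an action. As a rough, standalone sketch (not part of the original code), that shared pattern could be factored into a helper; the exception classes here are local stand-ins for the `core` module's exceptions, whose real definitions may differ:

class EpisodeDoneError(Exception):
    """Raised when an agent is asked to act on a finished episode."""

class InvalidObservationError(Exception):
    """Raised when an observation falls outside the observation space."""

def check_act_preconditions(observation, done, observation_space):
    # The same two checks that every `_act_impl` on this page performs first.
    if done:
        raise EpisodeDoneError("Called act on a done episode.")
    if not observation_space.contains(observation):
        raise InvalidObservationError("Invalid observation: %s" % observation)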
Example #2
    def _act_impl(self, observation, reward, done):
        """Returns a treatment action.

    Args:
      observation: An observation in Dict Space with 'population' and
        'population_graph' keys.
      reward: A scalar float reward value.
      done: A boolean indicating whether the simulation has finished.

    Returns:
      A numpy ndarray containing population indices that represents a treatment
      action.
    """
        if done:
            raise core.EpisodeDoneError('Called act on a done episode.')
        if not self.observation_space.contains(observation):
            raise core.InvalidObservationError('Invalid observation: %s.' %
                                               observation)

        return self._triage(observation)
Example #3
  def _act_impl(self, observation, reward, done):
    """Returns an action from `self.action_space`.

    Args:
      observation: An observation in self.observation_space.
      reward: A scalar value that can be used as a supervising signal.
      done: A boolean indicating whether the episode is over.

    Raises:
      core.EpisodeDoneError if `done` is True.
      core.InvalidObservationError if observation is not in
        `self.observation_space`.
    """
    if done:
      raise core.EpisodeDoneError('Called act on a done episode.')

    if not self.observation_space.contains(observation):
      raise core.InvalidObservationError('Invalid observation: %s' %
                                         observation)

    features = self._feature_selection_fn(observation)
    labels = self._label_fn(observation)

    # No need to build up a dataset of observations when the model is frozen.
    if not self._freeze_classifier():
      # Update the dataset and fit a new model.
      self._update_dataset(features, labels)
      self._train_model()
    # This method is called after the initial action in the first step, so
    # increment the step counter here.
    self._steps += 1
    if self._steps < self._burnin:
      return {
          'threshold': np.asarray(self._initial_threshold),
          'epsilon_prob': np.asarray(self._get_epsilon_prob())
      }

    return {
        'threshold': np.asarray(self._threshold),
        'epsilon_prob': np.asarray(self._get_epsilon_prob())
    }
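
Example #3 gates its learned threshold behind a burn-in period: the model is retrained on every step, but until `self._burnin` steps have elapsed the agent keeps returning the initial threshold. A minimal sketch of that gating logic, with illustrative names rather than the original attributes:

import numpy as np

def burnin_action(steps, burnin, initial_threshold, learned_threshold,
                  epsilon_prob):
    # Fall back to the initial threshold until burn-in ends, then expose
    # the learned one; the action format matches the dicts returned above.
    threshold = initial_threshold if steps < burnin else learned_threshold
    return {
        'threshold': np.asarray(threshold),
        'epsilon_prob': np.asarray(epsilon_prob),
    }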
Example #4
    def _act_impl(self, observation, reward, done):
        """Returns a fixed threshold.

    Args:
      observation: An observation in self.observation_space.
      reward: A scalar value that can be used as a supervising signal.
      done: A boolean indicating whether the episode is over.

    Raises:
      core.EpisodeDoneError if `done` is True.
      core.InvalidObservationError if observation is not in
        `self.observation_space`.
    """
        if done:
            raise core.EpisodeDoneError("Called act on a done episode.")

        if not self.observation_space.contains(observation):
            raise core.InvalidObservationError("Invalid observation: %s" %
                                               observation)
        self._steps += 1
        return self.initial_action()
Example #5
    def _act_impl(self, observation, reward, done):
        """Returns an action from 'self.action_space'.

    Args:
      observation: An observation in self.observation_space.
      reward: A scalar value that can be used as a supervising signal.
      done: A boolean indicating whether the episode is over.

    Returns:
      An action from self.action space.

    Raises:
      core.EpisodeDoneError if `done` is True.
      core.InvalidObservationError if observation is not contained in
        'self.observation_space'.
      gym.error.InvalidAction if the generated action to return is not contained
        in 'self.action_space'.
    """
        if done:
            raise core.EpisodeDoneError("Called act on a done episode.")

        if not self.observation_space.contains(observation):
            raise core.InvalidObservationError("Invalid ovservation: %s" %
                                               observation)
        if self.params.observation_adjustment_fn:
            observation = self.params.observation_adjustment_fn(
                self.rng, self.beliefs, observation)

        features = self.feature_selection_fn(observation)
        self.beliefs = self._update_beliefs(features, self.beliefs)
        action = self._allocate(self._n_resource, self.beliefs)

        if not self.action_space.contains(action):
            raise gym.error.InvalidAction("Invalid action: %s" % action)

        return action
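
Each example implements `_act_impl` rather than a public `act`, which suggests a template-method arrangement: a base-class entry point delegates to the subclass override. A hedged sketch of such a base class, assuming that delegation (the actual `core.Agent` interface may differ):

class Agent:
    """Sketch of a base class whose public `act` delegates to `_act_impl`."""

    def act(self, observation, reward, done):
        # Subclasses customize behavior by overriding `_act_impl`, keeping
        # a single stable entry point for callers.
        return self._act_impl(observation, reward, done)

    def _act_impl(self, observation, reward, done):
        raise NotImplementedError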