def _act_impl(self, observation, reward, done):
    """Samples and returns an action from `self.action_space`.

    Args:
      observation: An observation belonging to `self.observation_space`.
      reward: A scalar value usable as a supervising signal (or None).
      done: Whether the episode has ended.

    Returns:
      An action drawn from `self.action_space`.

    Raises:
      core.EpisodeDoneError: If `done` is True.
      core.InvalidObservationError: If `observation` is not contained in
        `self.observation_space`.
      core.InvalidRewardError: If `reward` is not a scalar or None.
    """
    if done:
        raise core.EpisodeDoneError("Called act on a done episode.")
    if not self.observation_space.contains(observation):
        raise core.InvalidObservationError(
            "Invalid observation: %s" % observation)
    core.validate_reward(reward)
    # Sample through the agent's `sample_from` so randomness comes from the
    # agent's own random state rather than the action_space's random state,
    # which other parties may mutate.
    return self.sample_from(self.action_space)
def _act_impl(self, observation, reward, done):
    """Chooses a treatment action for the observed population.

    Args:
      observation: An observation in a Dict space carrying 'population' and
        'population_graph' keys.
      reward: A scalar float reward value (unused here).
      done: Whether the simulation has finished.

    Returns:
      A numpy ndarray of population indices representing the treatment action.

    Raises:
      core.EpisodeDoneError: If `done` is True.
      core.InvalidObservationError: If `observation` is not contained in
        `self.observation_space`.
    """
    if done:
        raise core.EpisodeDoneError('Called act on a done episode.')
    if not self.observation_space.contains(observation):
        raise core.InvalidObservationError(
            'Invalid observation: %s.' % observation)
    # Delegate the actual treatment selection to the triage policy.
    return self._triage(observation)
def _act_impl(self, observation, reward, done):
    """Updates the classifier (unless frozen) and returns a threshold action.

    Args:
      observation: An observation in self.observation_space.
      reward: A scalar value usable as a supervising signal.
      done: Whether the episode is over.

    Returns:
      A dict with 'threshold' and 'epsilon_prob' numpy arrays. During the
      burn-in period the initial threshold is returned; afterwards the
      learned threshold is used.

    Raises:
      core.EpisodeDoneError: If `done` is True.
      core.InvalidObservationError: If `observation` is not contained in
        `self.observation_space`.
    """
    if done:
        raise core.EpisodeDoneError('Called act on a done episode.')
    if not self.observation_space.contains(observation):
        raise core.InvalidObservationError(
            'Invalid observation: %s' % observation)

    features = self._feature_selection_fn(observation)
    labels = self._label_fn(observation)

    # While the classifier is live, grow the dataset and refit; a frozen
    # classifier needs no further observations.
    if not self._freeze_classifier():
        self._update_dataset(features, labels)
        self._train_model()

    # This method runs after the initial action of the first step, so the
    # step counter advances here.
    self._steps += 1

    # Before burn-in completes, fall back to the initial threshold.
    chosen = (self._initial_threshold if self._steps < self._burnin
              else self._threshold)
    return {
        'threshold': np.asarray(chosen),
        'epsilon_prob': np.asarray(self._get_epsilon_prob()),
    }
def _act_impl(self, observation, reward, done):
    """Returns the agent's fixed (initial) threshold action.

    Args:
      observation: An observation in self.observation_space.
      reward: A scalar value usable as a supervising signal (unused here).
      done: Whether the episode is over.

    Returns:
      The action produced by `self.initial_action()`.

    Raises:
      core.EpisodeDoneError: If `done` is True.
      core.InvalidObservationError: If `observation` is not contained in
        `self.observation_space`.
    """
    if done:
        raise core.EpisodeDoneError("Called act on a done episode.")
    if not self.observation_space.contains(observation):
        raise core.InvalidObservationError(
            "Invalid observation: %s" % observation)
    # Track how many act() calls have occurred, then always emit the same
    # initial action.
    self._steps += 1
    return self.initial_action()
def _act_impl(self, observation, reward, done):
    """Returns an allocation action from `self.action_space`.

    Args:
      observation: An observation in self.observation_space.
      reward: A scalar value that can be used as a supervising signal.
      done: A boolean indicating whether the episode is over.

    Returns:
      An action from `self.action_space`.

    Raises:
      core.EpisodeDoneError: If `done` is True.
      core.InvalidObservationError: If `observation` is not contained in
        `self.observation_space`.
      gym.error.InvalidAction: If the generated action is not contained in
        `self.action_space`.
    """
    if done:
        raise core.EpisodeDoneError("Called act on a done episode.")
    if not self.observation_space.contains(observation):
        # Fixed misspelled error message ("ovservation" -> "observation").
        raise core.InvalidObservationError(
            "Invalid observation: %s" % observation)
    # Optionally let the caller-supplied hook adjust the observation using
    # the agent's RNG and current beliefs before feature extraction.
    if self.params.observation_adjustment_fn:
        observation = self.params.observation_adjustment_fn(
            self.rng, self.beliefs, observation)
    features = self.feature_selection_fn(observation)
    self.beliefs = self._update_beliefs(features, self.beliefs)
    action = self._allocate(self._n_resource, self.beliefs)
    # Sanity-check our own output before handing it back to the environment.
    if not self.action_space.contains(action):
        raise gym.error.InvalidAction("Invalid action: %s" % action)
    return action