def test_confusion_metric_correct_for_sequence_prediction_rule(self):
  dim = 10

  def _ground_truth_fn(history_item):
    state, _ = history_item
    return state.x

  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=dim))
  env.set_scalar_reward(rewards.NullReward())
  # Always predict a sequence of 1s.
  metric = error_metrics.ConfusionMetric(
      env=env,
      prediction_fn=lambda x: [1 for _ in range(dim)],
      ground_truth_fn=_ground_truth_fn,
      stratify_fn=lambda x: [1 for _ in range(dim)],
  )
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=metric)
  logging.info("Measurement: %s.", measurement)
  self.assertEqual(measurement[1].fp, 50)
  self.assertEqual(measurement[1].tp, 50)
  self.assertNotIn(0, measurement)
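# Why fp == tp == 50: a worked check, assuming run_test_simulation's default
# 10-step run and the deterministic env alternating state.x between all-zeros
# and all-ones. Always predicting a sequence of 1s over dim=10 then gives
#   5 steps * 10 true positives + 5 steps * 10 false positives = 50 + 50.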
def test_confusion_metric_correct_for_atomic_prediction_rule(self):

  def _ground_truth_fn(history_item):
    state, _ = history_item
    return state.x[0]

  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
  env.set_scalar_reward(rewards.NullReward())
  # Always predict 1.
  metric = error_metrics.ConfusionMetric(
      env=env,
      prediction_fn=lambda x: 1,
      ground_truth_fn=_ground_truth_fn,
      stratify_fn=lambda x: 1,
  )
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=metric)
  logging.info("Measurement: %s.", measurement)
  # The keys in measurement are given by group membership, which in this case
  # is defined to always be 1.
  self.assertEqual(measurement[1].fp, 5)
  self.assertEqual(measurement[1].tp, 5)
  self.assertNotIn(0, measurement)
def instantiate_environment_and_agent(
    agent_class,
    population_graph,
    initial_health_state,
    infection_probability=0.5,
    num_treatments=5,
    max_treatments=10,
    seed=100,
    agent_seed=50,
):
  env = infectious_disease.build_si_model(
      population_graph=population_graph,
      infection_probability=infection_probability,
      num_treatments=num_treatments,
      initial_health_state=initial_health_state,
      max_treatments=max_treatments,
  )
  agent = agent_class(
      env.action_space,
      rewards.NullReward(),
      env.observation_space,
      infectious_disease_agents.env_to_agent_params(env.initial_params),
  )
  env.seed(seed)
  agent.seed(agent_seed)
  _ = env.reset()
  return env, agent
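# A minimal usage sketch, not part of the test module. It assumes networkx is
# available and that `agent_class` is a _BaseAgent subclass taking
# (action_space, reward_fn, observation_space, params); the class name used
# here is illustrative.
import networkx as nx

graph = nx.karate_club_graph()
env, agent = instantiate_environment_and_agent(
    agent_class=infectious_disease_agents.RandomAgent,  # illustrative choice
    population_graph=graph,
    # Everybody starts out susceptible (health state 0).
    initial_health_state=[0] * graph.number_of_nodes(),
)
action = agent.act(env.reset(), done=False)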
def __init__(
    self,
    action_space,
    reward_fn,
    observation_space,
    threshold=0.5,
    epsilon_greedy=False,
    initial_epsilon_prob=0.7,
    decay_steps=10,
    epsilon_prob_decay_rate=0.02,
):
  """Initializes the agent.

  Args:
    action_space: a `gym.Space` that contains valid actions.
    reward_fn: a `RewardFn` object.
    observation_space: a `gym.Space` that contains valid observations.
    threshold: Fixed threshold.
    epsilon_greedy: Bool. Whether we want this agent to follow an
      epsilon-greedy policy.
    initial_epsilon_prob: Float. Initial value of the probability for an
      epsilon-greedy agent.
    decay_steps: A positive integer.
    epsilon_prob_decay_rate: A positive float.
  """
  if reward_fn is None:
    reward_fn = rewards.NullReward()
  super(FixedJury, self).__init__(action_space, reward_fn, observation_space)
  self._threshold = threshold
  self._epsilon_greedy = epsilon_greedy
  self._initial_epsilon_prob = initial_epsilon_prob
  self._decay_rate = epsilon_prob_decay_rate
  self._decay_steps = decay_steps
  self._steps = 0
  self.rng = np.random.RandomState()
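# A sketch, not the FixedJury implementation: one plausible way the stored
# decay fields could drive an epsilon-greedy choice. `_decayed_epsilon` and
# `_epsilon_greedy_action` are hypothetical helpers using a standard
# exponential-decay schedule.
def _decayed_epsilon(agent):
  # Epsilon shrinks geometrically with the number of elapsed steps.
  return agent._initial_epsilon_prob * (
      (1.0 - agent._decay_rate) ** (agent._steps / agent._decay_steps))


def _epsilon_greedy_action(agent, greedy_action, random_action):
  # With probability epsilon, explore; otherwise take the greedy action.
  if agent._epsilon_greedy and agent.rng.rand() < _decayed_epsilon(agent):
    return random_action
  return greedy_action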
def scenario_builder(self):
  """Returns an agent and environment pair."""
  graph = GRAPHS[self.graph_name]
  env = infectious_disease.build_sir_model(
      population_graph=graph,
      infection_probability=self.infection_probability,
      infected_exit_probability=self.infected_exit_probability,
      num_treatments=self.num_treatments,
      max_treatments=1,
      burn_in=self.burn_in,
      # Treatments turn susceptible people into recovered without having them
      # get sick.
      treatment_transition_matrix=np.array([[0, 0, 1],
                                            [0, 1, 0],
                                            [0, 0, 1]]),
      # Everybody starts out healthy.
      initial_health_state=[0] * graph.number_of_nodes(),
      initial_health_state_seed=self.env_seed,
  )
  agent = self.agent_constructor(
      env.action_space,
      rewards.NullReward(),
      env.observation_space,
      params=infectious_disease_agents.env_to_agent_params(
          env.initial_params),
  )
  return env, agent
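# Reading treatment_transition_matrix above (rows = current health state,
# columns = post-treatment state; this note assumes the SIR ordering
# susceptible, infected, recovered):
#   row 0 = [0, 0, 1]: a treated susceptible person jumps straight to
#     recovered, matching the comment in scenario_builder;
#   row 1 = [0, 1, 0]: treating an infected person leaves them infected;
#   row 2 = [0, 0, 1]: a recovered person stays recovered.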
def test_final_credit_distribution_metric_can_interact_with_lending(self):
  env = lending.DelayedImpactEnv()
  env.set_scalar_reward(rewards.NullReward())
  # Use step=-1 to get the final credit distribution.
  final_distribution = lending_metrics.CreditDistribution(env, step=-1)
  initial_distribution = lending_metrics.CreditDistribution(env, step=0)
  test_util.run_test_simulation(
      env=env, metric=[final_distribution, initial_distribution])
def __init__(self, action_space, reward_fn, observation_space, params):
  self.initial_params = copy.deepcopy(params)
  if reward_fn is None:
    reward_fn = rewards.NullReward()
  super(_BaseAgent, self).__init__(action_space, reward_fn,
                                   observation_space)
  self.rng = np.random.RandomState()
def test_accuracy_metric_can_interact_with_dummy(self):

  def _is_zero(history_item):
    _, action = history_item
    return int(action == 0)

  env = test_util.DummyEnv()
  env.set_scalar_reward(rewards.NullReward())
  metric = error_metrics.AccuracyMetric(env=env, numerator_fn=_is_zero)
  test_util.run_test_simulation(env=env, metric=metric)
def _setup_test_simulation(dim=1, calc_mean=False, modifier_fn=_modifier_fn):
  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=dim))
  env.set_scalar_reward(rewards.NullReward())
  metric = value_tracking_metrics.AggregatorMetric(
      env=env,
      modifier_fn=modifier_fn,
      selection_fn=_selection_fn,
      stratify_fn=_stratify_fn,
      calc_mean=calc_mean)
  return env, metric
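# The module-level helpers used by _setup_test_simulation are defined
# elsewhere; hedged stand-ins consistent with their use in these tests
# (e.g. the SummingMetric test below summing to [5]) might look like:
def _selection_fn(history_item):
  # Select the environment's state value at each step.
  state, _ = history_item
  return state.x


def _stratify_fn(history_item):
  # Put every step in a single stratum.
  return 1


def _modifier_fn(value):
  # Pass the selected value through unchanged.
  return value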
def test_summing_metric_give_correct_sum_dummy_env(self):
  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
  env.set_scalar_reward(rewards.NullReward())
  metric = value_tracking_metrics.SummingMetric(
      env=env, selection_fn=_selection_fn)
  measurement = test_util.run_test_simulation(
      env, agent=None, metric=metric, seed=0)
  self.assertTrue(np.all(np.equal(measurement, [5])))
def test_recall_with_zero_denominator(self):
  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
  env.set_scalar_reward(rewards.NullReward())
  # Ground truth is always 0, so recall (tp / (tp + fn)) has a zero
  # denominator; the metric should report 0 rather than fail.
  metric = error_metrics.RecallMetric(
      env=env,
      prediction_fn=lambda x: 0,
      ground_truth_fn=lambda x: 0,
      stratify_fn=lambda x: 1)
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=metric, num_steps=50)
  self.assertEqual({1: 0}, measurement)
def test_one_hot_conversion(self):
  observation_space = gym.spaces.Dict({"x": multinomial.Multinomial(10, 1)})
  params = classifier_agents.ScoringAgentParams(
      default_action_fn=lambda: 0,
      feature_keys=["x"],
      convert_one_hot_to_integer=True,
      threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD,
  )
  agent = classifier_agents.ThresholdAgent(
      observation_space=observation_space,
      reward_fn=rewards.NullReward(),
      params=params,
  )
  self.assertEqual(agent._get_features({"x": _one_hot(5)}), [5])
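# `_one_hot` is defined elsewhere in the test module; a minimal stand-in
# consistent with Multinomial(10, 1) observations (length-10 vectors with a
# single 1) would be:
def _one_hot(index, length=10):
  vec = np.zeros(length, dtype=int)
  vec[index] = 1
  return vec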
def test_precision_with_zero_denominator(self):

  def _ground_truth_fn(history_item):
    state, _ = history_item
    return state.x[0]

  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
  env.set_scalar_reward(rewards.NullReward())
  # Always predict 0, so precision (tp / (tp + fp)) has a zero denominator;
  # the metric should report 0 rather than fail.
  metric = error_metrics.PrecisionMetric(
      env=env,
      prediction_fn=lambda x: 0,
      ground_truth_fn=_ground_truth_fn,
      stratify_fn=lambda x: 1)
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=metric, num_steps=50)
  self.assertEqual({1: 0}, measurement)
def test_recall_metric_correct_for_atomic_prediction_rule(self):

  def _ground_truth_fn(history_item):
    state, _ = history_item
    return state.x[0]

  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
  env.set_scalar_reward(rewards.NullReward())
  # Always predict 1.
  metric = error_metrics.RecallMetric(
      env=env,
      prediction_fn=lambda x: 1,
      ground_truth_fn=_ground_truth_fn,
      stratify_fn=lambda x: 1)
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=metric, num_steps=50)
  logging.info('Measurement: %s.', measurement)
  self.assertEqual({1: 1}, measurement)
def __init__(self, action_space, reward_fn, observation_space,
             default_action=None):
  """Initializes the random agent, which takes randomly sampled actions.

  Args:
    action_space: A gym.Space defining the space of possible actions.
    reward_fn: A function that takes an observation and calculates the
      agent's reward.
    observation_space: A gym.Space defining the space of possible
      observations.
    default_action: The first action of the agent when no observation is
      given.
  """
  if reward_fn is None:
    reward_fn = rewards.NullReward()
  super(RandomAgent, self).__init__(action_space, reward_fn,
                                    observation_space)
  self.default_action = default_action
def test_cost_metric_correct_for_atomic_prediction_rule(self):

  def _ground_truth_fn(history_item):
    state, _ = history_item
    return state.x[0]

  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
  env.set_scalar_reward(rewards.NullReward())
  cost_metric = error_metrics.CostedConfusionMetric(
      env=env,
      prediction_fn=lambda x: 1,
      ground_truth_fn=_ground_truth_fn,
      stratify_fn=lambda x: 1,
      cost_matrix=params.CostMatrix(tp=1, fp=-2, tn=-1, fn=-1))
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=cost_metric)
  logging.info('Cost measurement: %s.', measurement)
  self.assertEqual(measurement[1], -5)
  self.assertNotIn(0, measurement)
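# Why measurement[1] == -5: a worked check, assuming the default 10-step
# deterministic run in which state.x[0] alternates between 0 and 1. Always
# predicting 1 yields tp=5 and fp=5, so with the cost matrix above the total
# is 5 * 1 + 5 * (-2) = -5.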
def test_stratified_accuracy_metric_correct_sequence_prediction(self):
  """Check correctness when stratifying into (wrong, right) bins."""

  def _x_select(history_item):
    return [i == 1 for i in history_item.state.x]

  def _x_stratify(history_item):
    return history_item.state.x

  env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=10))
  env.set_scalar_reward(rewards.NullReward())
  metric = error_metrics.AccuracyMetric(
      env=env, numerator_fn=_x_select, stratify_fn=_x_stratify)
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=metric)
  logging.info('Measurement: %s.', measurement)
  self.assertEqual(measurement[0], 0)
  self.assertEqual(measurement[1], 1)
def __init__(self, action_space, reward_fn, observation_space, params=None):
  if reward_fn is None:
    reward_fn = rewards.NullReward()
  super(AllocationAgent, self).__init__(action_space, reward_fn,
                                        observation_space)
  if params is None:
    params = AllocationAgentParams()

  self.params = params
  self._n_bins = len(action_space.nvec)
  self.rng = np.random.RandomState()
  self._n_resource = self.action_space.n
  self.beliefs = np.zeros(self._n_bins).tolist()
  self.feature_selection_fn = params.feature_selection_fn or (
      lambda obs: _get_added_vector_features(obs, self._n_bins)
  )  # type: Callable
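# `_get_added_vector_features` is defined elsewhere in the module; a hedged
# stand-in matching its use above (collapse an observation dict into a single
# length-n_bins feature vector) could be:
def _get_added_vector_features(observation, n_bins):
  features = np.zeros(n_bins)
  for value in observation.values():
    features += np.asarray(value, dtype=float).reshape(n_bins)
  return features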
def test_agent_on_one_hot_vectors(self):
  # Space of 1-hot vectors of length 10.
  observation_space = gym.spaces.Dict({'x': multinomial.Multinomial(10, 1)})
  params = classifier_agents.ScoringAgentParams(
      default_action_fn=lambda: 0,
      feature_keys=['x'],
      convert_one_hot_to_integer=True,
      burnin=999,
      threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD)
  agent = classifier_agents.ThresholdAgent(
      observation_space=observation_space,
      reward_fn=rewards.NullReward(),
      params=params)
  observation_space.seed(100)

  # Train a boundary at 3 using 1-hot vectors.
  observation = observation_space.sample()
  agent._act_impl(observation, reward=None, done=False)
  for _ in range(1000):
    last_observation = observation
    observation = observation_space.sample()
    agent._act_impl(
        observation,
        reward=int(np.argmax(last_observation['x']) >= 3),
        done=False)
    if agent._training_corpus.examples:
      assert int(agent._training_corpus.examples[-1].features[0] >= 3) == (
          agent._training_corpus.examples[-1].label)

  agent.frozen = True
  self.assertTrue(agent.act({'x': _one_hot(3)}, done=False))
  self.assertFalse(agent.act({'x': _one_hot(2)}, done=False))
def test_stratified_accuracy_metric_correct_atomic_prediction(self):
  """Check correctness when stratifying into (wrong, right) bins."""

  def _x_select(history_item):
    state, _ = history_item
    return int(state.x[0] == 1)

  def _x_stratify(history_item):
    state, _ = history_item
    return state.x[0]

  env = test_util.DeterministicDummyEnv()
  env.set_scalar_reward(rewards.NullReward())
  metric = error_metrics.AccuracyMetric(
      env=env, numerator_fn=_x_select, stratify_fn=_x_stratify)
  measurement = test_util.run_test_simulation(
      env=env, agent=None, metric=metric)
  logging.info("Measurement: %s.", measurement)
  self.assertEqual(measurement[0], 0)
  self.assertEqual(measurement[1], 1)