    def test_agent_raises_with_improper_number_of_features(self):
        env = test_util.DummyEnv()

        single_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=["x"]
        )

        many_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=["x", "y"]
        )

        no_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=[]
        )

        initialize = functools.partial(
            classifier_agents.ThresholdAgent,
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
        )

        agent = initialize(params=single_feature_params)
        # This should succeed.
        agent.act(env.observation_space.sample(), done=False)

        agent = initialize(params=many_feature_params)
        with self.assertRaises(ValueError):
            agent.act(env.observation_space.sample(), done=False)

        agent = initialize(params=no_feature_params)
        with self.assertRaises(ValueError):
            agent.act(env.observation_space.sample(), done=False)
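
    # Note on the test above: ThresholdAgent thresholds a single scalar score,
    # so `feature_keys` is expected to name exactly one feature; zero or
    # multiple keys cannot be reduced to one score, hence the ValueErrors.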

    def test_oracle_maxutil_classifier_is_stable(self):
        env = lending.DelayedImpactEnv()

        agent_params = classifier_agents.ScoringAgentParams(
            feature_keys=["applicant_features"],
            group_key="group",
            default_action_fn=(lambda: 1),
            burnin=1,
            threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD,
            convert_one_hot_to_integer=True,
            # Bank utility: a repaid loan earns the interest rate (tp), a loan
            # to a defaulter loses the principal (fp=-1), and rejections are
            # costless.
            cost_matrix=params.CostMatrix(
                fn=0, fp=-1, tp=env.initial_params.interest_rate, tn=0
            ),
        )

        agent = oracle_lending_agent.OracleThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("bank_cash"),
            params=agent_params,
            env=env,
        )

        test_util.run_test_simulation(env=env, agent=agent)
        # Drop the zero thresholds associated with burn-in; the remaining
        # thresholds should be stable (all equal to the first).
        nonzero_thresholds = [
            thresh for thresh in agent.global_threshold_history if thresh > 0
        ]
        # Make sure there is at least one non-zero threshold.
        self.assertNotEmpty(nonzero_thresholds)
        for thresh in nonzero_thresholds:
            self.assertAlmostEqual(nonzero_thresholds[0], thresh)

    def test_threshold_history_is_recorded(self):
        observation_space = gym.spaces.Dict(
            {
                "x": gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32),
                "group": gym.spaces.MultiDiscrete([1]),
            }
        )
        observation_space.seed(100)

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=["x"],
            group_key="group",
            burnin=0,
            threshold_policy=threshold_policies.ThresholdPolicy.EQUALIZE_OPPORTUNITY,
        )

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )

        for _ in range(10):
            agent.act(observation_space.sample(), False)

        self.assertLen(agent.global_threshold_history, 10)
        self.assertTrue(agent.group_specific_threshold_history)
        for _, history in agent.group_specific_threshold_history.items():
            # Each group takes 2 extra steps (one to observe features and one to
            # observe the label) before any learned group-specific threshold is
            # available, so 10 acts yield 10 - 2 = 8 recorded thresholds.
            self.assertLen(history, 8)

    def test_skip_retraining_fn(self):
        env = test_util.DummyEnv()
        burnin = 10

        def _skip_retraining(action, observation):
            """Always skip retraining."""
            del action, observation
            return True

        params = classifier_agents.ScoringAgentParams(
            burnin=burnin,
            freeze_classifier_after_burnin=False,
            default_action_fn=env.action_space.sample,
            feature_keys=["x"],
            skip_retraining_fn=_skip_retraining,
        )

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )

        for _ in range(burnin + 1):
            self.assertFalse(agent.frozen)
            _ = agent.act(env.observation_space.sample(), False)

        self.assertFalse(agent.frozen)  # Agent is not frozen.
        self.assertFalse(agent.global_threshold)  # Agent has not learned.
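
    # A sketch of how `skip_retraining_fn` is assumed to be consulted inside
    # the agent's act/train loop (hypothetical variable names):
    #
    #     if self.params.skip_retraining_fn is not None and (
    #             self.params.skip_retraining_fn(action, observation)):
    #         return  # Skip this training round; thresholds stay unchanged.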

    def test_interact_with_env_replicable(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=["x"], burnin=5
        )

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )
        test_util.run_test_simulation(env=env, agent=agent)

    def test_agent_can_learn_different_thresholds(self):
        observation_space = gym.spaces.Dict(
            {
                "x": gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32),
                "group": gym.spaces.Discrete(2),
            }
        )

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=["x"],
            group_key="group",
            threshold_policy=threshold_policies.ThresholdPolicy.EQUALIZE_OPPORTUNITY,
        )

        rng = np.random.RandomState(100)

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
            rng=rng,
        )

        # Train over the whole range of observations. Expect slightly different
        # thresholds to be learned.
        for observation in rng.rand(100):
            for group in [0, 1]:
                agent._act_impl(
                    {"x": np.array([observation]), "group": np.array([group])},
                    reward=observation > 0.5 + 0.1 * group,
                    done=False,
                )

        agent.frozen = True

        actions = {}
        for group in [0, 1]:
            actions[group] = []
            for observation in np.linspace(0, 1, 1000):
                actions[group].append(
                    agent.act(
                        {"x": np.array([observation]), "group": np.array([group])}, done=False
                    )
                )

        # The two groups are classified with different policies so they are not
        # exactly equal.
        self.assertNotEqual(actions[0], actions[1])
        self.assertLen(agent.group_specific_thresholds, 2)
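
    # EQUALIZE_OPPORTUNITY picks group-specific thresholds intended to equalize
    # true positive rates across groups. Since the two groups above have
    # different label boundaries (0.5 for group 0, 0.6 for group 1), the
    # learned thresholds should differ.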

    def test_one_hot_conversion(self):
        observation_space = gym.spaces.Dict({"x": multinomial.Multinomial(10, 1)})

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=["x"],
            convert_one_hot_to_integer=True,
            threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD,
        )

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space, reward_fn=rewards.NullReward(), params=params
        )

        self.assertEqual(agent._get_features({"x": _one_hot(5)}), [5])
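
    # `_one_hot` is a module-level helper defined elsewhere in this file. A
    # minimal sketch of the behavior these tests rely on (an assumption, not
    # the canonical implementation):
    #
    #     def _one_hot(index, depth=10):
    #         vec = np.zeros(depth)
    #         vec[index] = 1
    #         return vec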

    def test_agent_trains_with_two_features(self):
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0, feature_keys=["x", "y"], burnin=200
        )

        agent = classifier_agents.ClassifierAgent(
            action_space=gym.spaces.Discrete(2),
            observation_space=gym.spaces.Dict(
                {
                    "x": gym.spaces.Box(low=-np.inf, high=np.inf, shape=[1]),
                    "y": gym.spaces.Box(low=-np.inf, high=np.inf, shape=[1]),
                }
            ),
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )

        # Train with points that are nearly separable but have some overlap
        # between 0.3 and 0.4, with 1s in the lower region and 0s in the higher
        # region. A linear transform of y -> -y is expected to be learned so
        # that a threshold classifier can be successful.
        # `y` is the relevant feature. `x` is a constant.
        const = np.array([1])

        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({"y": np.array([observation]), "x": const}, reward=1, done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({"y": np.array([observation]), "x": const}, reward=0, done=False)

        # Add a positive point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({"y": np.array([0.9]), "x": const}, reward=1, done=False)

        agent.frozen = True
        actions = []
        for obs in np.linspace(0, 0.95, 100):
            actions.append(agent.act({"y": np.array([obs]), "x": const}, done=False))

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are reverse-sorted - i.e., 1s followed by 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))

    def test_frozen_classifier_never_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            burnin=0, default_action_fn=env.action_space.sample, feature_keys=["x"]
        )

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
            frozen=True,
        )
        # Initialize global_threshold with a distinctive value.
        agent.global_threshold = 0.123

        # Run for some number of steps, global_threshold should not change.
        for _ in range(10):
            agent.act(env.observation_space.sample(), False)
        self.assertEqual(agent.global_threshold, 0.123)

    def test_insufficient_burnin_raises(self):
        env = test_util.DummyEnv()
        burnin = 5
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=["x"], burnin=burnin
        )

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )

        # Only give positive points to train.
        for _ in range(burnin):
            agent._act_impl(env.observation_space.sample(), reward=1, done=False)

        # Should raise a ValueError since the burnin has passed and the classifier
        # cannot train to make a decision.
        with self.assertRaises(ValueError):
            agent._act_impl(env.observation_space.sample(), reward=1, done=False)

    def test_agent_seed(self):
        env = test_util.DummyEnv()

        params = classifier_agents.ScoringAgentParams(
            burnin=10,
            freeze_classifier_after_burnin=False,
            default_action_fn=env.action_space.sample,
            feature_keys=["x"],
        )

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )

        agent.seed(100)
        a = agent.rng.randint(0, 1000)
        agent.seed(100)
        b = agent.rng.randint(0, 1000)
        self.assertEqual(a, b)

    def test_freeze_after_burnin(self):
        env = test_util.DummyEnv()
        burnin = 10
        params = classifier_agents.ScoringAgentParams(
            burnin=burnin,
            freeze_classifier_after_burnin=True,
            default_action_fn=env.action_space.sample,
            feature_keys=["x"],
        )

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )

        for _ in range(burnin + 1):
            self.assertFalse(agent.frozen)
            _ = agent.act(env.observation_space.sample(), False)

        self.assertTrue(agent.frozen)
        self.assertTrue(agent.global_threshold)  # Agent has learned something.

    def scenario_builder(self):
        """Returns an agent and environment pair."""
        env_params = lending_params.DelayedImpactParams(
            applicant_distribution=lending_params.two_group_credit_clusters(
                cluster_probabilities=self.cluster_probabilities,
                group_likelihoods=[self.group_0_prob, 1 - self.group_0_prob],
            ),
            bank_starting_cash=self.bank_starting_cash,
            interest_rate=self.interest_rate,
            cluster_shift_increment=self.cluster_shift_increment,
        )
        env = lending.DelayedImpactEnv(env_params)

        agent_params = classifier_agents.ScoringAgentParams(
            feature_keys=["applicant_features"],
            group_key="group",
            default_action_fn=(lambda: 1),
            burnin=self.burnin,
            convert_one_hot_to_integer=True,
            threshold_policy=self.threshold_policy,
            skip_retraining_fn=lambda action, observation: action == 0,
            cost_matrix=params.CostMatrix(
                fn=0, fp=-1, tp=env_params.interest_rate, tn=0
            ),
        )

        agent = oracle_lending_agent.OracleThresholdAgent(
            action_space=env.action_space,
            reward_fn=rewards.BinarizedScalarDeltaReward(
                "bank_cash", baseline=env.initial_params.bank_starting_cash
            ),
            observation_space=env.observation_space,
            params=agent_params,
            env=env,
        )
        agent.seed(100)
        return env, agent
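
    # How `scenario_builder` is presumably consumed by tests (a sketch;
    # attributes such as `self.burnin` and `self.threshold_policy` are assumed
    # to be set on the enclosing scenario class):
    #
    #     env, agent = self.scenario_builder()
    #     test_util.run_test_simulation(env=env, agent=agent)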

    def test_agent_on_one_hot_vectors(self):
        # Space of 1-hot vectors of length 10.
        observation_space = gym.spaces.Dict({"x": multinomial.Multinomial(10, 1)})

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=["x"],
            convert_one_hot_to_integer=True,
            burnin=999,
            threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD,
        )

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space, reward_fn=rewards.NullReward(), params=params
        )

        observation_space.seed(100)
        # Train a boundary at 3 using 1-hot vectors.
        observation = observation_space.sample()
        agent._act_impl(observation, reward=None, done=False)
        for _ in range(1000):
            last_observation = observation
            observation = observation_space.sample()
            agent._act_impl(
                observation, reward=int(np.argmax(last_observation["x"]) >= 3), done=False
            )
            if agent._training_corpus.examples:
                last_example = agent._training_corpus.examples[-1]
                self.assertEqual(
                    int(last_example.features[0] >= 3), last_example.label
                )

        agent.frozen = True

        self.assertTrue(agent.act({"x": _one_hot(3)}, done=False))
        self.assertFalse(agent.act({"x": _one_hot(2)}, done=False))

    def test_agent_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            burnin=200, default_action_fn=env.action_space.sample, feature_keys=["x"]
        )

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )

        # Train with points that are nearly separable but have some overlap between
        # 0.3 and 0.4.
        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({"x": np.array([observation])}, reward=0, done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({"x": np.array([observation])}, reward=1, done=False)

        # Add a negative point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({"x": np.array([0.9])}, reward=0, done=False)

        agent.frozen = True
        actions = [
            agent.act({"x": np.array([obs])}, done=False) for obs in np.linspace(0, 0.95, 100)
        ]

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are sorted - i.e., 0s followed by 1s.
        self.assertSequenceEqual(actions, sorted(actions))

        self.assertGreater(agent.global_threshold, 0)
        self.assertFalse(agent.group_specific_thresholds)
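
    # Once frozen, the ThresholdAgent's decision rule is effectively (a sketch,
    # assuming a single scalar feature "x" and a learned global threshold):
    #
    #     action = int(observation["x"][0] >= agent.global_threshold)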

    def test_classifier_agent_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=["x"], burnin=200
        )

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )

        # Train with points that are nearly separable but have some overlap
        # between 0.3 and 0.4, with 1s in the lower region and 0s in the higher
        # region. A linear transform of x -> -x is expected to be learned so
        # that a threshold classifier can be successful.
        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({"x": np.array([observation])}, reward=1, done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({"x": np.array([observation])}, reward=0, done=False)

        # Add a positive point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({"x": np.array([0.9])}, reward=1, done=False)

        agent.frozen = True
        actions = [
            agent.act({"x": np.array([obs])}, done=False) for obs in np.linspace(0, 0.95, 100)
        ]

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are reverse-sorted - i.e., 1s followed by 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))

    def test_oracle_lending_agent_interacts(self):
        env = lending.DelayedImpactEnv()

        agent_params = classifier_agents.ScoringAgentParams(
            feature_keys=["applicant_features"],
            group_key="group",
            default_action_fn=(lambda: 1),
            burnin=1,
            convert_one_hot_to_integer=True,
            cost_matrix=params.CostMatrix(
                fn=0, fp=-1, tp=env.initial_params.interest_rate, tn=0
            ),
        )

        agent = oracle_lending_agent.OracleThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("bank_cash"),
            params=agent_params,
            env=env,
        )

        test_util.run_test_simulation(env=env, agent=agent)
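
    # Passing `env=env` gives the oracle agent access to the environment's
    # internal state, presumably so it can train on ground-truth applicant
    # outcomes rather than only on observed rewards.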