Пример #1
0
    def test_sparse_xa_with_strings(self):
        # Sparse x (string keys, one string value) crossed with a sparse numeric a.
        expected = {"x1za1": 3, "x1za2": 4, "x2a1": 6, "x2a2": 8}

        context = {"1": "z", "2": 2}
        action = {"1": 3, "2": 4}
        actual = InteractionsEncoder(["xa"]).encode(x=context, a=action)

        self.assertEqual(expected, actual)
Пример #2
0
    def test_sparse_xa_with_numeric_keys(self):
        # Same expectation as the string-key case, but the sparse keys are ints.
        expected = {"x1za1": 3, "x1za2": 4, "x2a1": 6, "x2a2": 8}

        context = {1: "z", 2: 2}
        action = {1: 3, 2: 4}
        actual = InteractionsEncoder(["xa"]).encode(x=context, a=action)

        self.assertEqual(expected, actual)
Пример #3
0
    def test_sparse_xxa(self):
        # Third-order term: x crossed with itself and then with a, on sparse inputs.
        expected = {
            "x1x1a1": 3,
            "x1x1a2": 4,
            "x1x2a1": 6,
            "x1x2a2": 8,
            "x2x2a1": 12,
            "x2x2a2": 16,
        }

        context = {"1": 1, "2": 2}
        action = {"1": 3, "2": 4}
        actual = InteractionsEncoder(["xxa"]).encode(x=context, a=action)

        self.assertEqual(expected, actual)
Пример #4
0
    def test_singular_numeric_xa(self):
        # A scalar action scales a dense tuple context; encoding the same
        # input twice on one encoder must give the same result.
        enc = InteractionsEncoder(["xa"])

        first, second = [enc.encode(x=(1, 2, 3), a=2) for _ in range(2)]

        self.assertEqual([2, 4, 6], first)
        self.assertEqual(first, second)
Пример #5
0
    def test_string_tuple(self):
        # A tuple mixing a string and a number is expected to encode to a
        # keyed dict, and repeated encoding must be stable.
        enc = InteractionsEncoder(["xa"])
        expected = {'x0da0': 2, 'x1a0': 4}

        first, second = [enc.encode(x=('d', 2), a=2) for _ in range(2)]

        self.assertEqual(expected, first)
        self.assertEqual(first, second)
Пример #6
0
    def test_dense_interaction_xx_encode_performance(self):
        # Timing guard for a dense 100-feature "xx" encoding, run 100 times.
        enc = InteractionsEncoder(["xx"])
        dense_x = list(range(100))

        def encode_once():
            return enc.encode(x=dense_x)

        elapsed = timeit.timeit(encode_once, number=100)

        #best observed was 0.03
        self.assertLess(elapsed, 0.3)
Пример #7
0
    def test_sparse_interaction_xx_encode_performance(self):
        # Timing guard for a sparse 100-key "xx" encoding, run 100 times.
        enc = InteractionsEncoder(["xx"])
        sparse_x = dict(zip(map(str, range(100)), range(100)))

        def encode_once():
            return enc.encode(x=sparse_x)

        elapsed = timeit.timeit(encode_once, number=100)

        #best observed was 0.09
        self.assertLess(elapsed, 0.9)
Пример #8
0
    def test_sparse_interaction_xxa_encode_performance(self):
        # Timing guard for "xxa" with a sparse 100-key x and a dense 3-value a,
        # run 50 times.
        enc = InteractionsEncoder(["xxa"])
        sparse_x = dict(zip(map(str, range(100)), range(100)))
        dense_a = [1, 2, 3]

        def encode_once():
            return enc.encode(x=sparse_x, a=dense_a)

        elapsed = timeit.timeit(encode_once, number=50)

        #best observed was 0.20
        self.assertLess(elapsed, 2.0)
Пример #9
0
    def test_dense_x_a_xa_xxa(self):
        # Several terms at once on dense inputs. Only the multiset of values
        # is checked (order-insensitive), plus stability across two calls.
        enc = InteractionsEncoder(["x", "a", "xa", "xxa"])
        expected_values = [
            1, 2, 3, 1, 2, 1, 2, 3, 2, 4, 6, 1, 2, 3, 4, 6, 9, 2, 4, 6, 8, 12,
            18
        ]

        first, second = [enc.encode(x=[1, 2, 3], a=[1, 2]) for _ in range(2)]

        self.assertCountEqual(expected_values, first)
        self.assertEqual(first, second)
Пример #10
0
    def test_sparse_interaction_abc_encode_performance(self):
        # Timing guard for the "aabc" term: sparse 100-key a with two small
        # dense namespaces b and c, run 25 times.
        enc = InteractionsEncoder(["aabc"])
        sparse_a = dict(zip(map(str, range(100)), range(100)))
        dense_b = [1, 2]
        dense_c = [2, 3]

        def encode_once():
            return enc.encode(a=sparse_a, b=dense_b, c=dense_c)

        elapsed = timeit.timeit(encode_once, number=25)

        #best observed was 0.17
        self.assertLess(elapsed, 1.7)
Пример #11
0
    def test_sparse_meta_x_a(self):
        # A SparseWithMeta context is expected to encode like a plain dict.
        expected = {"x1": 1, "x2": 2, "a1": 3, "a2": 4}

        context = SparseWithMeta({"1": 1, "2": 2})
        action = {"1": 3, "2": 4}
        actual = InteractionsEncoder(["x", "a"]).encode(x=context, a=action)

        self.assertEqual(expected, actual)
Пример #12
0
    def test_multiple_sparse_with_empty_and_non_empty(self):
        # With c=None only the 'ab' term is expected to contribute; 'abc'
        # yields nothing.
        enc = InteractionsEncoder(['ab', 'abc'])
        expected = {'aAb0b': 2}

        actual = enc.encode(a={'A': 2}, b='b', c=None)

        self.assertEqual(expected, actual)
Пример #13
0
    def test_multiple_dense_with_only_empty(self):
        # Every term needs b (which is None), so a dense input is expected to
        # encode to an empty list.
        enc = InteractionsEncoder(['ab', 'abc'])

        actual = enc.encode(a=2, b=None, c=None)

        self.assertEqual([], actual)
Пример #14
0
    def test_singular_string_abc(self):
        # Three scalar namespaces crossed together: 2 * 3 * 4 == 24.
        enc = InteractionsEncoder(["abc"])

        actual = enc.encode(a=2, b=3, c=4)

        self.assertEqual([24], actual)
Пример #15
0
class LinUCBLearner(Learner):
    """A contextual bandit learner that represents expected reward as a
    linear function of context and action features. Exploration is carried
    out according to upper confidence bound estimates.

    This is an implementation of the Chu et al. (2011) LinUCB algorithm using the
    `Sherman-Morrison formula`__ to iteratively update the inverse matrix, so no
    explicit matrix inversion is performed. Each update works on a d x d matrix
    (matrix-vector and outer products), where d is the number of encoded
    features plus one for the intercept.

    Remarks:
        The Sherman-Morrison implementation used below is given in long form `here`__.

    References:
        Chu, Wei, Lihong Li, Lev Reyzin, and Robert Schapire. "Contextual bandits
        with linear payoff functions." In Proceedings of the Fourteenth International
        Conference on Artificial Intelligence and Statistics, pp. 208-214. JMLR Workshop
        and Conference Proceedings, 2011.

    __ https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
    __ https://research.navigating-the-edge.net/assets/publications/linucb_alternate_formulation.pdf
    """

    def __init__(self, alpha: float = 1, X: Sequence[str] = ('a', 'ax')) -> None:
        """Instantiate a LinUCBLearner.

        Args:
            alpha: This parameter controls the exploration rate of the algorithm. A value of 0 will cause actions
                to be selected based on the current best point estimate (i.e., no exploration) while a value of inf
                means that actions will be selected based solely on the bounds of the action point estimates (i.e.,
                we will always take actions that have the largest bound on their point estimate).
            X: Feature set interactions to use when calculating action value estimates. Context features
                are indicated by x's while action features are indicated by a's. For example, xaa means to cross the
                features between context and actions and actions.
        """
        PackageChecker.numpy("LinUCBLearner.__init__")

        self._alpha = alpha

        # Keep a private list copy: the default is immutable and the learner never
        # aliases a caller-owned sequence, so mutating `params['X']` (or the
        # caller's own list) cannot leak into other learners.
        self._X = list(X)
        self._X_encoder = InteractionsEncoder(self._X)

        # Set once the encoder has been swapped for its context-free variant.
        self._x_terms_dropped = False

        # Both are created lazily because the feature count is only known
        # once the first example has been encoded.
        self._theta = None
        self._A_inv = None

    @property
    def params(self) -> Dict[str, Any]:
        """The hyperparameters that identify this learner."""
        return {'family': 'LinUCB', 'alpha': self._alpha, 'X': self._X}

    def _dense_context(self, context: Context) -> list:
        """Return `context` as a flat list of features, or [] when it is empty/None.

        The first time an empty context is seen the encoder is replaced by one
        whose terms have every 'x' removed. The replacement is permanent.
        """
        if context:
            return list(Flatten().filter([list(context)]))[0]

        if not self._x_terms_dropped:
            a_only_terms = (term.replace('x', '') for term in self._X)
            # dict.fromkeys de-duplicates while keeping the declared term order;
            # a set would order terms by string hash, which differs per process.
            self._X_encoder = InteractionsEncoder(list(dict.fromkeys(filter(None, a_only_terms))))
            self._x_terms_dropped = True

        return []

    def _init_model_if_needed(self, n_features: int) -> None:
        """Create theta (zeros) and A_inv (identity) on first use."""
        import numpy as np #type: ignore

        if self._A_inv is None:
            self._theta = np.zeros(n_features)
            self._A_inv = np.identity(n_features)

    def predict(self, context: Context, actions: Sequence[Action]) -> Probs:
        """Determine a probability for each action in the given context.

        Args:
            context: The dense context features, or an empty/None context.
            actions: The dense actions to choose from.

        Returns:
            One probability per action. Actions tied for the largest upper
            confidence bound share the probability mass equally; all other
            actions receive 0.

        Raises:
            CobaException: If the context or the first action is a dict (sparse).
        """

        import numpy as np #type: ignore

        if isinstance(actions[0], dict) or isinstance(context, dict):
            raise CobaException("Sparse data cannot be handled by this algorithm.")

        context = self._dense_context(context)

        # One column per action; the leading 1 is the intercept feature.
        features: np.ndarray = np.array([[1]+self._X_encoder.encode(x=context,a=action) for action in actions]).T

        self._init_model_if_needed(features.shape[0])

        point_estimate = self._theta @ features
        point_bounds   = np.diagonal(features.T @ self._A_inv @ features)

        action_values = point_estimate + self._alpha*np.sqrt(point_bounds)
        max_indexes   = np.where(action_values == np.amax(action_values))[0]

        return [ int(ind in max_indexes)/len(max_indexes) for ind in range(len(actions))]

    def learn(self, context: Context, action: Action, reward: float, probability: float, info: Info) -> None:
        """Update the linear model with one observed outcome.

        Args:
            context: The dense context features, or an empty/None context.
            action: The dense action that was taken.
            reward: The reward received for taking the action.
            probability: Not used by this learner.
            info: Not used by this learner.

        Raises:
            CobaException: If the context or the action is a dict (sparse).
        """

        import numpy as np

        if isinstance(action, dict) or isinstance(context, dict):
            raise CobaException("Sparse data cannot be handled by this algorithm.")

        context = self._dense_context(context)

        # The leading 1 is the intercept feature.
        features: np.ndarray = np.array([1]+self._X_encoder.encode(x=context,a=action))

        self._init_model_if_needed(features.shape[0])

        r = self._theta @ features   # current reward prediction
        w = self._A_inv @ features
        v = features    @ w

        # Sherman-Morrison rank-one update of the inverse and of theta.
        self._A_inv = self._A_inv - np.outer(w,w)/(1+v)
        self._theta = self._theta + (reward-r)/(1+v) * w
Пример #16
0
    def test_dense_x(self):
        # Only the "x" term is requested, so the supplied a must not appear.
        enc = InteractionsEncoder(["x"])

        actual = enc.encode(x=[1, 2, 3], a=[1, 2])

        self.assertEqual([1, 2, 3], actual)
Пример #17
0
    def test_sparse_xa_is_string(self):
        # A bare string action crossed with a sparse numeric context.
        expected = {"x1a0a": 1, "x2a0a": 2}

        context = {"1": 1, "2": 2}
        actual = InteractionsEncoder(["xa"]).encode(x=context, a="a")

        self.assertEqual(expected, actual)
Пример #18
0
 def test_empty_interactions_dense(self):
     # With no terms configured, a dense input is expected to encode to [].
     enc = InteractionsEncoder([])

     actual = enc.encode(a=2, b=None, c=None)

     self.assertEqual([], actual)
Пример #19
0
    def test_string_numeric_xa(self):
        # Numeric x crossed with string-valued a: expected keys embed the
        # string, expected values come from x.
        expected = {"x0a0d": 2, "x0a1e": 2}

        actual = InteractionsEncoder(["xa"]).encode(x=[2], a=["d", "e"])

        self.assertEqual(expected, actual)
Пример #20
0
    def test_string_xa(self):
        # Both namespaces hold strings, so every crossed feature has value 1.
        enc = InteractionsEncoder(["xa"])
        expected = {"x0aa0d": 1, "x0aa1e": 1}

        actual = enc.encode(x=["a"], a=["d", "e"])

        self.assertEqual(expected, actual)
Пример #21
0
    def test_string_x(self):
        # String features in x are expected as indicator entries keyed by
        # position and value; a is supplied but not requested.
        expected = {"x0a": 1, "x1b": 1, "x2c": 1}

        actual = InteractionsEncoder(["x"]).encode(x=["a", "b", "c"], a=["d", "e"])

        self.assertEqual(expected, actual)
Пример #22
0
 def test_dense_xxx(self):
     # Third-order self-interaction of a dense x; a is supplied but not requested.
     enc = InteractionsEncoder(["xxx"])
     expected = [1, 2, 3, 4, 6, 9, 8, 12, 18, 27]

     actual = enc.encode(x=[1, 2, 3], a=[1, 2])

     self.assertEqual(expected, actual)
Пример #23
0
    def test_dense_meta_x_a(self):
        # A DenseWithMeta context is expected to encode like a plain list.
        expected = [1, 2, 3, 1, 2]

        context = DenseWithMeta([1, 2, 3])
        actual = InteractionsEncoder(["x", "a"]).encode(x=context, a=[1, 2])

        self.assertEqual(expected, actual)
Пример #24
0
    def test_multiple_sparse_with_only_empty(self):
        # Every term needs b (which is None), so a sparse input is expected to
        # encode to an empty dict.
        enc = InteractionsEncoder(['ab', 'abc'])

        actual = enc.encode(a={'A': 2}, b=None, c=None)

        self.assertEqual({}, actual)
Пример #25
0
    def test_singular_string_a(self):
        # Only the "a" term is requested; a bare string action becomes a
        # single indicator entry.
        expected = {"a0d": 1}

        actual = InteractionsEncoder(["a"]).encode(x=["a"], a="d")

        self.assertEqual(expected, actual)
Пример #26
0
 def test_empty_interactions_sparse(self):
     # With no terms configured, a sparse input is expected to encode to {}.
     enc = InteractionsEncoder([])

     actual = enc.encode(a={'A': 2}, b=None, c=None)

     self.assertEqual({}, actual)
Пример #27
0
    def test_singular_string_xa(self):
        # Two bare multi-character strings are each treated as one feature
        # (not split into characters) and crossed into a single entry.
        expected = {"x0abca0dbc": 1}

        actual = InteractionsEncoder(["xa"]).encode(x="abc", a="dbc")

        self.assertEqual(expected, actual)
Пример #28
0
    def __init__(self,
                 n_interactions: int,
                 n_actions: int = 10,
                 n_context_features: int = 10,
                 n_action_features: int = 10,
                 reward_features: Sequence[str] = ["a", "xa"],
                 seed: int = 1) -> None:
        """Instantiate a LinearSyntheticSimulation.

        Args:
            n_interactions: The number of interactions the simulation should have.
            n_actions: The number of actions each interaction should have.
            n_context_features: The number of features each context should have.
            n_action_features: The number of features each action should have.
            reward_features: The features in the simulation's linear reward function.
            seed: The random number seed used to generate all features, weights and noise in the simulation.
        """

        # NOTE(review): the default list is stored as-is in _args and
        # _reward_features. It is never mutated in this constructor, but
        # callers should treat it as read-only.
        self._args = (n_interactions, n_actions, n_context_features,
                      n_action_features, reward_features, seed)

        self._n_actions = n_actions
        self._n_context_features = n_context_features
        self._n_action_features = n_action_features
        self._reward_features = reward_features
        self._seed = seed

        def without(symbol: str, terms: Sequence[str]) -> list:
            # Remove `symbol` from every term, drop terms that become empty and
            # de-duplicate while keeping first-seen order. A set would order the
            # terms by string hash, which changes between interpreter runs and
            # would pair the seeded weights with different features each run.
            stripped = (term.replace(symbol, '') for term in terms)
            return list(dict.fromkeys(filter(None, stripped)))

        # A namespace with zero features cannot take part in any reward term.
        if not self._n_context_features:
            reward_features = without('x', reward_features)

        if not self._n_action_features:
            reward_features = without('a', reward_features)

        rng = CobaRandom(seed)
        feat_encoder = InteractionsEncoder(reward_features)

        #to try and make sure high-order polynomials are well behaved
        #we draw context and action features from a gaussian centered
        #on 1 with a very small amount of variance (each value then gets
        #a random sign). Then, in post processing, we shift and re-scale
        #our reward to center and fill in [0,1].
        max_degree = max((len(f) for f in reward_features), default=1)
        feat_gen = lambda n: tuple([
            g * rng.choice([1, -1])
            for g in rng.gausses(n, mu=1, sigma=1 / (2 * max_degree))
        ])
        one_hot_acts = OneHotEncoder().fit_encodes(range(n_actions))

        # Encode an all-ones example just to learn how many features come out.
        feature_count = len(
            feat_encoder.encode(x=[1] * n_context_features,
                                a=[1] * n_action_features))

        # Without action features every action gets its own weight vector.
        weight_parts = 1 if n_action_features else n_actions
        weight_count = 1 if feature_count == 0 else feature_count

        # Weights are 1 - 2*w for each w drawn from rng.randoms.
        self._weights = [[1 - 2 * w for w in rng.randoms(weight_count)]
                         for _ in range(weight_parts)]

        self._bias = 0
        self._clip = False

        def context(index: int) -> Context:
            return feat_gen(n_context_features) if n_context_features else None

        def actions(index: int, context: Context) -> Sequence[Action]:
            return [feat_gen(n_action_features) for _ in range(n_actions)
                    ] if n_action_features else one_hot_acts

        def reward(index: int, context: Context, action: Action) -> float:

            # With no encoded features fall back to a single constant feature.
            F = feat_encoder.encode(x=context, a=action) or [1]
            # One-hot actions select their own weight vector via the hot index.
            W = self._weights[0 if n_action_features else action.index(1)]

            return self._bias + sum([w * f for w, f in zip(W, F)])

        # Sample 100 interactions (independent of n_interactions) to estimate
        # the mean and range of the unscaled reward.
        rewards = [
            reward(i, c, a) for i in range(100) for c in [context(i)]
            for a in actions(i, c)
        ]

        m = mean(rewards)
        s = (max(rewards) - min(rewards)) or 1

        # Dividing the weights by the sampled range and shifting by the bias
        # moves the sampled mean reward to 0.5.
        self._bias = 0.5 - m / s
        self._weights = [[w / s for w in W] for W in self._weights]
        self._clip = True

        super().__init__(n_interactions, context, actions, reward)
Пример #29
0
    def test_sparse_x_a_numeric_keys(self):
        # Independent "x" and "a" terms with integer sparse keys; the keys are
        # expected to appear as text in the encoded names.
        expected = {"x1": 1, "x2": 2, "a1": 3, "a2": 4}

        context = {1: 1, 2: 2}
        action = {1: 3, 2: 4}
        actual = InteractionsEncoder(["x", "a"]).encode(x=context, a=action)

        self.assertEqual(expected, actual)