def feedback(self, result):
    """Pass the outcome to the parent class, then update the model ensemble.

    After delegating to ``super().feedback(result)``, the design vector for
    the last (context, action) pair is built once.  Every model in
    ``self.lrs`` is then visited and, independently for each one, updated
    only when ``random.random() > .5``.

    Each selected model receives exactly one ``lsr_update`` step, written
    back while holding ``self.lrs_lock``.  An earlier revision repeated the
    identical update a second time with the already-stale prediction (and,
    as far as the layout could be reconstructed, outside the lock); that
    duplicate has been removed.

    Args:
        result: Outcome of the last decision; forwarded unchanged to the
            parent implementation.  NOTE(review): ``self.last_success`` is
            presumably set by the parent from ``result`` -- confirm.
    """
    super().feedback(result)
    x = ThompsonLogisticAgent.design(self.last_context, self.last_action)
    for i in range(len(self.lrs)):
        # Each model independently skips the observation when the draw
        # is not above 0.5.
        if random.random() <= .5:
            continue
        p = lr_predict(
            x, self.lrs[i], self.i, self.learnrate, self.regulizer,
            regression.random_coef,
        )
        # Single write-back per model, always under the lock.
        with self.lrs_lock:
            self.lrs[i] = lsr_update(
                self.last_success, p, x, self.lrs[i], self.learnrate,
                regression.random_coef,
            )
def decide(self, context):
    """Pick the candidate action with the highest predicted value.

    Stores ``context["context"]`` in ``self.last_context``, draws one model
    at random from ``self.lrs`` and samples ``self.action_n`` candidates
    (without replacement) from ``self.actions``.  Each candidate is scored
    as the model's prediction for its design vector multiplied by the
    candidate's ``'price'`` entry.

    Args:
        context: Mapping whose ``"context"`` entry is the current context.

    Returns:
        The best-scoring candidate, which is also stored in
        ``self.last_action``.  ``None`` when no candidate scores above
        ``-1e100`` (for instance when no candidates were sampled).
    """
    self.last_context = context["context"]
    # One model is drawn per decision and used for every candidate.
    model = random.sample(self.lrs, 1)[0]
    top_score = -1e100
    self.last_action = None
    candidates = random.sample(self.actions, self.action_n)
    for candidate in candidates:
        features = ThompsonLogisticAgent.design(self.last_context, candidate)
        predicted = lr_predict(
            features, model, self.i, self.learnrate, self.regulizer,
            regression.random_coef,
        )
        score = predicted * candidate['price']
        # Strict comparison: on ties the earliest candidate is kept.
        if score > top_score:
            top_score = score
            self.last_action = candidate
    return self.last_action