def _create_algorithm(self, base_algorithm, batch_train: bool, **kwargs):
    """Instantiate the contextual-bandit policy selected by ``self._algorithm_name``.

    Parameters
    ----------
    base_algorithm : object
        The base classifier wrapped by the bandit metaheuristic.
    batch_train : bool
        Forwarded to the policy constructor; enables incremental fitting.
    **kwargs
        Extra policy options; only ``epsilon`` (default 0.1) is read, and
        only for the ``'egreedy'`` policy.

    Side effects
    ------------
    Sets ``self._algorithm``.  Exits the process via ``sys.exit`` (raises
    ``SystemExit``) when ``self._algorithm_name`` is not recognized.
    """
    if self._algorithm_name == 'egreedy':
        self._algorithm = EpsilonGreedy(
            base_algorithm,
            nchoices=self._K,
            random_state=self._rnd.randint(1000),
            explore_prob=kwargs.get('epsilon', 0.1),
            batch_train=batch_train,
        )
    elif self._algorithm_name == 'bootstrapped_ucb':
        self._algorithm = BootstrappedUCB(
            base_algorithm,
            nchoices=self._K,
            random_state=self._rnd.randint(1000),
            batch_train=batch_train,
        )
    else:
        # BUG FIX: the original referenced an undefined name `algorithm`,
        # so this branch raised NameError instead of reporting the bad value.
        sys.exit("no such algorithm: %s" % self._algorithm_name)
# NOTE(review): whitespace-mangled chunk of a script comparing contextualbandits
# metaheuristics; each policy wraps its own deepcopy of `base_algorithm` so the
# policies do not share fitted state (base_algorithm/nchoices/beta_prior are
# defined outside this view — presumably earlier in the script; verify there).
# The final AdaptiveGreedy(...) call is cut off mid-argument-list and continues
# past this chunk, so the line is deliberately left byte-identical.
## The base algorithm is embedded in different metaheuristics bootstrapped_ucb = BootstrappedUCB(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior, batch_train=True) bootstrapped_ts = BootstrappedTS(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior, batch_train=True) one_vs_rest = SeparateClassifiers(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior, batch_train=True) epsilon_greedy = EpsilonGreedy(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior, batch_train=True) epsilon_greedy_nodecay = EpsilonGreedy(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior, decay=None, batch_train=True) adaptive_greedy_thr = AdaptiveGreedy(deepcopy(base_algorithm), nchoices=nchoices, decay_type='threshold', batch_train=True) adaptive_greedy_perc = AdaptiveGreedy(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior, decay_type='percentile', decay=0.9997,
# NOTE(review): continuation chunk — it begins mid-call, repeating the
# `decay_type='percentile', decay=0.9997,` kwargs already present at the end of
# the previous chunk (looks like an extraction overlap; confirm against the full
# script, since concatenating both chunks verbatim would pass duplicate kwargs),
# and it ends mid-statement inside the unclosed `lst_rewards = [...]` literal.
# `linucb`, `adaptive_active_greedy`, `softmax_explorer`, `base_sgd` and
# `base_ols` are defined outside this view.  Kept byte-identical on purpose.
decay_type='percentile', decay=0.9997, batch_train=True, random_state=4444) active_explorer = ActiveExplorer(deepcopy(base_sgd), smoothing=None, nchoices=nchoices, beta_prior=((3. / nchoices, 4.), 2), batch_train=True, refit_buffer=50, deep_copy_buffer=False, random_state=5555) epsilon_greedy_nodecay = EpsilonGreedy(deepcopy(base_ols), nchoices=nchoices, smoothing=(1, 2), beta_prior=None, decay=None, batch_train=True, deep_copy_buffer=False, random_state=6666) models = [ linucb, adaptive_active_greedy, softmax_explorer, adaptive_greedy_perc, active_explorer, epsilon_greedy_nodecay ] # These lists will keep track of the rewards obtained by each policy rewards_lucb, rewards_aac, rewards_sft, rewards_agr, \ rewards_ac, rewards_egr = [list() for i in range(len(models))] lst_rewards = [ rewards_lucb, rewards_aac, rewards_sft, rewards_agr, rewards_ac,