def param_study(n_bandits=2000, n_steps=1000, title='Figure 2.6', fn='fig2_6',
                nonstat=False, print_freq=10, start_timestep=np.inf):
    """Run a parameter study over every (method, hyperparameter) pair in HYPERPARMS.

    For each of ``n_bandits`` freshly sampled bandit problems, the last entry
    of ``apply_method``'s reward curve is accumulated per combination; every
    ``print_freq`` bandits the running averages are re-plotted via
    ``plot_current``.

    Parameters
    ----------
    n_bandits : int
        Number of independent bandit instances to average over.
    n_steps : int
        Time steps per bandit run.
    title, fn : str
        Plot title and output file name passed through to ``plot_current``.
    nonstat : bool
        Non-stationary setting; also switches the y-axis label.
    print_freq : int
        Re-plot every this many bandit instances.
    start_timestep : float
        First step counted toward the average in the non-stationary case
        (presumably finite when ``nonstat`` is True — confirm at call sites).
    """
    # One running total per (method, hyperparameter) combination.
    results = {}
    for method, hyperparams in HYPERPARMS.items():
        for hyper in hyperparams:
            results[(method, hyper)] = 0

    if nonstat:
        y_label = f"Average Reward over last {n_steps-start_timestep} steps"
    else:
        y_label = f"Average Reward over first {n_steps} steps"

    for t in range(1, n_bandits + 1):
        print(f"{t}/{n_bandits}")
        bandit = Bandit()
        for method, hyperparams in HYPERPARMS.items():
            for hyper in hyperparams:
                rewards = apply_method(bandit, n_steps, method, hyper,
                                       nonstat, start_timestep)
                results[(method, hyper)] += rewards[-1]
                bandit.reset()  # need to reset q values after random walk
        if t % print_freq == 0:
            plot_current(n_steps, results, t, title, fn, y_label)
class PredictionMarketEnv(object):
    """Multi-armed bandit environment that can be run with or without an
    intermediating prediction market.

    With the market enabled, all agents submit bids, the market is settled
    into per-arm normal parameters, one sample is drawn per arm, and the
    arg-max arm is played jointly by every agent.  Without the market, each
    agent chooses and observes independently.
    """

    def __init__(self, predict_market, num_bids, trials,
                 label='Multi-Armed Prediction Market Bandit'):
        self.predict_market = predict_market
        self.n_arms = predict_market.arms
        self.agents = predict_market.agents
        self.data = predict_market.dataframe
        self.num_bids = num_bids
        self.label = label
        self.bandit = Bandit(self.n_arms, self.data)
        self.trials = trials
        # Filled in by run(); optimal is declared but never written here.
        self.scores = None
        self.optimal = None

    def run(self, experiments=1, market=True):
        """Run the trial with or without the prediction market.

        Parameters
        ----------
        experiments : int
            Number of independent repetitions; scores are averaged over them.
        market : bool
            If True, route actions through the prediction market; otherwise
            let each agent act on its own.

        Returns
        -------
        numpy.ndarray
            Shape ``(trials, n_agents)`` of rewards averaged over experiments.
        """
        scores = np.zeros((self.trials, len(self.agents)))
        if market is False:
            for _ in range(experiments):
                for trial in range(self.trials):
                    self.bandit.reset()
                    for i, agent in enumerate(self.agents):
                        action = agent.choose()
                        reward, max_reward = self.bandit.pull(action)
                        agent.observe(reward, max_reward, update=True)
                        scores[trial, i] += reward
        else:
            for _ in range(experiments):
                for trial in range(self.trials):
                    self.predict_market.reset()
                    # BUGFIX: the original reused the loop variable ``i`` for
                    # the agent loop, the bid loop, AND the params loop,
                    # shadowing the agent index.  Use distinct names.
                    for agent in self.agents:
                        bids = [agent.bid() for _ in range(self.num_bids)]
                        bid = np.mean(bids, axis=0)  # column-wise mean
                        self.predict_market.get_bids(bid, agent.id)
                    normal_params = self.predict_market.settle_market()
                    # Sample once from each arm's settled distribution and
                    # play the best sample.  params[1] is treated as a
                    # variance (sqrt -> std) — TODO confirm with the market.
                    arm_samples = [np.random.normal(params[0], params[1] ** 0.5)
                                   for params in normal_params]
                    action = np.argmax(arm_samples)
                    reward, max_reward = self.bandit.pull(action)
                    for i, agent in enumerate(self.agents):
                        agent.current_action = action
                        agent.observe(reward, max_reward, update=True)
                        scores[trial, i] += reward
        self.scores = scores / experiments
        return self.scores

    def plot_results(self, market=True):
        """Scatter-plot the per-trial scores recorded by :meth:`run`.

        The two branches originally duplicated the whole figure setup; only
        the title and marker style actually differ, so they are shared here.
        """
        scores = self.scores
        fig = plt.figure()
        ax = plt.subplot(111)
        if market:
            ax.set_title('Multi-Armed Bandit Market Reward')
            ax.plot(scores, 'b.')
        else:
            ax.set_title('Multi-Armed Bandit Rewards')
            ax.plot(scores, '.')
        # Shrink the axes to leave room for the legend on the right.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.set_ylabel('Average Reward')
        # NOTE(review): both branches label the series 'Prediction Market',
        # even when market=False — looks like a copy-paste slip; confirm the
        # intended label before changing the output.
        ax.legend(['Prediction Market'], loc='center left',
                  bbox_to_anchor=(1, 0.5))
        plt.show()