Python Bandit.reset示例

def param_study(n_bandits=2000,
                n_steps=1000,
                title='Figure 2.6',
                fn='fig2_6',
                nonstat=False,
                print_freq=10,
                start_timestep=np.inf):
    """Accumulate final rewards for every method/hyperparameter pair.

    For each of ``n_bandits`` freshly constructed ``Bandit`` objects, every
    (method, hyperparameter) combination listed in ``HYPERPARMS`` is run
    through ``apply_method`` and the last element of its result is added to
    a running total for that combination. The totals are handed to
    ``plot_current`` every ``print_freq`` bandits; nothing is returned.

    Args:
        n_bandits: Number of bandits to create and evaluate.
        n_steps: Step count forwarded to ``apply_method`` and ``plot_current``.
        title: Forwarded to ``plot_current`` (presumably the figure title).
        fn: Forwarded to ``plot_current`` (presumably an output file name --
            confirm against ``plot_current``).
        nonstat: Forwarded to ``apply_method``; also selects which y-axis
            label is used.
        print_freq: ``plot_current`` is called whenever the 1-based bandit
            counter is a multiple of this value.
        start_timestep: Forwarded to ``apply_method``; used in the y-axis
            label when ``nonstat`` is truthy.
    """
    # Every (method, hyperparameter) pair, in HYPERPARMS iteration order.
    configs = [(method, hyper)
               for method, hyperparams in HYPERPARMS.items()
               for hyper in hyperparams]
    results = dict.fromkeys(configs, 0)

    if nonstat:
        y_label = f"Average Reward over last {n_steps-start_timestep} steps"
    else:
        y_label = f"Average Reward over first {n_steps} steps"

    for run_idx in range(1, n_bandits + 1):
        print(f"{run_idx}/{n_bandits}")
        bandit = Bandit()
        for config in configs:
            method, hyper = config
            results[config] += apply_method(bandit, n_steps, method, hyper,
                                            nonstat, start_timestep)[-1]
            # need to reset q values after random walk
            bandit.reset()
        if run_idx % print_freq == 0:
            # The raw totals are passed along with the bandit count so far.
            plot_current(n_steps, results, run_idx, title, fn, y_label)

示例#2

显示文件

class PredictionMarketEnv(object):
    """Run a group of agents against a bandit, with or without a market.

    In non-market mode every agent chooses and pulls an arm on its own. In
    market mode the agents submit bids to the prediction market, the market
    is settled, one arm is sampled from the settled parameters and pulled,
    and every agent observes that shared outcome.
    """

    def __init__(self, predict_market, num_bids, trials,
                 label='Multi-Armed Prediction Market Bandit'):
        """Store the market and experiment settings and build the bandit.

        Args:
            predict_market: Market object. Its ``arms``, ``agents`` and
                ``dataframe`` attributes are read here; ``run`` calls its
                ``reset``, ``get_bids`` and ``settle_market`` methods.
            num_bids: Number of bids drawn from each agent per market trial;
                they are averaged before being submitted.
            trials: Number of trials per experiment.
            label: Descriptive label stored on the instance.
        """
        self.predict_market = predict_market
        self.n_arms = predict_market.arms
        self.agents = predict_market.agents
        self.data = predict_market.dataframe
        self.num_bids = num_bids
        self.label = label
        self.bandit = Bandit(self.n_arms, self.data)
        self.trials = trials
        self.scores = None  # filled in by run()
        self.optimal = None  # not assigned anywhere else in this class

    def run(self, experiments=1, market=True):
        """Run the trials with or without the prediction market.

        Args:
            experiments: Number of times the full set of trials is repeated.
                Rewards are summed over the repetitions and divided by this
                value.
            market: Truthy to run the prediction-market procedure, falsy to
                let each agent act on its own.

        Returns:
            Array with one row per trial and one column per agent holding the
            reward averaged over ``experiments``. The same array is stored in
            ``self.scores``.
        """
        scores = np.zeros((self.trials, len(self.agents)))

        # Use truthiness rather than ``market is False`` so that falsy flags
        # such as 0 or numpy.bool_(False) select the same mode here as the
        # ``if market:`` test in plot_results().
        if market:
            run_trial = self._run_market_trial
        else:
            run_trial = self._run_individual_trial

        for _ in range(experiments):
            for trial in range(self.trials):
                run_trial(scores, trial)

        self.scores = scores / experiments
        return self.scores

    def _run_individual_trial(self, scores, trial):
        """Let each agent pull the bandit itself and add its reward to scores."""
        self.bandit.reset()
        for col, agent in enumerate(self.agents):
            action = agent.choose()
            reward, max_reward = self.bandit.pull(action)
            agent.observe(reward, max_reward, update=True)
            scores[trial, col] += reward

    def _run_market_trial(self, scores, trial):
        """Settle one market round, pull the sampled arm, credit every agent."""
        self.predict_market.reset()
        for agent in self.agents:
            bids = [agent.bid() for _ in range(self.num_bids)]
            bid = np.mean(bids, axis=0)  # column-wise mean
            self.predict_market.get_bids(bid, agent.id)

        normal_params = self.predict_market.settle_market()
        # One normal draw per entry; the square root of the second element is
        # used as the standard deviation (so it is presumably a variance).
        arm_samples = [np.random.normal(params[0], params[1] ** 0.5)
                       for params in normal_params]
        action = np.argmax(arm_samples)
        reward, max_reward = self.bandit.pull(action)

        # Every agent is told the jointly chosen action and sees the same reward.
        for col, agent in enumerate(self.agents):
            agent.current_action = action
            agent.observe(reward, max_reward, update=True)
            scores[trial, col] += reward

    def plot_results(self, market=True):
        """Plot ``self.scores`` as produced by the last call to ``run``.

        Args:
            market: Truthy selects the market title and blue point markers;
                falsy selects the plain title and default-coloured markers.
        """
        scores = self.scores
        plt.figure()
        ax = plt.subplot(111)

        if market:
            title, marker = 'Multi-Armed Bandit Market Reward', 'b.'
        else:
            title, marker = 'Multi-Armed Bandit Rewards', '.'

        ax.set_title(title)
        ax.plot(scores, marker)
        # Shrink the axes to 80% width to leave room for the legend, which is
        # anchored just outside the right edge.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.set_ylabel('Average Reward')
        # NOTE(review): the legend label is 'Prediction Market' in both modes,
        # as in the original code -- confirm whether the non-market plot
        # should label the individual agents instead.
        ax.legend(['Prediction Market'], loc='center left',
                  bbox_to_anchor=(1, 0.5))
        plt.show()