Example #1
 def generate_data_rl(self, ngames=50, fname=''):
     """
     generate a new batch of data with the latest prediction model
     self.model_predict
     rl vs. rl (self-play)
     """
     vf = lambda x: self.model_predict.predict(x)
     p1 = RLPlayer(vf)
     p1.epsilon = self.epsilon
     p1.record_history = 1
     p2 = RLPlayer(vf)
     p2.epsilon = self.epsilon
     p2.record_history = 1
     d_this = record_game(ngames, [p1, p2], fname)
     self.add_data(d_this)
     return d_this
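A minimal usage sketch for the method above. The agent class name (SarsaAgent) and the file name are assumptions for illustration; the method only needs an object whose model_predict, epsilon, and add_data are already set up:

# Hypothetical usage sketch: SarsaAgent and the fname value are assumed,
# and self.model_predict is presumed to be trained already.
agent = SarsaAgent()
agent.epsilon = 0.1   # exploration rate copied onto both self-play bots
batch = agent.generate_data_rl(ngames=100, fname='selfplay_batch')
# the batch is returned and also appended to the agent's pool via add_data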
Example #2
def run_agent(fn='sarsa_v0',
              version='SarsaBootstrapAgent',
              gamma_comp=0.02,
              epsilon=0.05,
              dropout=0.2):
    if version == 'SarsaBootstrapAgent':
        dql = SarsaBootstrapAgent()
    elif version == 'DQLSarsaAgent':
        dql = DQLSarsaAgent()
    p1 = SmithyBot()
    p1.record_history = 1
    p2 = SmithyBot()
    p2.record_history = 0
    dbuy, _ = dql.record_game(1, [p1, p2])
    sa = np.array([np.concatenate([s, a]) for s, a, r, _, _, _ in dbuy])
    r = np.array([r for s, a, r, _, _, _ in dbuy])

    dql.epsilon = 0.05
    dql.mtrain = 5000
    # one iteration creates roughly 1e4 samples,
    # so this buffer remembers the data of the past 40 iterations.
    dql.replaybuffer = int(4e5)
    dql.target_iterations = 1
    dql.predict_iterations = 10
    dql.epochs = 10
    dql.gamma = 1 - gamma_comp
    # per-turn point reward (a lever to incentivize short games); disabled here
    dql.reward_points_per_turn = 0.0
    # a win reward seems to make the signal too noisy, so it is disabled
    dql.win_reward = 0
    # print the settings
    print('mtrain {:d}, replaybuffer {:d}, predict iter {:d}, epochs {:d}, gamma {:.02f}'.\
        format(dql.mtrain, dql.replaybuffer, dql.predict_iterations, dql.epochs, dql.gamma))
    # a crude way to initialize the network weights
    dql.length = sa.shape[0]
    dql.create_model(sa, r, dropout=dropout)
    # start training iterations.
    for i in range(1000):
        print('data generation iteration {:d}'.format(i))
        # anneal exploration: 0.05 / ((i + 1) / 20) equals 1 / (i + 1)
        dql.epsilon = 0.05 / ((i + 1) / 20)
        print('dql epsilon: {:.04f}'.format(dql.epsilon))
        dql.generate_data_smithy(100)
        dql.generate_data_rl(100)
        #dql.generate_data(100)
        print('data sample size = {:d}'.format(dql.data.shape[0]))
        dql.do_target_iteration()
        dql.save_model('./model/{:s}_iteration_{:03d}'.format(fn, i + 1))
        # evaluate against random bot and smithy bot
        p1 = RLPlayer(lambda x: dql.model_predict.predict(x))
        p1.epsilon = 0.0
        p_smith = SmithyBot()
        print(compare_bots([p1, RandomPlayer()], 10))
        print(compare_bots([p1, p_smith], 10))
        # print output of Q(s,a) estimates for the first SmithyBot game
        print('Q(s,a) estimates of a SmithyBot game')
        print(dql.model_predict.predict(sa).T)
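The schedule dql.epsilon = 0.05 / ((i + 1) / 20) simplifies algebraically to 1 / (i + 1): play starts fully random and anneals down to the base rate of 0.05 by iteration 20. A standalone sketch of the decay:

# Sketch of the exploration schedule used in run_agent above.
def epsilon_schedule(i):
    return 0.05 / ((i + 1) / 20)  # equal to 1.0 / (i + 1)

for i in (0, 1, 4, 19, 99):
    print('iteration {:d}: epsilon = {:.4f}'.format(i, epsilon_schedule(i)))
# iteration 0: epsilon = 1.0000 (fully random)
# iteration 19: epsilon = 0.0500
# iteration 99: epsilon = 0.0100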
Example #3
 def generate_data_rl(self, ngames=50, fname=''):
     """
     generate a new batch of data with the latest model self.model
     by playing against itself.
     """
     vf = lambda x: self.model.predict(x)
     p2 = RLPlayer(vf)
     p2.epsilon = self.epsilon
     p2.record_history = 1
     p2.include_action = 1
     return self.generate_data_bot(p2, ngames, fname)
Example #4
 def generate_data_rl(self, ngames=50, fname=''):
     """
     generate a new batch of data with the latest prediction model
     self.model_predict
     rl vs. rl (self-play)
     """
     vbuy = lambda x: self.model_predict.predict(x)
     #vact = lambda x: self.model_act.predict(x)
     # p1 = BuyActRLplayer(vbuy, vact)
     p1 = RLPlayer(vbuy)
     p1.epsilon = self.epsilon
     p1.record_history = 1
     p1.include_action = 1
     p2 = RLPlayer(vbuy)
     p2.epsilon = self.epsilon
     p2.record_history = 1
     p2.include_action = 1
     d_this, _ = self.record_game(ngames, [p1, p2], fname, verbose=1)
     self.add_data(d_this)
     return d_this
Example #5
def compare_rl_bots(fn1, fn2, num_games=50, order=0):
    # set up the two rl bots, and make them fight.
    print('setting up bot1...')
    dql = SarsaAgent()
    dql.create_model_5layers()
    data = dql.load_game_data('1game')
    dql.fit(data)
    dql.load_model(fn1)
    p1 = RLPlayer(lambda x: dql.model.predict(x))
    p1.name = fn1
    p1.epsilon = 0
    print('setting up bot2...')
    dql2 = SarsaAgent()
    dql2.create_model_5layers()
    data = dql2.load_game_data('1game')
    dql2.fit(data)
    dql2.load_model(fn2)
    p2 = RLPlayer(lambda x: dql2.model.predict(x))
    p2.name = fn2
    p2.epsilon = 0
    print('fight!')
    return compare_bots([p1, p2], num_games, order)
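A hedged usage example; the checkpoint paths below are placeholders for files written by save_model, not names taken from this listing:

# Hypothetical usage: pit two saved checkpoints against each other.
result = compare_rl_bots('./model/sarsa_v0_iteration_050',
                         './model/sarsa_v0_iteration_100',
                         num_games=50)
print(result)  # win/loss summary as returned by compare_bots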
Example #6
 def generate_data_bot(self, bot, ngames=50, fname=''):
     """
     generate a new batch of data with the latest prediction model self.model
     rl vs. specified bot
     """
     vf = lambda x: self.model.predict(x)
     p1 = RLPlayer(vf)
     p1.epsilon = self.epsilon
     p1.record_history = 1
     p1.include_action = 1
     bot.record_history = 0
     d_this = self.record_game(ngames, [p1, bot], fname)
     self.add_data(d_this)
     return d_this
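Because the opponent is a parameter, the specialized generators above reduce to single calls. A sketch, assuming an agent instance of whichever class defines generate_data_bot:

# Hypothetical usage: 'agent' is assumed to be an instance of the agent
# class defining generate_data_bot, with self.model already trained.
agent = SarsaAgent()
agent.epsilon = 0.1
agent.generate_data_bot(SmithyBot(), ngames=100)     # vs. the scripted baseline
agent.generate_data_bot(RandomPlayer(), ngames=100)  # vs. a random baseline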
Example #7
 def generate_data_smithy(self, ngames=50, fname=''):
     """
     generate a new batch of data with the latest prediction model
     self.model_predict
     rl vs. smithy bot
     """
     vbuy = lambda x: self.model_predict.predict(x)
     # vact = lambda x: self.model_act.predict(x)
     # p1 = BuyActRLplayer(vbuy, vact)
     p1 = RLPlayer(vbuy)
     p1.epsilon = self.epsilon
     p1.record_history = 1
     p1.include_action = 1
     p2 = SmithyBot()
     # smithy bot's data is excluded from training here; set record_history
     # to 1 to try including it
     p2.record_history = 0
     d_this, _ = self.record_game(ngames, [p1, p2], fname)
     self.add_data(d_this)
     return d_this
Example #8
def load_rl_bot(fn, dql='', version='SarsaActBuyAgent', pre_witch=0):
    if dql == '':
        if version == 'SarsaActBuyAgent':
            pr = RandomPlayer()
            pr.record_history = 1
            dql = SarsaActBuyAgent()
            data = dql.record_game(1, [pr, SmithyBot()], verbose=0)
            dql.length = (data[0].shape[1] + data[1].shape[1])
            dql.create_model_5layers()
            dql.create_act_model()
            dql.fit(data)
            dql.fit_act(data)
            dql.load_model(fn)
            p1 = BuyActRLplayer(lambda x: dql.model.predict(x),
                                lambda x: dql.model_act.predict(x))
            p1.name = fn
        elif version == 'SarsaAgent':
            if pre_witch:
                variable_cards_this = [
                    village, cellar, smithy, festival, market, laboratory,
                    chapel, warehouse, council_room, militia, moat
                ]
                pr = RandomPlayer()
                pr.record_history = 1
                dql = SarsaAgent()
                dql.variable_cards = variable_cards_this
                data = dql.record_game(1, [pr, SmithyBot()])
                print(data[0].shape)
                print(data[1].shape)
            else:
                pr = RandomPlayer()
                pr.record_history = 1
                data = record_game(1, [pr, SmithyBot()])
            dql = SarsaAgent(length=(data[0].shape[1] + data[1].shape[1]))
            if pre_witch:
                # re-apply the pre-witch card set, which the fresh agent loses
                dql.variable_cards = variable_cards_this
            dql.create_model(num_layers=get_num_layers(fn + '_ar.h5'))
            dql.fit(data)
            dql.load_model(fn)
            p1 = RLPlayer(lambda x: dql.model.predict(x))
            p1.name = fn
        elif version == 'DQLSarsaAgent':
            dql = DQLSarsaAgent()
            p1 = SmithyBot()
            p1.record_history = 1
            dbuy, _ = dql.record_game(1, [p1, SmithyBot()])
            sa = np.array(
                [np.concatenate([s, a]) for s, a, r, _, _, _ in dbuy])
            r = np.array([r for s, a, r, _, _, _ in dbuy])
            dql.create_model(sa, r)
            dql.load_model(fn)
            p1 = RLPlayer(lambda x: dql.model_predict.predict(x))
            p1.name = fn
        else:
            raise ValueError('No such version of Agent: {:s}'.format(version))
    else:
        # dql was passed in, so its network is already initialized
        dql.load_model(fn)
        if version == 'DQLSarsaAgent':
            p1 = RLPlayer(lambda x: dql.model_predict.predict(x))
        else:
            p1 = RLPlayer(lambda x: dql.model.predict(x))
        p1.name = fn
    p1.epsilon = 0
    return (p1, dql)
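A usage sketch for the loader; the checkpoint paths are placeholders, and the second call shows the fast path where an already-initialized dql is reused instead of rebuilding the network:

# Hypothetical usage: load a checkpoint and evaluate it against SmithyBot.
p1, dql = load_rl_bot('./model/sarsa_v0_iteration_100', version='SarsaAgent')
print(compare_bots([p1, SmithyBot()], 20))
# reuse the initialized network to load a later checkpoint without re-fitting
p2, _ = load_rl_bot('./model/sarsa_v0_iteration_200', dql=dql, version='SarsaAgent')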