def train(self, n_iterations=30, n_games=50, n_eval_games=15, threshold=0.55):
    for i in range(n_iterations):
        print('Iteration', i)
        train_set = []
        for j in range(n_games):
            print('Game', j)
            game_clone = self.game.clone()
            self.play_out(game_clone, train_set)
        # Snapshot the current weights; old_net reloads them so the
        # pre-training version is kept for the evaluation match below.
        self.net.save_model()
        self.old_net.load_model()
        self.net.train(train_set)
        new_mcts = MCTS(self.net)
        old_mcts = MCTS(self.old_net)
        arena = Arena(self.game, old_mcts, new_mcts)
        win_rate = arena.fight(n_eval_games)
        print('Win rate:', win_rate)
        if win_rate > threshold:
            print('New best model')
            self.net.save_model()  # promote: overwrite the checkpoint
        else:
            print('Leaving old model')
            self.net.load_model()  # revert to the previous checkpoint
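# --- Added sketch (not from the source): why a 0.55 gate over 15 games? ---
# The promotion gate above accepts the new net only if its arena win rate
# exceeds `threshold`. A quick binomial check shows how often an equally
# strong net would still clear it; n_games/threshold mirror train()'s defaults.
from math import comb

def pass_probability(n_games=15, threshold=0.55, p_win=0.5):
    """P(win_rate > threshold) for a net whose true win probability is p_win."""
    need = int(threshold * n_games) + 1  # smallest win count with rate > threshold
    return sum(comb(n_games, k) * p_win ** k * (1 - p_win) ** (n_games - k)
               for k in range(need, n_games + 1))

print(pass_probability())  # ~0.304: an equal-strength net passes ~30% of the time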
def main(botname, player):
    competition_io = CompetitionIo()
    quiet_interactive_io = QuietInteractiveIo()
    prop_bot = getattr(B, botname)(bot_io = competition_io)
    opp_bot = B.InteractiveBot(bot_io = quiet_interactive_io)
    if player == 0:
        bot1, bot2 = prop_bot, opp_bot
    else:
        bot2, bot1 = prop_bot, opp_bot
    try:
        arena_competition = Arena(arena_io = quiet_interactive_io,
                                  bot1 = bot1,
                                  bot2 = bot2)
        arena_competition.fight()
    except EOFError:
        pass
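# --- Added usage sketch (bot class name is illustrative, not from the source). ---
# `botname` must name a bot class exposed by the module imported as `B`;
# `player` selects which side the proposed bot takes.
if __name__ == '__main__':
    main(botname='RandomBot', player=0)  # assumes B.RandomBot exists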
# The opening of this Arena(...) call is truncated in the source; judging by
# the commented fight() call below, it presumably read
# `arena_interactive = Arena(arena_io = game_io, bot1 = ..., `.
arena_interactive = Arena(bot1 = ...,  # leading arguments lost to truncation
                          bot2 = IdleBot(bot_io = thunk_io))
#arena_interactive.fight()

# Two randoms duking it out
arena_random = Arena(arena_io = game_io,
                     bot1 = RandomBot(bot_io = CompositeIo(game_io, WriteReplayIo(fd = rpl_fd))),
                     bot2 = RandomBot(bot_io = thunk_io))
#arena_random.fight()

# Non-interactive faux strat against idle with replay
strategy_bot_test = StrategyBot(bot_io = CompositeIo(game_io, WriteReplayIo(fd = rpl_fd)))
strategy_bot_test.add_strategy(
    SequenceStrategy(
        #GenerateValueStrategy(slot = 0, target = 15),
        #GenerateValueStrategy(slot = 1, target = 3),
        #GenerateValueStrategy(slot = 3, target = 15),
        #AppNTo0Strategy(slot = 2, n_slot = 4),
        #GetIStrategy(slot = 100, i_slot = 1),
        #AppFIJNStrategy(slot = 2, f_card = cards.help, i_num = 3, j_num = 3, n_num = 8192),
        #AppFIJNStrategy(slot = 2, f_card = cards.attack, i_num = 3, j_num = 3, n_num = 1024),
        DumbSlotKiller(battery_slot = 3, target_slot = 252),
        #IdleStrategy(),
    ))

arena_strategy = Arena(arena_io = game_io,
                       bot1 = IdleBot(bot_io = thunk_io),
                       bot2 = strategy_bot_test)
arena_strategy.fight()

game_io.notify_total_time(clock() - start)
def main():
    a = Arena()
    a.fight()
def run(self):
    config.verbose = {key: False for key in config.verbose}
    get_epsilon = lambda episode: np.exp(-episode * self.e_decay)
    for eps in range(self.episodes + 1):
        full_games_counter = 0
        game_copy = None
        storage1 = self.storage_class()
        storage2 = self.storage_class()
        if self.train_on_fixed:
            self.policy.train_on_fixed = True

        print('---STARTING SIMULATIONS')
        for n_game in tqdm(range(self.n_games)):
            n_opps_agents = 1
            n_rl_agents = 1
            players = []
            rl_agents = [
                Player(policy=self.policy,
                       player_id=str(idx) + '_rl',
                       storage=storage1) for idx in range(n_rl_agents)
            ]
            if self.policy.policy_name == 'dqn':
                self.policy.policy.update_epsilon(get_epsilon(eps))
            if not self.self_play:
                opp_agents = [
                    Player(policy=FixedAgent(high=350, low=150, jail=100),
                           player_id=str(idx) + '_fixed',
                           storage=self.storage_class())
                    for idx in range(n_opps_agents)
                ]
            else:
                opp_agents = [
                    Player(policy=self.policy,
                           player_id=str(idx + 1) + '_rl',
                           storage=storage2) for idx in range(n_rl_agents)
                ]
            players.extend(rl_agents)
            players.extend(opp_agents)
            # shuffle(players)
            # print('----- Players: {} fixed, {} rl'.format(n_fixed_agents, n_rl_agents))

            game = Game(players=players, max_rounds=self.n_rounds)
            game_copy = game
            for player in players:
                player.set_game(game, n_game)

            game_finished = False
            for n_round in range(self.n_rounds):
                if game_finished:
                    break
                game.update_round()
                for player in game.players:
                    if not game.is_game_active():  # stopping rounds loop
                        player.won()
                        game_finished = True
                        break
                    # player.reset_mortgage_buy()
                    if player.is_bankrupt:
                        # must change this: done twice because some players can
                        # go bankrupt when they must pay bank interest
                        game.remove_player(player)  # other player's mortgaged spaces
                        break
                    game.pass_dice()

                    while True:
                        if not game.is_game_active():  # stopping players loop
                            break
                        player.optional_actions()
                        # player.reset_mortgage_buy()
                        game.dice.roll()
                        if player.is_in_jail():
                            stay_in_jail = player.jail_strategy(dice=game.dice)
                            if stay_in_jail:
                                player.optional_actions()
                                break
                        if player.is_bankrupt:
                            game.remove_player(player)
                            break
                        if game.dice.double_counter == 3:
                            player.go_to_jail()
                            break
                        player.move(game.dice.roll_sum)
                        if player.position == 30:
                            player.go_to_jail()
                            break
                        # TODO: add card go to jail
                        space = game.board[player.position]
                        player.act(space)
                        if player.is_bankrupt:
                            game.remove_player(player)
                            break
                        if game.dice.double:
                            continue
                        # end turn
                        break

            if game.players_left == 1:
                full_games_counter += 1
            else:
                for player in game.players:
                    player.draw()

        losses = []
        for player in game_copy.players:
            if 'rl' in player.id:
                self.update(player, losses)
        for player in game_copy.lost_players:
            if 'rl' in player.id:
                self.update(player, losses)

        if eps % self.target_update == 0:
            self.target_policy.load_state_dict(self.policy.state_dict())

        rewards = []
        for player in game_copy.players:
            if 'rl' in player.id:
                rewards.append(player.storage.get_mean_reward())
        for player in game_copy.lost_players:
            if 'rl' in player.id:
                rewards.append(player.storage.get_mean_reward())

        with open(self.file_metrics, 'a') as metrics:
            metrics.write('{},{},{}\n'.format(eps, n_rl_agents, np.average(losses)))

        if eps % self.verbose_eval == 0:
            if self.train_on_fixed:
                self.policy.train_on_fixed = False
            print('------Arena')
            # TODO: add 3 types of logging; 0 - only show win rates.
            arena = Arena(n_games=self.n_eval_games,
                          n_rounds=self.n_rounds,
                          verbose=0)
            print('--------RL vs Random')
            winrate_random = arena.fight(agent=self.target_policy,
                                         opponent=RandomAgent(),
                                         opp_id='random')
            print('--------RL vs Fixed')
            winrate_fixed = arena.fight(agent=self.target_policy,
                                        opponent=FixedAgent(high=350, low=150, jail=100),
                                        opp_id='fixed')
            with open(self.file_winrates, 'a') as winrates:
                winrates.write('{},{},{}\n'.format(eps, winrate_random, winrate_fixed))

        if eps % self.checkpoint_step == 0:
            torch.save(self.target_policy,
                       os.path.join('models', 'model-{}.pt'.format(eps)))

        print('---Full games {} / {}'.format(full_games_counter, self.n_games))
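# --- Added sketch (not from the source): shape of the epsilon schedule. ---
# run() anneals exploration as exp(-episode * e_decay); the e_decay value
# below is chosen purely for illustration.
import numpy as np

def get_epsilon(episode, e_decay=0.01):
    """Exponentially decaying exploration rate, matching run() above."""
    return np.exp(-episode * e_decay)

for episode in (0, 50, 100, 500):
    print(episode, round(float(get_epsilon(episode)), 3))
# -> 0 1.0, 50 0.607, 100 0.368, 500 0.007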
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        default=-1,
                        help='model to load; to load specific model use model number')
    parser.add_argument('--opponent',
                        default='fixed',
                        help='opponent to play against')
    args = parser.parse_args()

    config.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('device', config.device)

    args.opponent = 'random'
    # args.model = 420
    config.train_on_fixed = False

    if args.model == -1 and len(os.listdir('models/')) != 0:
        # No model requested: pick the checkpoint with the highest number.
        models = list(filter(lambda name: 'model' in name, os.listdir('./models/')))
        model_number = sorted([
            int(model_name.split('-')[1].split('.')[0]) for model_name in models
        ])[-1]
        model_name = 'model-{}.pt'.format(model_number)
        print('Loading model:', model_name)
        if config.device.type == 'cpu':
            policy = torch.load(os.path.join('./models', model_name),
                                map_location=lambda storage, loc: storage)
            policy.fixed_agent.device = config.device
        else:
            policy = torch.load(os.path.join('./models', model_name))
        policy.train_on_fixed = False
    elif args.model == 'init' or len(os.listdir('models/')) == 0:
        policy = NNWrapper('dqn', config.state_space, config.action_space,
                           config.train_on_fixed)
        policy.policy.epsilon = 0.
        policy.to(config.device)
    else:
        # argparse delivers --model as a string; coerce it before taking abs().
        model_name = 'model-{}.pt'.format(abs(int(args.model)))
        print('Loading model:', model_name)
        if config.device.type == 'cpu':
            policy = torch.load(os.path.join('./models', model_name),
                                map_location=lambda storage, loc: storage)
            policy.fixed_agent.device = config.device
        else:
            policy = torch.load(os.path.join('./models', model_name))
        policy.train_on_fixed = False

    if args.opponent == 'random':
        opponent = RandomAgent()
    else:
        opponent = FixedAgent(high=350, low=150, jail=100)

    policy.eval()

    print('SHOW MATCH')
    arena = Arena(n_games=20, verbose=0, n_rounds=500)
    start = datetime.datetime.now()
    winrate = arena.fight(agent=policy, opponent=opponent, log_rewards=False)
    end = datetime.datetime.now()
    diff = end - start
    print('Took {} sec'.format(np.round(diff.total_seconds(), 3)))
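# --- Added refactoring sketch (uses only names already present in main()). ---
# The two checkpoint-loading branches above are identical; a small helper
# removes the duplication.
import os
import torch

def load_policy(model_name, device):
    """Load a checkpoint, remapping tensors to CPU when no GPU is available."""
    path = os.path.join('./models', model_name)
    if device.type == 'cpu':
        policy = torch.load(path, map_location=lambda storage, loc: storage)
        policy.fixed_agent.device = device
    else:
        policy = torch.load(path)
    policy.train_on_fixed = False
    return policy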