Example #1
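    # Self-play training loop: generate games with MCTS, retrain the network on
    # the collected positions, and keep the new weights only if they beat the
    # previous version in an evaluation Arena.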
    def train(self,
              n_iterations=30,
              n_games=50,
              n_eval_games=15,
              threshold=0.55):
        for i in range(n_iterations):
            print('Iteration', i)

            train_set = []

            for j in range(n_games):
                print('Game', j)
                game_clone = self.game.clone()
                self.play_out(game_clone, train_set)

            # Checkpoint the current weights and load that checkpoint into the
            # reference ("old") network that the retrained net must beat below.
            self.net.save_model()
            self.old_net.load_model()

            self.net.train(train_set)

            new_mcts = MCTS(self.net)
            old_mcts = MCTS(self.old_net)

            arena = Arena(self.game, old_mcts, new_mcts)

            win_rate = arena.fight(n_eval_games)
            print('Win rate:', win_rate)

            if win_rate > threshold:
                print('New best model')
                self.net.save_model()
            else:
                print('Leaving old model')
                self.net.load_model()
Example #2
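# Pit a bot class looked up by name in module B against an interactive
# (human-driven) opponent; `player` chooses which side the named bot takes.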
def main(botname, player):
    competition_io = CompetitionIo()
    quiet_interactive_io = QuietInteractiveIo()
    
    prop_bot = getattr(B, botname)(bot_io = competition_io)
    opp_bot = B.InteractiveBot(bot_io = quiet_interactive_io)

    if player == 0:
        bot1, bot2 = prop_bot, opp_bot
    else:
        bot2, bot1 = prop_bot, opp_bot

    try:
        arena_competition = Arena(arena_io = quiet_interactive_io, bot1 = bot1, bot2 = bot2)
        arena_competition.fight()
    except EOFError:
        pass
Example #3
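        # Driver fragment: sets up several Arena match-ups (interactive vs. idle,
        # random vs. random, strategy vs. idle) with optional replay recording,
        # then reports the total elapsed time.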
        # The opening of this Arena(...) call was cut off in the excerpt; the
        # first lines below are a reconstruction (the InteractiveBot/bot_io
        # choice is an assumption) so the fragment parses.
        arena_interactive = Arena(arena_io = game_io,
                       bot1 = InteractiveBot(bot_io = game_io),
                       bot2 = IdleBot(bot_io = thunk_io))
        #arena_interactive.fight()
    
        # Two randoms duking it out
        arena_random = Arena(arena_io = game_io,
                       bot1 = RandomBot(bot_io = CompositeIo(game_io, WriteReplayIo(fd = rpl_fd))),
                       bot2 = RandomBot(bot_io = thunk_io))
        #arena_random.fight()
    
        # Non-interactive faux strat against idle with replay
        strategy_bot_test = StrategyBot(bot_io = CompositeIo(game_io, WriteReplayIo(fd = rpl_fd)))
        strategy_bot_test.add_strategy(
                SequenceStrategy(
                                 #GenerateValueStrategy(slot = 0, target = 15),
                                 #GenerateValueStrategy(slot = 1, target = 3),
                                 #GenerateValueStrategy(slot = 3, target = 15),
                                 #AppNTo0Strategy(slot = 2, n_slot = 4),
                                 #GetIStrategy(slot = 100, i_slot = 1),
                                 #AppFIJNStrategy(slot = 2, f_card = cards.help, i_num = 3, j_num = 3, n_num = 8192),
                                 #AppFIJNStrategy(slot = 2, f_card = cards.attack, i_num = 3, j_num = 3, n_num = 1024),
                                 DumbSlotKiller(battery_slot = 3, target_slot = 252),
                                 #IdleStrategy(),
                                ))
        arena_strategy = Arena(arena_io = game_io,
                       bot1 = IdleBot(bot_io = thunk_io),
                       bot2 = strategy_bot_test)
        arena_strategy.fight()

    game_io.notify_total_time(clock() - start)

Example #4
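# Minimal usage: build an Arena with its defaults and run a single fight.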
def main():
    a = Arena()
    a.fight()
Example #5
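    # Training driver for an RL agent in a Monopoly-style board game: simulate
    # batches of games against fixed or self-play opponents, update the policy,
    # log losses and rewards, and periodically evaluate it in an Arena against
    # random and fixed agents, checkpointing the target policy along the way.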
    def run(self):
        # Silence all verbose logging during training.
        config.verbose = {key: False for key in config.verbose}

        # Exploration schedule: epsilon decays exponentially with the episode index.
        get_epsilon = lambda episode: np.exp(-episode * self.e_decay)

        for eps in range(self.episodes + 1):

            full_games_counter = 0

            game_copy = None

            storage1 = self.storage_class()
            storage2 = self.storage_class()

            if self.train_on_fixed:
                self.policy.train_on_fixed = True

            print('---STARTING SIMULATIONS')
            for n_game in tqdm(range(self.n_games)):

                n_opps_agents = 1
                n_rl_agents = 1
                players = []

                rl_agents = [
                    Player(policy=self.policy,
                           player_id=str(idx) + '_rl',
                           storage=storage1) for idx in range(n_rl_agents)
                ]

                if self.policy.policy_name == 'dqn':
                    self.policy.policy.update_epsilon(get_epsilon(eps))

                if not self.self_play:
                    opp_agents = [
                        Player(policy=FixedAgent(high=350, low=150, jail=100),
                               player_id=str(idx) + '_fixed',
                               storage=self.storage_class())
                        for idx in range(n_opps_agents)
                    ]
                else:
                    opp_agents = [
                        Player(policy=self.policy,
                               player_id=str(idx + 1) + '_rl',
                               storage=storage2) for idx in range(n_rl_agents)
                    ]

                players.extend(rl_agents)
                players.extend(opp_agents)
                # shuffle(players)
                # print('----- Players: {} fixed, {} rl'.format(n_fixed_agents, n_rl_agents))

                game = Game(players=players, max_rounds=self.n_rounds)
                game_copy = game

                for player in players:
                    player.set_game(game, n_game)

                game_finished = False

                for n_round in range(self.n_rounds):
                    if game_finished:
                        break

                    game.update_round()

                    for player in game.players:
                        if not game.is_game_active():  # stopping rounds loop
                            player.won()
                            game_finished = True
                            break

                        # player.reset_mortgage_buy()

                        # Bankruptcy is checked twice per turn because a player can
                        # also go bankrupt when paying interest on another player's
                        # mortgaged spaces. TODO: refactor this double check.
                        if player.is_bankrupt:
                            game.remove_player(player)
                            break

                        game.pass_dice()

                        while True:
                            if not game.is_game_active():  # stopping players loop
                                break

                            player.optional_actions()

                            # player.reset_mortgage_buy()

                            game.dice.roll()

                            if player.is_in_jail():
                                stay_in_jail = player.jail_strategy(
                                    dice=game.dice)
                                if stay_in_jail:
                                    player.optional_actions()
                                    break

                            if player.is_bankrupt:
                                game.remove_player(player)
                                break

                            if game.dice.double_counter == 3:  # three doubles in a row
                                player.go_to_jail()
                                break

                            player.move(game.dice.roll_sum)

                            if player.position == 30:  # "Go to Jail" square
                                player.go_to_jail()
                                break

                            # TODO: add card go to jail

                            space = game.board[player.position]

                            player.act(space)

                            if player.is_bankrupt:
                                game.remove_player(player)
                                break

                            if game.dice.double:  # doubles grant another roll
                                continue

                            # end turn
                            break

                if game.players_left == 1:
                    full_games_counter += 1
                else:
                    for player in game.players:
                        player.draw()

            losses = []

            for player in game_copy.players:
                if 'rl' in player.id:
                    self.update(player, losses)

            for player in game_copy.lost_players:
                if 'rl' in player.id:
                    self.update(player, losses)

            if eps % self.target_update == 0:
                self.target_policy.load_state_dict(self.policy.state_dict())

            rewards = []
            for player in game_copy.players:
                if 'rl' in player.id:
                    rewards.append(player.storage.get_mean_reward())

            for player in game_copy.lost_players:
                if 'rl' in player.id:
                    rewards.append(player.storage.get_mean_reward())

            with open(self.file_metrics, 'a') as metrics:
                metrics.write('{},{},{}\n'.format(eps, n_rl_agents,
                                                  np.average(losses)))

            if eps % self.verbose_eval == 0:
                if self.train_on_fixed:
                    self.policy.train_on_fixed = False
                print('------Arena')
                arena = Arena(
                    n_games=self.n_eval_games,
                    n_rounds=self.n_rounds,
                    verbose=0)  # TODO: 3 logging levels; verbose=0 only shows win rates
                print('--------RL vs Random')
                winrate_random = arena.fight(agent=self.target_policy,
                                             opponent=RandomAgent(),
                                             opp_id='random')
                print('--------RL vs Fixed')
                winrate_fixed = arena.fight(agent=self.target_policy,
                                            opponent=FixedAgent(high=350,
                                                                low=150,
                                                                jail=100),
                                            opp_id='fixed')

                with open(self.file_winrates, 'a') as winrates:
                    winrates.write('{},{},{}\n'.format(eps, winrate_random,
                                                       winrate_fixed))

            if eps % self.checkpoint_step == 0:
                torch.save(self.target_policy,
                           os.path.join('models', 'model-{}.pt'.format(eps)))

            print('---Full games {} / {}'.format(full_games_counter,
                                                 self.n_games))
Example #6
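# Evaluation entry point: load the latest (or a specific) saved policy, or build
# a fresh one, then report its win rate against a random or fixed opponent over
# a short Arena match.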
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        default=-1,
        help='model to load; pass a model number to load a specific checkpoint')
    parser.add_argument('--opponent',
                        default='fixed',
                        help='opponent to play against')
    args = parser.parse_args()

    config.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    print('device', config.device)

    args.opponent = 'random'  # NOTE: hard-coded override of the --opponent CLI flag
    # args.model = 420

    config.train_on_fixed = False
    if args.model == -1 and len(os.listdir('models/')) != 0:
        models = list(
            filter(lambda name: 'model' in name, os.listdir('./models/')))
        model_number = sorted([
            int(model_name.split('-')[1].split('.')[0])
            for model_name in models
        ])[-1]
        model_name = 'model-{}.pt'.format(model_number)
        print('Loading model:', model_name)
        if config.device.type == 'cpu':
            policy = torch.load(os.path.join('./models', model_name),
                                map_location=lambda storage, loc: storage)
            policy.fixed_agent.device = config.device
        else:
            policy = torch.load(os.path.join('./models', model_name))

        policy.train_on_fixed = False
    elif args.model == 'init' or len(os.listdir('models/')) == 0:
        policy = NNWrapper('dqn', config.state_space, config.action_space,
                           config.train_on_fixed)
        policy.policy.epsilon = 0.
        policy.to(config.device)
    else:
        # argparse passes --model as a string, so cast it before taking abs()
        model_name = 'model-{}.pt'.format(abs(int(args.model)))
        print('Loading model:', model_name)
        if config.device.type == 'cpu':
            policy = torch.load(os.path.join('./models', model_name),
                                map_location=lambda storage, loc: storage)
            policy.fixed_agent.device = config.device
        else:
            policy = torch.load(os.path.join('./models', model_name))
        policy.train_on_fixed = False

    if args.opponent == 'random':
        opponent = RandomAgent()
    else:
        opponent = FixedAgent(high=350, low=150, jail=100)
    policy.eval()

    print('SHOW MATCH')
    arena = Arena(n_games=20, verbose=0, n_rounds=500)

    start = datetime.datetime.now()

    winrate = arena.fight(agent=policy, opponent=opponent, log_rewards=False)

    end = datetime.datetime.now()
    diff = end - start
    print('Took {} sec'.format(np.round(diff.total_seconds(), 3)))