Example #1
def generate_one_card_training_data(num_games):
    """ generate training data using @p num_games parties simulation """
    training_data = []
    game = Game(RandomPlayer(), RandomPlayer())
    for i in range(num_games):
        # simulating a new game
        game.reset()
        player_id = 0
        while True:
            player = game.players[player_id]
            opponent = game.get_opponent(player_id)
            player_vector = stack_state_to_vector(player.stack_state)
            opponent_vector = stack_state_to_vector(opponent.stack_state)
            # generating input / expected for each card in hand
            for card in player.hand:
                is_valid = player.has_valid_play(opponent, card)
                input_vector = np.concatenate((player_vector, opponent_vector, value_to_one_hot(card.value)))
                output_vector = 1 if is_valid else 0
                training_data.append((
                    input_vector,
                    output_vector
                ))
            valid = game.play_one_turn(player_id)
            if not valid: break
            if game.players[player_id].win_condition():
                verbose_report("player {} has won".format(player_id))
                break
            # switch to next player
            player_id = 1 - player_id
    return training_data
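
The function above only builds the (input_vector, label) pairs; it does not train anything itself. As a minimal sketch, assuming a Keras-style model such as the ones used in the later examples (the import path, layer sizes and the input_width parameter are illustrative assumptions, not taken from the project), the data could be consumed like this:

import numpy as np
from keras.models import Sequential  # assumption: standalone Keras is available
from keras.layers import Dense

def train_one_card_validity_model(training_data, input_width, epochs=5):
    # split the (input_vector, label) pairs produced above;
    # input_width must match len(input_vector)
    xs = np.array([inp for inp, label in training_data])
    ys = np.array([label for inp, label in training_data])
    # small dense binary classifier; the layer sizes are assumptions
    model = Sequential([
        Dense(64, activation="relu", input_shape=(input_width,)),
        Dense(1, activation="sigmoid"),  # probability that the card has a valid play
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    model.fit(xs, ys, epochs=epochs, verbose=0)
    return model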
Example #2
    def play_on_increasing(self, card):
        verbose_report("plays {} on increasing".format(card))
        if self.is_valid_play_on_increasing(card):
            self.use_hand_card(card)
            self.increasing_list.append(card)
        else:
            raise ForbiddenPlay
Example #3
    def play_on_opponent_decreasing(self, opponent, card):
        verbose_report("plays {} on opponent decreasing".format(card))
        if self.is_valid_play_on_opponent_decreasing(opponent, card):
            self.use_hand_card(card)
            opponent.decreasing_list.append(card.get_opp())
            self.has_played_on_opp_this_turn = True
        else:
            raise ForbiddenPlay
Example #4
def main_trainable_player():
    parser = argparse.ArgumentParser(description="NN-based AI for the Game")
    parser.add_argument("--num", type=int, default=100, help="number of games to simulate for NN training")
    parser.add_argument("--epochs", type=int, default=5, help="number of epochs to run for training")
    parser.add_argument("--save-file", type=str, default=None, help="NN weights will be saved to this file")
    parser.add_argument("--load-file", type=str, default=None, help="NN weights will be loaded from this file (bypasses training)")
    parser.add_argument("--skip-training", action="store_true", help="skip training phase")
    args = parser.parse_args()

    # train and evaluate model
    nn_player = TrainablePlayer()
    nn_player.build_model()
    if args.load_file:
        nn_player.model.load_weights(args.load_file)

    if not args.skip_training:
        nn_player.train_model(args.num, args.epochs)

    if args.save_file:
        nn_player.model.save_weights(args.save_file)

    print("evaluating NN during one game")

    # execute model on one game
    game = Game(RandomPlayer(), RandomPlayer())
    # simulating a new game
    game.reset()
    player_id = 0
    while True:
        player = game.players[player_id]
        opponent = game.get_opponent(player_id)
        # evaluating model
        print("\nnew evaluation")
        player.display_state(str(player_id))
        nn_player.execute_model(player, opponent)

        # play one full turn for the current player
        valid = game.play_one_turn(player_id)
        if not valid: break
        if game.players[player_id].win_condition():
            verbose_report("player {} has won".format(player_id))
            break
        # switch to next player
        player_id = 1 - player_id

    nn_player.evaluate_model()
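
Assuming the project exposes this entry point through a script (the script name below is hypothetical), typical invocations based on the options defined above could look like:

# python trainable_player.py --num 200 --epochs 10 --save-file weights.h5
# python trainable_player.py --load-file weights.h5 --skip-training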
Example #5
    def get_action_to_play(self, opponent):
        """ determine an action to play using the trained NN """
        player_action = self.educated_play(opponent)
        if player_action is None:
            verbose_report("invalid card from educated play")
            verbose_report("fallback to random play")
            player_action = self.get_random_action(opponent)
        elif not self.is_action_valid(player_action, opponent):
            verbose_report("invalid action from educated play")
            verbose_report("fallback to random play")
            player_action = self.get_random_action(opponent)
        else:
            verbose_report("valid action")
            if player_action.cost == 1:
                self.valid_count_action0 += 1
            elif player_action.cost == 2:
                self.valid_count_action1 += 1
        self.play_count += 1
        return player_action
Example #6
    def train_model(self, num_party=100, epochs=5):
        print("training model")

        VALID_BONUS = 0  # 200
        WIN_BONUS = 0  # 500
        LOSS_MALUS = 0  # -500
        INVALID_MALUS = 0  # -500

        # now execute the q learning
        # learning parameters
        y = 0.0  # 0.95  (discount factor)
        eps = 0.9  # initial epsilon for the epsilon-greedy policy
        decay_factor = 0.999  # per-episode epsilon decay
        r_avg_list = []
        game = Game(self, StarterPlayer())
        opponent = game.get_opponent(0)
        for i in range(num_party):
            # display statistics
            if i % (num_party // 20) == 0:
                print("Episode {} of {}".format(i + 1, num_party))
                evaluate_strategy(self, [opponent], num_eval_game=100)
            s = game.reset()
            eps *= decay_factor
            r_sum = 0
            game_ended = False
            while not game_ended:
                # start a new turn of NN player
                self.reset_turn()
                current_state = self.get_state(opponent)
                target_vec = self.model.predict(np.array([current_state]))[0]
                if np.random.random() < eps:
                    verbose_report("greedy random")
                    # random action pair to implement the epsilon-greedy policy
                    action0 = np.random.randint(0, 24)
                    action1 = np.random.randint(0, 24)
                    a = action0 + 24 * action1
                else:
                    # greedy choice: the combined index a in [0, 24*24)
                    # encodes two elementary actions, each in [0, 24)
                    a = np.argmax(target_vec)
                    verbose_report("a={}".format(a))
                    action0 = int(a % 24)
                    action1 = int(a // 24)
                # each elementary action selects one of the 6 hand cards
                # and one of 4 action types
                card0_id = int(action0 % 6)
                action0_id = int(action0 // 6)
                card1_id = int(action1 % 6)
                action1_id = int(action1 // 6)

                action0_obj = self.get_action(card0_id, action0_id)
                action1_obj = self.get_action(card1_id, action1_id)
                # initial reward
                reward = 0
                remaining_action = 0
                invalid_play = True
                if action0_obj is None or action1_obj is None:
                    # at least one invalid card
                    reward = 0
                    remaining_action = 2
                else:
                    # valid cards
                    opponent = game.get_opponent(0)
                    if not self.is_action_valid(action0_obj, opponent):
                        # at least one invalid action
                        reward = 0
                        remaining_action = 2
                    else:
                        self.execute(action0_obj, opponent)
                        if not self.is_action_valid(action1_obj, opponent):
                            reward = 0
                            remaining_action = 1
                        else:
                            reward += VALID_BONUS
                            reward += self.evaluate_state(opponent)
                            self.execute(action1_obj, opponent)
                            reward += self.evaluate_state(opponent)
                            remaining_action = 0
                            invalid_play = False
                game_ended = False
                while not game_ended and remaining_action > 0:
                    # random play to bridge missing actions
                    game_ended = not self.random_play(game)
                    remaining_action -= 1

                if not game_ended and self.win_condition():
                    # reward bonus
                    reward += WIN_BONUS
                    game_ended = True

                if not game_ended:
                    # plays opponent turns
                    game_ended = not game.play_one_turn(1)
                    if game_ended:
                        # game stopped, opponent has lost
                        reward += WIN_BONUS
                        game_ended = True
                    elif opponent.win_condition():
                        # check if opponent win in this stage
                        # reward malus
                        reward += LOSS_MALUS
                        game_ended = True

                next_state = self.get_state(opponent)

                if invalid_play:
                    reward = 0
                    target = reward
                elif game_ended:
                    # no next state
                    target = reward
                else:
                    # valid and next state
                    target = reward + y * np.max(
                        self.model.predict(np.array([next_state])))
                verbose_report("    target[{}]={}".format(a, target))
                target_vec[a] = target
                self.model.fit(np.array([current_state]),
                               np.array([target_vec]),
                               epochs=1,
                               verbose=0)
                r_sum += reward

            r_avg_list.append(r_sum / 1000)
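
For reference, the action indices manipulated in train_model pack a hand-card slot (0-5) and an action type (0-3) into an elementary action in [0, 24), and two elementary actions into the combined index a in [0, 576). A small sketch of the encode/decode round trip follows; the helper names are illustrative and not part of the project:

def encode_elementary_action(card_id, action_id):
    """ pack a hand-card slot (0-5) and an action type (0-3) into [0, 24) """
    return card_id + 6 * action_id

def decode_elementary_action(action):
    """ inverse: recover (card_id, action_id), as done in train_model """
    return action % 6, action // 6

def encode_turn(action0, action1):
    """ pack two elementary actions into the combined index in [0, 576) """
    return action0 + 24 * action1

def decode_turn(a):
    """ inverse: recover (action0, action1), as done after np.argmax """
    return a % 24, a // 24

assert decode_elementary_action(encode_elementary_action(3, 2)) == (3, 2)
assert decode_turn(encode_turn(17, 5)) == (17, 5)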