import numpy as np


def generate_one_card_training_data(num_games):
    """ generate training data by simulating @p num_games games """
    training_data = []
    game = Game(RandomPlayer(), RandomPlayer())
    for i in range(num_games):
        # simulate a new game
        game.reset()
        player_id = 0
        while True:
            player = game.players[player_id]
            opponent = game.get_opponent(player_id)
            player_vector = stack_state_to_vector(player.stack_state)
            opponent_vector = stack_state_to_vector(opponent.stack_state)
            # generate one (input, expected output) pair for each card in hand
            for card in player.hand:
                is_valid = player.has_valid_play(opponent, card)
                input_vector = np.concatenate((player_vector,
                                               opponent_vector,
                                               value_to_one_hot(card.value)))
                output_vector = 1 if is_valid else 0
                training_data.append((input_vector, output_vector))
            valid = game.play_one_turn(player_id)
            if not valid:
                break
            if game.players[player_id].win_condition():
                verbose_report("player {} has won".format(player_id))
                break
            # switch to the next player
            player_id = 1 - player_id
    return training_data
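# The generator above relies on two encoding helpers whose implementations do
# not appear in this section. The following is a minimal sketch of what they
# could look like; the exact vector layout and the card value range
# (num_values=15 here) are assumptions, not the repository's actual code.

def value_to_one_hot(value, num_values=15):
    """ Sketch: encode a card value as a one-hot numpy vector
        (the value range is an assumption). """
    vec = np.zeros(num_values)
    vec[value] = 1.0
    return vec


def stack_state_to_vector(stack_state):
    """ Sketch: concatenate the one-hot encoding of each stack's top card
        (assumes stack_state is an iterable of card values). """
    return np.concatenate([value_to_one_hot(v) for v in stack_state])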
def play_on_increasing(self, card):
    verbose_report("plays {} on increasing".format(card))
    if self.is_valid_play_on_increasing(card):
        self.use_hand_card(card)
        self.increasing_list.append(card)
    else:
        raise ForbiddenPlay
def play_on_opponent_decreasing(self, opponent, card):
    verbose_report("plays {} on opponent decreasing".format(card))
    if self.is_valid_play_on_opponent_decreasing(opponent, card):
        self.use_hand_card(card)
        opponent.decreasing_list.append(card.get_opp())
        self.has_played_on_opp_this_turn = True
    else:
        raise ForbiddenPlay
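# Both play methods signal an illegal move by raising ForbiddenPlay rather
# than returning an error code, so a caller can attempt a move and fall back
# on failure. A minimal usage sketch (the helper name try_play_on_increasing
# is hypothetical; player and card objects are assumed already constructed):

def try_play_on_increasing(player, card):
    """ Sketch: attempt a play and report whether it was legal. """
    try:
        player.play_on_increasing(card)
        return True
    except ForbiddenPlay:
        verbose_report("{} cannot be played on increasing".format(card))
        return False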
import argparse


def main_trainable_player():
    parser = argparse.ArgumentParser(description="NN based AI for the Game")
    parser.add_argument("--num", type=int, default=100,
                        help="number of games to simulate for NN training")
    parser.add_argument("--epochs", type=int, default=5,
                        help="number of epochs to run for training")
    parser.add_argument("--save-file", type=str, default=None,
                        help="NN weights will be saved to this file")
    parser.add_argument("--load-file", type=str, default=None,
                        help="NN weights will be loaded from this file (bypasses training)")
    parser.add_argument("--skip-training", action="store_true",
                        help="skip the training phase")
    args = parser.parse_args()

    # train and evaluate the model
    nn_player = TrainablePlayer()
    nn_player.build_model()
    if args.load_file:
        nn_player.model.load_weights(args.load_file)
    if not args.skip_training:
        nn_player.train_model(args.num, args.epochs)
    if args.save_file:
        nn_player.model.save_weights(args.save_file)

    print("evaluating NN during one game")
    # execute the model on one game
    game = Game(RandomPlayer(), RandomPlayer())
    # simulate a new game
    game.reset()
    player_id = 0
    while True:
        player = game.players[player_id]
        opponent = game.get_opponent(player_id)
        # evaluate the model on the current state
        print("\nnew evaluation")
        player.display_state(str(player_id))
        nn_player.execute_model(player, opponent)

        valid = game.play_one_turn(player_id)
        if not valid:
            break
        if game.players[player_id].win_condition():
            verbose_report("player {} has won".format(player_id))
            break
        # switch to the next player
        player_id = 1 - player_id
    nn_player.evaluate_model()
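# Example invocations, using the flags defined by the parser above (the
# script name main.py is an assumption; only the flags come from the parser):
#
#   python3 main.py --num 500 --epochs 10 --save-file weights.h5
#   python3 main.py --load-file weights.h5 --skip-training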
def get_action_to_play(self, opponent):
    """ determine an action to play using the trained NN, falling back
        to a random action when the NN proposal is invalid """
    player_action = self.educated_play(opponent)
    if player_action is None:
        verbose_report("invalid card from educated play")
        verbose_report("fallback to random play")
        player_action = self.get_random_action(opponent)
    elif not self.is_action_valid(player_action, opponent):
        verbose_report("invalid action from educated play")
        verbose_report("fallback to random play")
        player_action = self.get_random_action(opponent)
    else:
        verbose_report("valid action")
        if player_action.cost == 1:
            self.valid_count_action0 += 1
        elif player_action.cost == 2:
            self.valid_count_action1 += 1
    self.play_count += 1
    return player_action
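# The counters updated above make it possible to track how often the NN
# proposes legal actions. A minimal reporting sketch (the method name
# report_action_stats is hypothetical; the attributes come from the method
# above):

def report_action_stats(self):
    """ Sketch: print the ratio of valid educated plays per action slot. """
    if self.play_count:
        verbose_report("valid action0 rate: {:.2%}".format(
            self.valid_count_action0 / self.play_count))
        verbose_report("valid action1 rate: {:.2%}".format(
            self.valid_count_action1 / self.play_count))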
def train_model(self, num_party=100, epochs=5):
    print("training model")
    VALID_BONUS = 0    # 200
    WIN_BONUS = 0      # 500
    LOSS_MALUS = 0     # -500
    INVALID_MALUS = 0  # -500

    # now execute the Q-learning
    # learning parameters
    y = 0.0  # discount factor (0.95)
    eps = 0.9  # initial exploration rate
    decay_factor = 0.999
    r_avg_list = []
    game = Game(self, StarterPlayer())
    opponent = game.get_opponent(0)
    for i in range(num_party):
        # display statistics every 5% of the episodes
        if i % max(1, num_party // 20) == 0:
            print("Episode {} of {}".format(i + 1, num_party))
            evaluate_strategy(self, [opponent], num_eval_game=100)
        game.reset()
        eps *= decay_factor
        r_sum = 0
        game_ended = False
        while not game_ended:
            # start a new turn of the NN player
            self.reset_turn()
            current_state = self.get_state(opponent)
            target_vec = self.model.predict(np.array([current_state]))[0]
            if np.random.random() < eps:
                verbose_report("greedy random")
                # random input to implement the epsilon-greedy policy
                action0 = np.random.randint(0, 24)
                action1 = np.random.randint(0, 24)
                a = action0 + 24 * action1
            else:
                a = np.argmax(target_vec)
            verbose_report("a={}".format(a))
            # decode the joint action index into two (card, action) pairs
            action0 = int(a % 24)
            action1 = int(a // 24)
            card0_id = int(action0 % 6)
            action0_id = int(action0 // 6)
            card1_id = int(action1 % 6)
            action1_id = int(action1 // 6)
            action0_obj = self.get_action(card0_id, action0_id)
            action1_obj = self.get_action(card1_id, action1_id)

            # initial reward
            reward = 0
            remaining_action = 0
            invalid_play = True
            if action0_obj is None or action1_obj is None:
                # at least one invalid card
                reward = INVALID_MALUS  # intended penalty (currently 0)
                remaining_action = 2
            else:
                # valid cards
                opponent = game.get_opponent(0)
                if not self.is_action_valid(action0_obj, opponent):
                    # at least one invalid action
                    reward = INVALID_MALUS
                    remaining_action = 2
                else:
                    self.execute(action0_obj, opponent)
                    if not self.is_action_valid(action1_obj, opponent):
                        reward = INVALID_MALUS
                        remaining_action = 1
                    else:
                        reward += VALID_BONUS
                        reward += self.evaluate_state(opponent)
                        self.execute(action1_obj, opponent)
                        reward += self.evaluate_state(opponent)
                        remaining_action = 0
                        invalid_play = False

            game_ended = False
            while not game_ended and remaining_action > 0:
                # random play to bridge missing actions
                game_ended = not self.random_play(game)
                remaining_action -= 1

            if not game_ended and self.win_condition():
                # reward bonus for winning
                reward += WIN_BONUS
                game_ended = True

            if not game_ended:
                # play the opponent's turn
                game_ended = not game.play_one_turn(1)
                if game_ended:
                    # game stopped: the opponent could not play and has lost
                    reward += WIN_BONUS
                    game_ended = True
                elif opponent.win_condition():
                    # the opponent wins at this stage: reward malus
                    reward += LOSS_MALUS
                    game_ended = True

            next_state = self.get_state(opponent)
            if invalid_play:
                reward = 0
                target = reward
            elif game_ended:
                # no next state
                target = reward
            else:
                # valid play with a next state: bootstrap from the model
                target = reward + y * np.max(
                    self.model.predict(np.array([next_state])))
            verbose_report("    target[{}]={}".format(a, target))
            target_vec[a] = target
            self.model.fit(np.array([current_state]),
                           np.array([target_vec]),
                           epochs=1, verbose=0)
            r_sum += reward
        r_avg_list.append(r_sum / 1000)
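# train_model encodes a pair of (card, action) choices into a single index:
# each of the two plays is one of 24 options (6 hand slots x 4 action kinds),
# so the joint index is a = action0 + 24 * action1, in [0, 576). A small
# sketch of the inverse mapping used in the loop above (the helper name
# decode_joint_action is hypothetical):

def decode_joint_action(a):
    """ Sketch: split a joint action index back into
        (card0_id, action0_id, card1_id, action1_id). """
    action0, action1 = a % 24, a // 24
    return (action0 % 6, action0 // 6, action1 % 6, action1 // 6)

# e.g. a = 137 -> action0 = 17, action1 = 5
#              -> card0_id = 5, action0_id = 2, card1_id = 5, action1_id = 0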