def play_to_train(self, Vo, Vx, n_iterations=10000, initial_player=Board.x_symbol):
    '''
    Train the value functions of both players by self-play with random moves.

    Every iteration plays one complete game in which each move is drawn
    uniformly at random from the empty cells. When the game ends with
    ``is_game_over()`` being true, every visited state is given
    ``AVERAGE_REWARD`` except the final one, which gets ``HIGHEST_REWARD``
    for the winner and ``LOWEST_REWARD`` for the loser, and both value
    functions are passed through ``update_value_function``. Games that fill
    the board without ``is_game_over()`` becoming true are treated as draws
    and do not update anything.

    :param Vo: the value functions of player 'o' (is a dictionary).
    Example: Vo[1000010000] = 0.35, where 1000010000 is the id of the state,
    0.35 is the value function of the state 1000010000.
    :param Vx: the value functions of player 'x' (is a dictionary)
    :param n_iterations: the number of games to play (must be > 0)
    :param initial_player: the symbol that moves first in the first game
    (``Board.x_symbol`` or ``Board.o_symbol``)
    :return: None. NOTE(review): the results of ``update_value_function`` are
    only rebound to the local names, so the caller sees the training result
    only if that method updates the dictionaries in place -- confirm.
    '''
    assert initial_player in (Board.x_symbol, Board.o_symbol)
    assert len(Vx) == len(Vo)
    assert n_iterations > 0

    current_turn = initial_player
    for iteration in range(n_iterations):
        if iteration % 1000 == 0:
            print(f'iteration = {iteration}')

        b = Board()
        board = b.board
        visited_states = []
        game_over = False

        while not game_over:
            # Use the board's own empty marker (as play_with_machine does)
            # instead of a hard-coded 0.
            empty_cells = [
                (i, j)
                for i in range(b.height)
                for j in range(b.width)
                if board[i, j] == Board.empty_id
            ]
            if not empty_cells:
                # Board is full and nobody has won: a draw.
                break

            # Play a uniformly random empty cell.
            row, col = empty_cells[np.random.choice(len(empty_cells))]
            board[row, col] = b.convert_turn_symbol2id(current_turn)

            # Record the state reached by this move, then hand over the turn.
            visited_states.append(b.get_state())
            game_over = b.is_game_over()
            if current_turn == Board.x_symbol:
                current_turn = Board.o_symbol
            else:
                current_turn = Board.x_symbol

        if game_over:
            # Intermediate states are neutral; only the terminal state
            # carries the win/lose signal.
            rewards_x = [self.AVERAGE_REWARD] * len(visited_states)
            rewards_o = [self.AVERAGE_REWARD] * len(visited_states)

            winner = b.get_winner()
            if winner == Board.x_symbol:
                rewards_x[-1] = self.HIGHEST_REWARD
                rewards_o[-1] = self.LOWEST_REWARD
            elif winner == Board.o_symbol:
                rewards_x[-1] = self.LOWEST_REWARD
                rewards_o[-1] = self.HIGHEST_REWARD

            Vx = self.update_value_function(Vx, visited_states, rewards_x)
            Vo = self.update_value_function(Vo, visited_states, rewards_o)
        # Draws deliberately leave both value functions untouched.

        # Flip the turn once more before the next game. The intent noted by
        # the original author is that each player opens about half the games.
        if current_turn == Board.x_symbol:
            current_turn = Board.o_symbol
        else:
            current_turn = Board.x_symbol
def play_with_machine(V_machine, human=Board.x_symbol, machine=Board.o_symbol):
    '''
    Play one interactive console game between a human and the machine.

    The human always moves first. On the machine's turn every empty cell is
    tried, the resulting state is looked up in ``V_machine`` and the move with
    the highest value is played (the first such move on ties). The board is
    drawn after every move and the result is printed when the game ends.

    :param V_machine: value functions of machine, indexed by state id
    :param human: 'x' or 'o'
    :param machine: 'o' or 'x'
    :return: None
    '''
    current_turn = human
    b = Board()
    board = b.board

    # States reached during the game and their placeholder rewards. They are
    # recorded but not consumed inside this function.
    states = []
    states_reward = []
    is_draw = False

    while not b.is_game_over():
        empty_cells = [
            [i, j]
            for i in range(b.height)
            for j in range(b.width)
            if board[i, j] == b.empty_id
        ]
        if not empty_cells:
            # Full board without a winner. Without this guard the human
            # prompt would never accept a move and the machine would have
            # nothing to play.
            is_draw = True
            break

        if current_turn == human:
            print(f'>>> Human turn ({human})')
        else:
            print(f'>>> Machine turn ({machine})')

        if current_turn == human:
            # Ask the human until a legal move is entered.
            while True:
                print('Let choose the cell you want to play (e.g., 1 2): ')
                tokens = input().split()
                try:
                    row = int(tokens[0])
                    col = int(tokens[1])
                except (ValueError, IndexError):
                    # Non-numeric or incomplete input: ask again, don't crash.
                    print('Wrong move!')
                    continue

                # Valid indices are 0..height-1 / 0..width-1; negative values
                # are rejected so they cannot wrap around.
                inside = 0 <= row < b.height and 0 <= col < b.width
                if inside and board[row, col] == Board.empty_id:
                    board[row, col] = b.convert_turn_symbol2id(current_turn)
                    break
                print('Wrong move!')
        else:
            # Evaluate every legal move by temporarily playing it.
            potential_states = []
            potential_moves = []
            for i, j in empty_cells:
                board[i, j] = b.convert_turn_symbol2id(current_turn)
                potential_states.append(b.get_state())
                potential_moves.append([i, j])
                board[i, j] = b.empty_id

            for move, state in zip(potential_moves, potential_states):
                print(f'potential move {move} = {np.round(V_machine[state],2)}')

            # Seed the search with the first candidate, not a fixed threshold
            # of 0, so a move is always chosen even when no value exceeds 0.
            best_move = potential_moves[0]
            best_move_value = V_machine[potential_states[0]]
            for move, state in zip(potential_moves[1:], potential_states[1:]):
                value = V_machine[state]
                if value > best_move_value:
                    best_move_value = value
                    best_move = move

            # play
            print(f'best move: {best_move}')
            board[int(best_move[0]), int(best_move[1])] = b.convert_turn_symbol2id(current_turn)

        # store the state of the board
        states.append(b.get_state())
        states_reward.append(Training.AVERAGE_REWARD)

        # change turn
        current_turn = machine if current_turn == human else human

        b.draw_board()

    if is_draw:
        print('Draw')
    else:
        print(f'Winner = {b.winner_symbol}')