def move(self, board: Board) -> (GameResult, bool):
    """
    Makes a move on the given input state

    :param board: The current state of the game
    :return: The GameResult after this move, Flag to indicate whether the move finished the game
    """
    # Record the position before moving; used later when training the network.
    self.board_position_log.append(board.state.copy())
    nn_input = self.board_state_to_nn_input(board.state)
    # get_valid_probs works on batches; we pass a batch of one and unwrap it.
    probs = self.get_valid_probs([nn_input], [board])
    probs = probs[0]

    # During the pre-training phase we play purely random legal moves.
    # After that the move is *sampled* from the network's probability
    # distribution over moves (not argmax), which itself provides exploration.
    if (self.training is True) and \
            (self.game_counter < self.pre_training_games):
        move = board.random_empty_spot()
    else:
        if np.isnan(probs).any():
            # Can happen when all probabilities degenerate to 0. Best thing we can do is
            # make a random legal move
            move = board.random_empty_spot()
        else:
            # Sample a move index weighted by the network's probabilities.
            move = np.random.choice(np.arange(len(probs)), p=probs)
        if not board.is_legal(move):
            # Debug case only, I hope — get_valid_probs is expected to have
            # zeroed out illegal moves already. NOTE(review): the illegal move
            # is still played below; confirm this cannot happen in practice.
            print("Illegal move!")
    # We record the action we selected as well as the Q values of the current state for later use when
    # adjusting NN weights.
    self.action_log.append(move)

    # We execute the move and return the result
    _, res, finished = board.move(move, self.side)
    return res, finished
def move(self, board: Board) -> (GameResult, bool):
    """
    Implements the Player interface and makes a move on Board `board`

    :param board: The Board to make a move on
    :return: A tuple of the GameResult and a flag indicating if the game is over after this move.
    """
    # Keep a log of every position we see; the training step later pairs
    # these positions with their updated Q values.
    self.board_position_log.append(board.state.copy())
    nn_input = self.board_state_to_nn_input(board.state)
    valid_probs, _ = self.get_valid_probs([nn_input], self.q_net, [board])
    valid_probs = valid_probs[0]

    # Epsilon-greedy move selection: while training, play randomly during
    # the pre-training phase and with probability `random_move_prob`
    # afterwards; otherwise exploit by taking the highest-probability move
    # (illegal moves were already filtered out by get_valid_probs).
    explore = (self.training is True) and (
        (self.game_counter < self.pre_training_games)
        or (np.random.rand(1) < self.random_move_prob))
    if explore:
        move = board.random_empty_spot()
    else:
        move = np.argmax(valid_probs)

    # Remember which action we took so the weight update can credit it.
    self.action_log.append(move)

    _, res, finished = board.move(move, self.side)
    return res, finished
def get_move(self, board: Board) -> int:
    """
    Return the next move given the board `board` based on the current values of next states

    :param board: The current board state
    :return: The next move based on the current values of next states, starting from input state
    """
    if self.move_strategy == MoveStrategy.EXPLORATION:
        # Exploration: play a random empty cell. We still query get_v so
        # that a value entry exists for this board state.
        chosen = board.random_empty_spot()
        _ = self.get_v(board)
        return chosen

    # Exploitation: greedily pick among the highest-valued moves, breaking
    # ties at random. Illegal picks are masked out with -1.0 and we retry
    # until a legal move comes up.
    vals = self.get_v(board)  # type: np.ndarray
    while True:
        top_value = np.amax(vals)
        candidates = np.argwhere(vals == top_value).flatten().tolist()
        chosen = np.random.choice(candidates)  # type: int
        if board.is_legal(chosen):
            return chosen
        vals[chosen] = -1.0
def move(self, board: Board) -> (GameResult, bool):
    """
    Making a random move

    :param board: The board to make a move on
    :return: The result of the move
    """
    spot = board.random_empty_spot()
    _, result, game_over = board.move(spot, self.side)
    return result, game_over
def play_random_game():
    """
    Play one complete game of purely random moves, CROSS moving first,
    printing the board after every move and announcing the outcome.
    """
    board = Board()
    finished = False
    result = None
    # `to_move` is the side about to play; `just_moved` the one that played last.
    just_moved, to_move = NAUGHT, CROSS
    while not finished:
        _, result, finished = board.move(board.random_empty_spot(), to_move)
        print_board(board)
        just_moved, to_move = to_move, just_moved
    if result == GameResult.DRAW:
        print("Game is a draw")
    elif just_moved == CROSS:
        print("Cross won!")
    else:
        print("Naught won!")
def move(self, board: Board) -> (GameResult, bool): """ Implements the Player interface and makes a move on Board `board` :param board: The Board to make a move on :return: A tuple of the GameResult and a flag indicating if the game is over after this move. """ # We record all game positions to feed them into the NN for training with the corresponding updated Q # values. self.board_position_log.append(board.state.copy()) nn_input = self.board_state_to_nn_input(board.state) probs, qvalues = self.get_probs(nn_input) qvalues = np.copy(qvalues) # We filter out all illegal moves by setting the probability to 0. We don't change the q values # as we don't want the NN to waste any effort of learning different Q values for moves that are illegal # anyway. for index, p in enumerate(qvalues): if not board.is_legal(index): probs[index] = -1 elif probs[index] < 0: probs[index] = 0.0 # Most of the time our next move is the one with the highest probability after removing all illegal ones. # Occasionally, however we randomly chose a random move to encourage exploration if (self.training is True) and (np.random.rand(1) < self.random_move_prob): move = board.random_empty_spot() else: move = np.argmax(probs) # Unless this is the very first move, the max Q value of this state is also the max Q value of # the move that got the game from the previous state to this one. if len(self.action_log) > 0: self.next_max_log.append(qvalues[np.argmax(probs)]) # We record the action we selected as well as the Q values of the current state for later use when # adjusting NN weights. self.action_log.append(move) self.values_log.append(qvalues) # We execute the move and return the result _, res, finished = board.move(move, self.side) return res, finished