def evaluate_model_at_gamestate(gamestate, model):
    """
    Evaluates the model at a given GameState

    :param gamestate: GameState: represents the state of the game
    :param model: handed unchanged to load_model to obtain the network
        that is evaluated
    :return: tuple: first element of the value output, and the policy
        output passed through np.exp and reshaped to an 8x8 numpy array
    """
    # Transform full board into 2 boards of 0s and 1s
    board = gamestate.board
    # A cell counts for player 1 when it is neither empty (0) nor player 2.
    player1_board = np.array(
        [[int(val not in (0, 2)) for val in row] for row in board])
    # A cell counts for player 2 when it is neither empty (0) nor player 1.
    player2_board = np.array(
        [[int(val not in (0, 1)) for val in row] for row in board])

    # Player is the player who created this state (NOT next); that
    # player's pieces always go in the first channel.
    if other_player(gamestate.next_player) == 1:
        channels = [player1_board, player2_board]
    else:
        channels = [player2_board, player1_board]

    # Single-example batch: (batch, channels, rows, cols).
    x_train = np.stack(channels).reshape(1, 2, 8, 8)
    value, policy = _evaluate(x_train, load_model(model))

    # .cpu() is a no-op for tensors already on the CPU and copies
    # accelerator tensors to host memory, so the same code runs locally
    # and remotely without hand-editing.
    value_np = value.detach().cpu().numpy()
    policy_np = policy.detach().cpu().numpy()

    # NOTE(review): np.exp suggests the network emits log-probabilities
    # for the policy head - confirm against the model definition.
    return value_np[0], np.exp(policy_np.reshape(8, 8))
def export_game_data(self, path):
    """
    Appends game data to a json file.

    The file is expected to hold a single json list. When it does not
    exist, is not valid json, or does not contain a list, it is
    (re)created containing only this game's data.

    :param path: string: path to json file.
    """
    game_meta = [
        {
            "created_by": other_player(state[0].next_player),
            "board": state[0].board,
            "move_visits": state[1],
            "winloss": state[2]
        } for state in self.all_game_states
    ]

    # Only the read is guarded, and only against "nothing usable on
    # disk". A bare except here would also swallow KeyboardInterrupt or
    # a failed write and then replace the accumulated training set with
    # just this game.
    try:
        with open(path) as infile:
            data = json.load(infile)
    except (FileNotFoundError, json.JSONDecodeError):
        data = None

    if isinstance(data, list):
        data.extend(game_meta)
        print("TRAINING SET SIZE: {}".format(len(data)))
    else:
        # No usable existing data set: start a new one.
        data = game_meta

    with open(path, 'w') as outfile:
        json.dump(data, outfile)
def promising(r, c):
    """
    Tell whether the square at (r, c) holds a piece of the opponent of
    the player making the current action.

    :param r: int: row index of the square.
    :param c: int: column index of the square.
    :return: bool. True when (r, c) is in bounds and the board value there
        equals other_player(action.player), False otherwise.
    """
    # Off-board squares can never hold an opponent piece.
    if not in_bounds(r, c):
        return False
    return bool(self.board[r][c] == other_player(action.player))
def store_game_result(self, result):
    """
    Appends a win/loss label to every training example of this game.

    The label is 1 when the player who created the state matches result,
    0 when the state's next player matches result, and 0.5 otherwise.

    :param result: int: win/loss value for the game.
    """
    for example in self.all_game_states:
        upcoming = example[0].next_player
        if other_player(upcoming) == result:
            # The creator of this state is the one named by result.
            label = 1
        elif upcoming == result:
            # The player about to move is the one named by result.
            label = 0
        else:
            # result names neither player.
            label = 0.5
        example.append(label)
def game_result(self):
    """
    Reports the outcome of the game for the current board.

    :return: the player holding more pieces once neither player has a
        legal move, 0 when both hold the same number of pieces, or None
        while at least one player can still move.
    """
    mover = self.next_player
    mover_moves = self.get_player_legal_moves(mover)
    waiter_moves = self.get_player_legal_moves(other_player(mover))

    # At least one player can still make a legal move
    if mover_moves or waiter_moves:
        return None

    def pieces_of(player):
        # Number of board cells equal to the given player value.
        return sum(row.count(player) for row in self.board)

    mover_total = pieces_of(mover)
    waiter_total = pieces_of(other_player(mover))

    if mover_total > waiter_total:
        return mover
    if waiter_total > mover_total:
        return other_player(mover)
    # Equal piece counts
    return 0
def backpropagate(self, game_result):
    """
    Records a finished rollout on this node and on every ancestor.

    Each node's visit count grows by one. Its win score grows by 1 when
    game_result equals the player who created the node's state, and by
    0.5 when game_result is 0.

    :param game_result: int: result of the game after rollout.
    """
    self.visit_count += 1

    # The state was produced by the player who is NOT next to move, so
    # a win is credited from that player's point of view.
    creator = other_player(self.state.next_player)
    if game_result == creator:
        self.win_score += 1
    elif game_result == 0:
        # Draw
        self.win_score += 0.5

    # Continue up the tree until a node without a parent is reached.
    ancestor = self.parent
    if ancestor:
        ancestor.backpropagate(game_result)
def move(self, action):
    """
    Make a move and return updated game state.

    The current state is not modified; the returned GameState holds a
    deep copy of the board with the move applied.

    :param action: Move: represents a move. A move whose row and col are
        both None is treated as a pass.
    :return: GameState: updated game state, with the other player to move
    :raises Exception: if the move is not legal
    """
    # If no move was made by player, copy state and switch player.
    # TODO: In future iterations, the "pass" move should be treated as
    # its own kind of move. Currently, the nn ignores passes and the mcts
    # doesn't backpropogate a pass. This could affect the model's loss
    # significantly if players are passing often and this information is
    # not being reflected.
    if action.row is None and action.col is None:
        new_state = copy.deepcopy(self.board)
        return GameState(other_player(action.player), new_state)

    if not self.move_is_legal(action):
        raise Exception("Illegal move")

    new_state = copy.deepcopy(self.board)
    new_state[action.row][action.col] = action.player

    def promising(r, c):
        """
        Check that piece at row r and column c was played by opponent.

        :param r: int: Represents the row.
        :param c: int: Represents the column.
        :return: bool. True if piece was placed by opponent, False
            otherwise.
        """
        if not in_bounds(r, c):
            return False
        return bool(self.board[r][c] == other_player(action.player))

    def flip_in_direction(direction):
        """
        Flip the opponent's pieces captured along one direction.

        Walks away from the placed piece over consecutive opponent
        pieces; they are flipped in new_state only when the run ends on
        one of the moving player's own pieces.

        :param direction: string: Represents the direction. One of:
            ["N", "S", "E", "W", "NE", "NW", "SE", "SW"]
        """
        row, col = increment_row_col(action.row, action.col, direction)
        flips = []
        while promising(row, col):
            flips.append((row, col))
            row, col = increment_row_col(row, col, direction)

        # Anchors are read from the pre-move board so pieces flipped
        # along another direction cannot close a run.
        if flips and in_bounds(row, col) and \
                self.board[row][col] == action.player:
            for flip_row, flip_col in flips:
                new_state[flip_row][flip_col] = action.player

    for direction in ("N", "S", "E", "W", "NE", "NW", "SE", "SW"):
        flip_in_direction(direction)

    return GameState(other_player(action.player), new_state)