def update_board(self, game_state: GameState):
    assert isinstance(game_state, SpookState)
    super().update_board(game_state)
    valid_moves = game_state.get_valid_moves()
    volume = game_state.calculate_volume()
    # The pass move's entry sits just past the board positions, at index
    # volume.
    is_pass_valid = bool(valid_moves[volume])
    self.ui.pass_button.setVisible(is_pass_valid)
def analyse(self, board: GameState) -> typing.Tuple[float, np.ndarray]:
    policy = self.get_policy(board)
    player = board.get_active_player()
    if board.is_win(player):
        value = 1.0
    elif board.is_win(-player):
        value = -1.0
    else:
        value = 0.0
    return value, policy
def analyse(self, board: GameState) -> typing.Tuple[float, np.ndarray]:
    if board.is_ended():
        return self.analyse_end_game(board)
    outputs = self.model.predict(board.get_spaces().reshape(
        (1, self.board_height, self.board_width, 1)))
    policy = outputs[0, :-1]
    value = outputs[0, -1]
    return value, policy
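# A minimal, self-contained sketch (the numbers are made up) of how the
# combined network output above is split: the first action_size entries are
# the move policy and the final entry is the value estimate.
import numpy as np

outputs = np.array([[0.1, 0.3, 0.2, 0.1, 0.05, 0.15, 0.1, -0.4]])
policy = outputs[0, :-1]   # seven policy entries, one per move
value = outputs[0, -1]     # value estimate for the active player: -0.4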
def update_board(self, game_state: GameState):
    assert isinstance(game_state, SpaijiState)
    valid_moves = game_state.get_valid_moves()
    volume = game_state.calculate_volume()
    # Black placements fill the first half of the move list, white the rest.
    is_black_valid = valid_moves[:volume].any()
    is_white_valid = valid_moves[volume:].any()
    visible_move_types = []
    if is_black_valid:
        visible_move_types.append(MoveType.BLACK)
    if is_white_valid:
        visible_move_types.append(MoveType.WHITE)
    self.visible_move_types = visible_move_types
    super().update_board(game_state)
def get_policy(self, board: GameState):
    valid_moves = board.get_valid_moves()
    if not valid_moves.any():
        valid_moves = (valid_moves == 0)
    raw_policy = np.multiply(valid_moves, 0.9**np.arange(len(valid_moves)))
    policy = raw_policy / raw_policy.sum()
    return policy
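# Runnable sketch (not from the original source) of the decaying policy
# above: with four valid moves, the raw weights are 0.9 ** [0, 1, 2, 3],
# so earlier entries in the move list are preferred.
import numpy as np

valid_moves = np.array([True, True, True, True])
raw_policy = np.multiply(valid_moves, 0.9 ** np.arange(len(valid_moves)))
print(raw_policy / raw_policy.sum())
# [0.2908 0.2617 0.2355 0.2120] (rounded): a geometric preference.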
def get_move_probabilities(
        self,
        game_state: GameState,
        limit: int = 10) -> typing.List[typing.Tuple[str, float, int, float]]:
    """ Report the probability that each move is the best choice.

    :param game_state: the starting position
    :param limit: the maximum number of moves to report
    :return: [(move_display, probability, value_count, avg_value)], where
        value_count is the number of times the value was probed from the
        move, and avg_value is the average value from all those probes.
    """
    self.find_node(game_state)
    children = self.current_node.find_all_children()
    temperature = 1.0
    probabilities = self.current_node.rank_children(children, temperature)
    value_counts = [child.value_count for child in children]
    ranked_children = sorted(zip(value_counts, probabilities, children),
                             key=itemgetter(0),
                             reverse=True)
    top_children = ranked_children[:limit]
    child_node: SearchNode
    top_moves = [(game_state.display_move(child_node.move),
                  probability,
                  value_count,
                  child_node.average_value)
                 for value_count, probability, child_node in top_children
                 if child_node.move is not None]
    return top_moves
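# Self-contained sketch of the ranking step in get_move_probabilities: pair
# each child's probe count with its probability and label (the labels here
# are hypothetical), then sort by count, descending.
from operator import itemgetter

value_counts = [12, 40, 3]
probabilities = [0.2, 0.7, 0.1]
children = ['b1', 'c4', 'a2']
ranked = sorted(zip(value_counts, probabilities, children),
                key=itemgetter(0),
                reverse=True)
# ranked == [(40, 0.7, 'c4'), (12, 0.2, 'b1'), (3, 0.1, 'a2')]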
def analyse_end_game(self,
                     board: GameState) -> typing.Tuple[float, np.ndarray]:
    """ Calculate the value based on the winner.

    :param board: state of a game that must have ended
    :return: 1 if the now active player won the game, -1 for a loss, and
        0 for a draw, plus an evenly distributed policy (probably irrelevant)
    """
    winner = board.get_winner()
    active_player = board.get_active_player()
    if winner == board.NO_PLAYER:
        value = 0
    elif winner == active_player:
        value = 1
    else:
        value = -1
    return value, self.create_even_policy(board)
def update_board(self, game_state: GameState):
    assert isinstance(game_state, SpireState)
    move_types = game_state.get_valid_colours()
    self.visible_move_types = move_types
    if self.selected_move_type not in move_types:
        self.selected_move_type = move_types[0]
    super().update_board(game_state)
def get_policy(self, board: GameState):
    valid_moves = board.get_valid_moves()
    if valid_moves.any():
        first_valid = np.nonzero(valid_moves)[0][0]
    else:
        first_valid = 0
    # Force a float dtype so the one-hot policy isn't coerced to booleans
    # when valid_moves is a boolean array.
    policy = np.zeros_like(valid_moves, dtype=float)
    policy[first_valid] = 1.0
    return policy
def choose_move(self, game_state: GameState) -> int:
    """ Choose a move for the given board.

    :param game_state: the current state of the game.
    :return: the chosen move's index in the list of valid moves.
    """
    self.search_manager.search(game_state, self.iteration_count)
    if game_state.get_move_count() < 15:
        # Early in the game, sample moves by their search weights for
        # variety; later, always take the best move found.
        return self.search_manager.choose_weighted_move()
    return self.search_manager.get_best_move()
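# Sketch of the two selection modes used in choose_move (the visit counts
# are made up, and the weighting scheme is an assumption about what
# choose_weighted_move does internally): sample in proportion to the search
# weights early on, then switch to the deterministic best move.
import numpy as np

rng = np.random.default_rng()
visit_counts = np.array([5., 60., 35.])
weights = visit_counts / visit_counts.sum()
early_move = rng.choice(len(weights), p=weights)   # weighted sample
late_move = int(np.argmax(visit_counts))           # deterministic best: 1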
@staticmethod
def create_even_policy(board: GameState) -> np.ndarray:
    """ Create an evenly distributed policy across valid moves.

    If there are no valid moves, distribute across all entries.
    """
    child_valid_flags = board.get_valid_moves()
    valid_count = child_valid_flags.sum()
    if valid_count:
        child_predictions = child_valid_flags / valid_count
    else:
        child_predictions = (child_valid_flags + 1) / child_valid_flags.size
    return child_predictions
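# Runnable sketch of create_even_policy's two branches: the normal case
# with two valid moves, and the fallback when no moves are valid.
import numpy as np

flags = np.array([1, 0, 1, 0])
print(flags / flags.sum())       # [0.5  0.   0.5  0.  ]
empty = np.array([0, 0, 0, 0])
print((empty + 1) / empty.size)  # [0.25 0.25 0.25 0.25]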
def __init__(self, start_state: GameState):
    if not isinstance(start_state, GridGameState):
        raise ValueError(
            f'{start_state.__class__} is not a subclass of GridGameState.')
    super().__init__()

    # start_state params
    self.board_height = start_state.board_height
    self.board_width = start_state.board_width
    self.action_size = len(start_state.get_valid_moves())
    self.epochs_completed = 0
    self.epochs_to_train = 100
    self.start_state = start_state
    args = Namespace(lr=0.001,
                     dropout=0.3,
                     epochs=10,
                     batch_size=64,
                     num_channels=512)
    self.checkpoint_name = 'random weights'
    self.args = args
    num_channels = 512
    kernel_size = [3, 3]
    dropout = 0.3

    model = Sequential()
    # regularizer = regularizers.l2(0.00006)
    regularizer = regularizers.l2(0.0001)
    model.add(Conv2D(num_channels, kernel_size,
                     padding='same',
                     activation='relu',
                     input_shape=(self.board_height, self.board_width, 1),
                     activity_regularizer=regularizer))
    model.add(Conv2D(num_channels, kernel_size,
                     padding='same',
                     activation='relu',
                     activity_regularizer=regularizer))
    model.add(Conv2D(num_channels, kernel_size,
                     activation='relu',
                     activity_regularizer=regularizer))
    model.add(Conv2D(num_channels, kernel_size,
                     activation='relu',
                     activity_regularizer=regularizer))
    model.add(Dropout(dropout))
    model.add(Dropout(dropout))
    model.add(Flatten())
    # One output per move for the policy, plus one for the board value.
    model.add(Dense(self.action_size + 1))
    model.compile('adam', 'mean_squared_error')
    self.model = model
def analyse_move(
        self,
        game_state: GameState,
        analysing_player: int,
        move_probabilities: typing.List[typing.Tuple[str,
                                                     float,
                                                     int,
                                                     float]]):
    """ Record analysis of the choices for a move.

    :param game_state: the state before the move
    :param analysing_player: the player doing the analysis; if both players
        report analysis, the active player's will be kept
    :param move_probabilities: the detailed analysis of best valid moves
        [(choice, probability, count, value)] where choice is the move
        display, probability is the recommended probability of choosing
        that move, count is the number of times the move or a descendant
        was analysed during the search, and value is the estimated value
        of the move, where 1 is 100% wins for the active player and -1 is
        100% losses.
    """
    for item in reversed(self.items):
        if item.game_state == game_state:
            break
    else:
        raise ValueError('Board not found in log.')
    active_player = game_state.get_active_player()
    if item.choices and active_player != analysing_player:
        return
    item.choices = move_probabilities
    q = Decimal('0.1')
    for i, (choice,
            probability,
            count,
            value) in enumerate(move_probabilities, 1):
        dec_value = Decimal(value).quantize(q)
        if choice == item.move_text:
            item.comment = f'{dec_value}'
            if i != 1:
                item.comment += f', choice {i}'
            break
    else:
        # Didn't find the chosen move in the analysed moves; it was
        # probably a poor choice.
        item.comment = '?'
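# Small demonstration of the comment formatting above: Decimal.quantize
# rounds the move's estimated value to one decimal place before it is
# written into the log comment.
from decimal import Decimal

q = Decimal('0.1')
print(Decimal(0.8333).quantize(q))   # 0.8
print(Decimal(-0.25).quantize(q))    # -0.2 (banker's rounding by default)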
def record_move(self, game_state: GameState, move: int):
    self.step += 1
    player = game_state.display_player(game_state.get_active_player())
    move_text = game_state.display_move(move)
    self.items.append(LogItem(self.step, player, move_text, game_state))
def update_board(self, game_state: GameState):
    assert isinstance(game_state, ShibumiGameState)
    self.current_state = game_state
    valid_moves = game_state.get_valid_moves()
    is_ended = game_state.is_ended()
    pixmaps = {game_state.BLACK: self.black_scaled,
               game_state.WHITE: self.white_scaled,
               game_state.RED: self.red_scaled,
               game_state.NO_PLAYER: self.black_scaled}
    if self.show_move_types:
        move_type = self.selected_move_type
    else:
        move_type = MoveType.BLACK
    for level, item_level in zip(game_state.get_levels(), self.item_levels):
        for row_pieces, row_items in zip(level, item_level):
            piece_item: GraphicsShibumiPieceItem
            for piece, piece_item in zip(row_pieces, row_items):
                move_index = game_state.get_index(piece_item.height,
                                                  piece_item.row,
                                                  piece_item.column,
                                                  move_type)
                is_valid = valid_moves[move_index] and not is_ended
                piece_item.setVisible(bool(piece != game_state.NO_PLAYER or
                                           is_valid))
                piece_item.hover_listener = self if is_valid else None
                pixmap = pixmaps[piece]
                piece_item.setPixmap(pixmap)
                # Empty but valid spaces stay nearly invisible until hovered.
                piece_item.setOpacity(0.001
                                      if piece == game_state.NO_PLAYER
                                      else 1)
                if piece_item == self.hovered_piece:
                    if is_valid:
                        piece_item.setOpacity(0.5)
                    else:
                        self.hovered_piece = None
    displayed_player: typing.Optional[int] = None
    if is_ended:
        if game_state.is_win(game_state.WHITE):
            self.update_move_text('wins')
            displayed_player = game_state.WHITE
        elif game_state.is_win(game_state.BLACK):
            self.update_move_text('wins')
            displayed_player = game_state.BLACK
        elif game_state.is_win(game_state.RED):
            self.update_move_text('wins')
            displayed_player = game_state.RED
        else:
            self.update_move_text('draw')
    else:
        displayed_player = game_state.get_active_player()
        self.update_move_text(self.choose_active_text())
    if displayed_player == game_state.WHITE:
        self.ui.player_pixmap.setPixmap(self.white_pixmap)
    elif displayed_player == game_state.BLACK:
        self.ui.player_pixmap.setPixmap(self.black_pixmap)
    elif displayed_player == game_state.RED:
        self.ui.player_pixmap.setPixmap(self.red_pixmap)
    if displayed_player is not None:
        displayed_player = int(displayed_player)
    self.ui.player_pixmap.setVisible(
        displayed_player != game_state.NO_PLAYER)
    if self.show_counts:
        self.update_count_text()
def update_board(self, game_state: GameState):
    player = game_state.get_active_player()
    player_move = MoveType(player)
    self.visible_move_types = (player_move, MoveType.RED)
    super().update_board(game_state)
def on_game_ended(self, game_state: GameState):
    if (self.is_history_dirty or
            self.display is None or
            self.ui.searches_lock1.isChecked()):
        return
    db_session = self.db_session
    if db_session is None:
        return
    game_record = GameRecord.find_or_create(db_session, game_state)
    game_end_time = datetime.now()
    game_duration = game_end_time - self.game_start_time
    match_record = MatchRecord(game=game_record,
                               start_time=self.game_start_time,
                               total_seconds=round(
                                   game_duration.total_seconds()),
                               move_count=game_state.get_move_count())
    db_session.add(match_record)
    winner = game_state.get_winner()
    mcts_player: typing.Optional[MctsPlayer]
    for player_number in game_state.get_players():
        mcts_player = self.display.get_player(player_number)
        if mcts_player is None:
            player_record = db_session.query(PlayerRecord).filter_by(
                type=PlayerRecord.HUMAN_TYPE).one_or_none()
            if player_record is None:
                player_record = PlayerRecord(type=PlayerRecord.HUMAN_TYPE)
                db_session.add(player_record)
        else:
            player_record = db_session.query(PlayerRecord).filter_by(
                type=PlayerRecord.PLAYOUT_TYPE,
                iterations=mcts_player.iteration_count).one_or_none()
            if player_record is None:
                player_record = PlayerRecord(
                    type=PlayerRecord.PLAYOUT_TYPE,
                    iterations=mcts_player.iteration_count)
                db_session.add(player_record)
        if player_number == winner:
            result = 1
        elif winner == game_state.NO_PLAYER:
            result = 0
        else:
            result = -1
        match_player = MatchPlayerRecord(match=match_record,
                                         player=player_record,
                                         player_number=player_number,
                                         result=result)
        db_session.add(match_player)
    db_session.commit()

    try:
        mcts_player, = self.display.mcts_players
    except ValueError:
        # Didn't have exactly one MCTS player.
        return
    assert mcts_player is not None
    winning_player = game_state.get_winner()
    # Score from the human's point of view: a win for the MCTS player
    # counts against the human, so the strength adjuster sees -1.
    if winning_player == mcts_player.player_number:
        score = -1
    elif winning_player == GameState.NO_PLAYER:
        score = 0
    else:
        score = 1
    settings = get_settings(self.start_state)
    strength_adjuster = StrengthAdjuster(
        strength=mcts_player.iteration_count,
        game_count=settings.value('game_count', 0, int),
        last_score=settings.value('last_score', 0, int),
        streak_length=settings.value('streak_length', 1, int))
    strength_adjuster.record_score(score)
    settings.setValue('searches', strength_adjuster.strength)
    settings.setValue('game_count', strength_adjuster.game_count)
    settings.setValue('last_score', strength_adjuster.last_score)
    settings.setValue('streak_length', strength_adjuster.streak_length)