示例#1
0
                # Start Play state
                if b_:
                    gamestate = PLAY_STATE
                    ttt = Tictactoe()
                    time.sleep(0.1)
        # Play state
        elif gamestate == PLAY_STATE:

            # Mouse click
            if event.type == pygame.MOUSEBUTTONDOWN:
                point = event.pos

                # If user's turn
                if not ai_turn:

                    # If game is not over
                    if not ttt.gameover():
                        # Cells
                        b_ = True
                        for i in range(3):
                            for j in range(3):
                                if cells[i][j].collidepoint(point):
                                    if ttt.is_valid_action((i, j)):
                                        ttt.action((i, j))
                                        ai_turn = True
                                    break

                # Menu button
                if menu_button.collidepoint(point):
                    gamestate = MENU_STATE
示例#2
0
    def train(self, num_games):
        """
        Run self-play training games and feed the results to the Q-table.

        Each game is played on a fresh ``Tictactoe`` instance. Every move is
        chosen with ``self.best_action(..., epsilon_true=True)`` and the
        resulting transitions are passed to ``self.update_q_value`` with a
        reward of 1, -1 or 0 depending on how the move turned out.

        A progress line is printed on every 1000th game (starting with the
        first one) and ``'Training done'`` is printed at the end.

        Args:
            num_games (int): number of games to train
        """

        for game_index in range(num_games):

            # Progress report
            if game_index % 1000 == 0:
                print(f'Game #{game_index + 1}')

            # Fresh game for this round
            game = Tictactoe()

            # Most recent (state, action) pair recorded for each player
            memory = {
                mark: {'state': None, 'action': None}
                for mark in ('X', 'O')
            }

            while True:

                # Pick a move for the current position
                board_before = game.get_board()
                move = self.best_action(board_before, epsilon_true=True)

                # Remember it under the player about to move
                mover_record = memory[game.get_player()]
                mover_record['state'] = board_before
                mover_record['action'] = move

                # Play the move and look at the resulting position
                game.action(move)
                board_after = game.get_board()

                # Game continues
                if not game.gameover():
                    # The player reported by get_player() after the move gets
                    # a neutral update for its previous move, if it has one
                    waiting_record = memory[game.get_player()]
                    if waiting_record['state'] is not None:
                        self.update_q_value(waiting_record['state'],
                                            waiting_record['action'],
                                            board_after, 0)
                    continue

                # Game over
                if game.get_winner() is None:
                    # Draw: neutral reward for the final move
                    self.update_q_value(board_before, move, board_after, 0)
                else:
                    # Reward the move that finished the game ...
                    self.update_q_value(board_before, move, board_after, 1)

                    # ... and penalise the last recorded move of the player
                    # reported by get_player() after it (the losing side)
                    beaten_record = memory[game.get_player()]
                    self.update_q_value(beaten_record['state'],
                                        beaten_record['action'],
                                        board_after, -1)

                break

        print('Training done')