示例#1
0
    def update(self, state, action):
        """Apply the agent's move and, if the game is still open, the opponent's reply.

        Args:
            state: board before the agent's move; handed to ``board.add_move``.
            action: mapping with keys ``'player'`` (the mover's id) and
                ``'cell'`` (where to move).

        Returns:
            ``(next_state, reward)``: reward is 1.0 when the agent's move wins,
            -1.0 when the opponent's reply wins, and 0.0 otherwise.

        Side effects:
            Sets ``self.is_complete`` and ``self.outcome`` when the game ends
            (``"<id>_WINNER"`` or ``"CAT_GAME"``).
        """
        print("ENVIRONMENT UPDATE: ", state, action)

        mover_id = action['player']
        board_after = board.add_move(mover_id, action['cell'], state)

        # Agent's move ends the game: no opponent reply is needed.
        if play.is_winner(board_after, mover_id):
            self.is_complete = True
            self.outcome = mover_id + "_WINNER"
            return board_after, 1.0

        if play.is_cat_game(board_after):
            self.is_complete = True
            self.outcome = "CAT_GAME"
            return board_after, 0.0

        # The environment may already have been flagged complete before this call.
        if self.is_complete:
            return board_after, 0.0

        # Game still open: the opponent answers on the updated board.
        rival_id, rival_cell = self.opponent.pick_next_move(board_after)
        board_after = board.add_move(rival_id, rival_cell, board_after)

        payoff = 0.0
        if play.is_winner(board_after, rival_id):
            payoff = -1.0
            self.is_complete = True
            self.outcome = rival_id + "_WINNER"
        elif play.is_cat_game(board_after):
            self.is_complete = True
            self.outcome = "CAT_GAME"

        return board_after, payoff
示例#2
0
def model_environment(opponent, state, action):
    """Play one agent move as 'X' and, if the game is still open, one opponent reply.

    Progress is printed to stdout and written to the module-level ``file``
    object (defined outside this function).

    Args:
        opponent: object whose ``pick_next_move(board)`` returns an
            ``(opponent_id, opponent_move)`` pair.
        state: board before the agent moves.
        action: the agent's move; passed to ``p.add_move`` and
            ``board.to_state``.

    Returns:
        ``(current_board, reward, game_complete)``. ``reward`` is always a
        float: 1.0 if the agent's move wins, -1.0 if the opponent's reply
        wins, 0.0 otherwise. ``game_complete`` is True on a win or a cat game.
    """
    file.write("AGENT MAKING MOVE: " + str(action) + str(board.to_state(action)) + "\n")

    current_board = p.add_move('X', action, state)

    print("AFTER AGENT MOVE:")
    print(game.to_display_string(current_board))

    file.write("AFTER AGENT MOVE:\n")
    file.write(game.to_display_string(current_board))

    # The agent's move may end the game before the opponent gets a turn.
    if p.is_winner(current_board, 'X'):
        return current_board, 1.0, True
    if p.is_cat_game(current_board):
        return current_board, 0.0, True

    # let the opponent make a move ...
    opponent_id, opponent_move = opponent.pick_next_move(current_board)
    current_board = p.add_move(opponent_id, opponent_move, current_board)

    print("AFTER OPPONENT MOVE")
    print(game.to_display_string(current_board))

    file.write("AFTER OPPONENT MOVE\n")
    file.write(game.to_display_string(current_board))

    if p.is_winner(current_board, opponent_id):
        return current_board, -1.0, True
    if p.is_cat_game(current_board):
        # Previously returned the int 0 here; keep the reward type uniform.
        return current_board, 0.0, True

    return current_board, 0.0, False
示例#3
0
def play_game(p1, p2, file=None):
    """Run a game between two players, alternating turns starting with ``p1``.

    Args:
        p1, p2: player objects exposing a ``player`` attribute and a
            ``pick_next_move(board)`` method returning an indexable result
            whose first two items are passed to ``p.add_move``.
        file: optional writable object; when given, the board is logged
            before every move and at the end of the game.

    Returns:
        The winning player object, or ``None`` when the loop ends because
        ``p.is_cat_game`` reports a cat game.
    """
    log_enabled = file is not None
    grid = p.empty_board()
    contenders = [p1, p2]
    turn = 0
    victor = None
    moves_made = 0

    while True:
        mover = contenders[turn]
        print("Current move is for player: ", mover.player)

        if log_enabled:
            file.write("PRIOR TO MOVE {} ------------\n".format(moves_made))
            file.write(to_display_string(grid))

        # A cat game is only detected at the start of the following turn.
        if p.is_cat_game(grid):
            if log_enabled:
                file.write("RESULT IS CAT GAME")
            break

        choice = mover.pick_next_move(grid)
        grid = p.add_move(choice[0], choice[1], grid)
        p.display_board(grid)
        moves_made += 1

        if p.is_winner(grid, mover.player):
            victor = mover
            if log_enabled:
                file.write(
                    "FINAL BOARD AFTER MOVE {} WINNER IS: ".format(moves_made)
                    + victor.player + "\n"
                )
                file.write(to_display_string(grid))
            break

        # Hand the turn to the other player.
        print("Switching to player 1..." if turn == 0 else "Switching to player 0...")
        turn = 1 if turn == 0 else 0

    if victor is not None:
        print("WINNER IS PLAYER: ", victor.player)
    else:
        print("CAT GAME")

    return victor
示例#4
0
    def get_reward(self, board, action):
        """Score the board stored under ``action['board']`` for this player.

        Args:
            board: not used by this method.
            action: mapping whose ``'board'`` entry is the board to evaluate.

        Returns:
            1.0 if ``p.is_winner`` reports a win for ``self.player``;
            -0.5 if ``p.is_potential_loser_on_next_move`` flags the board
            (the board is also printed and written to ``self.log``);
            0.0 otherwise.
        """
        resulting_board = action['board']

        print("In REWARD given next board -----")
        p.display_board(resulting_board)

        if p.is_winner(resulting_board, self.player):
            return 1.0

        if not p.is_potential_loser_on_next_move(resulting_board, self.player):
            return 0.0

        # Flagged as a potential loss: record it before returning the penalty.
        print("POTENTIAL LOSER BOARD: ")
        self.log.write("POTENTIAL LOSING BOARD -----\n")
        self.log.write(self.to_string(resulting_board))
        p.display_board(resulting_board)
        return -0.5
示例#5
0
    def pick_next_move(self, current_board):
        """Pick a move: an immediately winning cell if one exists, else a random free cell.

        Args:
            current_board: board to move on; only passed to the ``p`` and
                ``board`` helpers.

        Returns:
            ``(self.player, cell)`` where ``cell`` is a ``(row, col)`` tuple
            built from ``p.get_rows`` / ``p.get_cols`` for which
            ``p.is_occupied`` is false.

        Raises:
            ValueError: if the board has no unoccupied cell.
        """
        # identify all available cells
        available_cells = [
            (row, col)
            for row in p.get_rows(current_board)
            for col in p.get_cols(current_board)
            if not p.is_occupied(current_board, (row, col))
        ]

        # Fail with a clear message instead of the opaque error that
        # np.random.randint(0) would raise further down.
        if not available_cells:
            raise ValueError("pick_next_move called on a board with no free cells")

        # Take the first cell that wins outright.
        # NOTE(review): assumes board.add_move returns a new board and leaves
        # current_board untouched — confirm.
        for cell in available_cells:
            tmp_board = board.add_move(self.player, cell, current_board)
            if p.is_winner(tmp_board, self.player):
                return self.player, cell

        # otherwise, pick move at random
        inx_select = np.random.randint(len(available_cells))
        return (self.player, available_cells[inx_select])