Пример #1
0
 def turn(self):
     letter, count = self.lettersAreNormal
     if count > 0 and self.words.EMPTY not in self.letters:
         return State.ChangeLetters(letter, count)
     else if count < 0:
         return State.LettersChange
     match0 = self.words.getMatches(0)
     try:
         word, pos, scores = max(self.wordsGenerator(), key = lambda x: x[2])
     except ValueError:
         pass # TODO
     try:
         wordWithCon, posOfWWC, ConWord, posOfC, scoresWC = self.board.BestConnectedWord(letters)
Пример #2
0
def negamax(state, alpha, beta, depth, θ):
    """Return the negamax value of ``state`` using fail-soft alpha-beta pruning.

    Args:
        state: position to evaluate from the point of view of the side to
            move. It must provide ``terminal_test()``, ``utility()``,
            ``actions()``, ``result(action)`` and a ``board`` attribute.
        alpha: lower bound of the search window (value the side to move is
            already assured of).
        beta: upper bound of the search window; reaching it ends the search
            of this node early.
        depth: number of plies still to search. At 0 the heuristic
            ``H(Φ(state), θ)`` is returned instead of searching further.
        θ: weight vector handed to the heuristic ``H``.

    Returns:
        The value of ``state`` for the side to move.
    """
    if state.terminal_test():
        return state.utility()
    if depth == 0:
        return H(Φ(state), θ)

    v = -INF
    for a in state.actions():
        child = state.result(a)
        # The child is valued from the opponent's side, so both the board and
        # the window are flipped: the window is negated and its bounds are
        # swapped. Handing (alpha, beta) down unchanged would test the
        # opponent's values against bounds expressed from our perspective.
        v = max(v, -negamax(State(-1 * child.board), -beta, -alpha,
                            depth - 1, θ))
        if v >= beta:
            # Cutoff: the opponent will never allow this line.
            return v
        alpha = max(alpha, v)
    return v
Пример #3
0
 def turn0(self):
     """Try to place the first word of the game through the board centre.

     Returns:
         ``State.ChangeLetters(letter, count)`` when ``lettersAreNormal``
         reports a positive count and the rack holds no ``words.EMPTY``
         entry, ``State.LettersChange`` when the count is negative or no
         word can be placed, and ``State.SuccessfulTurn`` once a word has
         been put on the board.
     """
     letter, count = self.lettersAreNormal
     if count > 0 and self.words.EMPTY not in self.letters:
         return State.ChangeLetters(letter, count)
     elif count < 0:
         return State.LettersChange
     try:
         # min() raises ValueError when getMatches(0) yields nothing; the
         # handler below turns that into a letter exchange.
         # NOTE(review): this picks the LOWEST-scoring match -- confirm that
         # min (rather than max) is intended.
         word = min(
             self.words.getMatches(0),
             key=lambda x: self.words.scores(x)
         )
         column = self.board.findBestInCenter(word)
         self.board.place(word, self.board.centerVertical, column)
         self.removeLetters(word)
         return State.SuccessfulTurn
     except ValueError:
         return State.LettersChange
Пример #4
0
 def turn(self):
     """Refill the current player's rack, run their turn and apply the outcome.

     Returns:
         ``True`` when ``_turn()`` reports a positive state (the decoded
         letters are removed and the player is not advanced); otherwise
         nothing is returned and play passes to the next player.
     """
     # Top the rack up to seven letters from the bag.
     missing = 7 - len(self.currentPlayer.letters)
     self.currentPlayer.letters += self.bag.get(missing)

     outcome = self._turn()
     if outcome > 0:
         # A positive outcome is decoded into a (letter, count) pair.
         letter, count = State.Decode(outcome)
         self.removeLetters(letter * count)
         return True

     succeeded = False
     if outcome == State.LettersChange:
         self.swapLetters()
     elif outcome == State.SuccessfulTurn:
         succeeded = True

     if succeeded:
         self._fails = 0
         self.swapTurn()
     else:
         # Letter exchanges and every remaining outcome (a passed turn)
         # are recorded as a failed turn.
         self.fail()
     self.nextPlayer()
Пример #5
0
def play(θo, θm, θe, depth=TRAIN_DEPTH):
    """Play an interactive game between the engine and a person at the console.

    The engine moves on turns where ``(state.turn + first)`` is odd; on the
    other turns the legal actions are printed and an index is read from
    standard input. Before each searched engine move the active weight
    vector is updated from the states visited by ``minimax``.

    Args:
        θo, θm, θe: weight vectors for the three stages of the game. The
            update uses ``θ += Δθ``, so numpy arrays are modified in place.
        depth: search depth for both the training search and move selection.

    Returns:
        The tuple ``(θo, θm, θe)``.
    """
    state = State()

    # 0 or 1: fixes which parity of turn numbers belongs to the engine.
    first = np.random.choice([0, 1])

    # With 0 the random-move branch below is only reachable for negative
    # turn numbers.
    # previously: np.random.choice([0] + [2]*2 + [4]*4 + [8]*8 + 16*[16] + 32*[32])
    random_turns = 0

    while not state.terminal_test():
        print(f'Turn number {state.turn}')
        print_board(state.board)
        print()

        engine_to_move = (state.turn + first) % 2
        if not engine_to_move:
            # Person's turn: show the numbered actions and read a choice.
            print(actions_with_indices(translate_actions(state.actions())))
            i = int(input())
            state = state.result(state.actions()[i])
        else:
            # Pick the weights from the sum of the positive board entries.
            own_total = state.board[state.board > 0].sum()
            if own_total == 12:
                θ = θo
            elif own_total > 5:
                θ = θm
            else:
                θ = θe

            state.history[state] += 1

            if state.turn < random_turns:
                count = len(state.actions(False))
                pick = np.random.choice(list(range(count)))
                state = state.result(state.actions(False)[pick])
            else:
                # The search fills searched_states with
                # (state, value, heuristic, features, depth) records.
                searched_states = []
                minimax(State(state.board), depth, θ, searched_states)

                Δθ = np.zeros(num_features)
                for _, vs, hs, features, d in searched_states:
                    # error (vs - hs), scaled by λ**(depth - d)
                    Δθ += α * (vs - hs) * features * λ**(depth - d)

                # Limit every component to [-MAX_CHANGE, MAX_CHANGE].
                np.clip(Δθ, -MAX_CHANGE, MAX_CHANGE, out=Δθ)
                θ += Δθ

                # Value every action by searching the flipped child position.
                scored = [(-negamax(State(-1 * state.result(a).board), -INF,
                                    INF, depth - 1, θ), a)
                          for a in state.actions()]
                state = state.result(max(scored)[1])

        # Hand the position to the other side.
        state.board *= -1
        state.turn += 1

    print(state)
    print('Game over!')
    return θo, θm, θe
Пример #6
0
def tree_strap_train(θo, θd, θm, θe, depth=TRAIN_DEPTH):
    """Play one training game, adjusting the weights on searched turns.

    Every turn the weight vector for the current stage is selected
    (``state.stage[0]`` equal to OPN -> θo, DEV -> θd, MID -> θm, anything
    else -> θe). The move is then taken from the opening book, picked
    uniformly at random, or found by a one-ply loop over ``negamax``; only
    the searched case is preceded by a weight update.

    Args:
        θo, θd, θm, θe: weight vectors, one per stage. The non-AB update
            uses ``θ += Δθ``, which modifies numpy arrays in place;
            ``ab_weight_updates`` presumably does the same -- TODO confirm.
        depth: search depth for the training search and for move selection.

    Returns:
        The tuple ``(θo, θd, θm, θe)``.
    """
    state = State()
    #memoised_features = {} if MULTI else None

    # Cache handed to the searches and to Φ for the duration of this game.
    memoised_features = {}
    # Number of opening turns played at random: 6, 8, 16 or 32 with weights
    # 2:4:4:8 (the `* 0` terms contribute nothing to the list).
    random_turns = np.random.choice([0] * 0 + [2] * 0 + [6] * 2 + [8] * 4 +
                                    [16] * 4 + [32] * 8)
    # See if each player will use book (each flag is 1 with probability 1/4)
    X_use_book = np.random.choice([0, 0, 0, 1])
    O_use_book = np.random.choice([0, 0, 0, 1])

    while (not state.training_terminal_test()):
        print(f'Turn number {state.turn}')
        print(state)
        print()
        # Select the weight vector for the current stage.
        if state.stage[0] == OPN:
            θ = θo
        elif state.stage[0] == DEV:
            θ = θd
        elif state.stage[0] == MID:
            θ = θm
        else:
            θ = θe
            #depth = 2*TRAIN_DEPTH

        # Book move: odd turns consult X_use_book, even turns O_use_book, and
        # the position (keyed by str(state.board)) must be in the book.
        if ((state.turn % 2 and X_use_book) or
            (not state.turn % 2 and O_use_book)) and (str(state.board)
                                                      in opening_book):
            state = state.result(tuple(opening_book[str(state.board)]))

        # Random move during the first `random_turns` turns.
        elif state.turn < random_turns:
            num_actions = len(state.actions(False))
            state = state.result(
                state.actions(False)[np.random.choice(
                    [i for i in range(num_actions)])])
        else:
            # V is only assigned when MULTI is set or AB_TRAIN is unset, and
            # only read in the non-AB branch below.
            # NOTE(review): with MULTI and AB_TRAIN both set, V is computed
            # here and never used.
            if MULTI:
                searched_states = set()
                V = speedy_minimax(state,
                                   depth,
                                   θ,
                                   searched_states,
                                   first=True,
                                   memoised_states=memoised_features)[0]
            elif not AB_TRAIN:
                searched_states = []
                V = negamax(state, -10 * INF, 10 * INF, depth, θ,
                            memoised_features)

            if AB_TRAIN:
                searched_states = []
                # NOTE(review): this search uses TRAIN_DEPTH, not the `depth`
                # argument -- confirm that is intended.
                alpha_beta_train(state, θ, searched_states, TRAIN_DEPTH,
                                 memoised_features)
                ab_weight_updates(searched_states, θ, depth, α, λ, MAX_CHANGE)
            else:
                Δθ = np.zeros(num_features)
                #for s, vs, hs, features, d in searched_states:
                #    # updates should only happen for states that match the player to play
                #    if not d % 2:
                #        features = np.frombuffer(features)
                #        #𝛿 = V(s) - H(features, θ)
                #        𝛿 = vs - hs
                #        Δθ += α*𝛿*features*λ**(depth-d)
                # Update from the root only: move the heuristic of the current
                # state towards the search value V (skipped when V is 0).
                if V != 0:
                    features = Φ(state, memoised_features)
                    h = H(features, θ)
                    𝛿 = V - h
                    Δθ += α * 𝛿 * features

                # Limit every component to [-MAX_CHANGE, MAX_CHANGE].
                for i in range(num_features):
                    if Δθ[i] > MAX_CHANGE:
                        Δθ[i] = MAX_CHANGE
                    elif Δθ[i] < -MAX_CHANGE:
                        Δθ[i] = -MAX_CHANGE
                θ += Δθ

            # Move selection: search each child with the negated window and
            # keep the action with the highest value. `v` is never read.
            # NOTE(review): best_action stays None when no action raises
            # alpha above its initial value (e.g. no legal actions); the
            # state.result(None) call below would then receive None.
            best_action = None
            alpha, beta, v = -4 * INF, 4 * INF, -4 * INF
            for a in state.actions():
                child = state.result(a)
                nmax = -negamax(child, -beta, -alpha, depth - 1, θ,
                                memoised_features)
                if nmax > alpha:
                    alpha = nmax
                    best_action = a

            state = state.result(best_action)
            print(alpha)

    print('Terminal State:')
    print(state)
    # Drop the feature cache and run a garbage collection before returning.
    memoised_features = None
    gc.collect()
    return θo, θd, θm, θe
Пример #7
0
from features import Φ, ALL_STACKS, RINGS, H
from ab_treestrap_train import alpha_beta_train, ab_weight_updates
from opening import opening_book
#from weight import weight1

# note multiprocessing was used for training only, not for play
from multiprocessing import Pool, Manager
import gc

# When true, tree_strap_train obtains its training value from speedy_minimax.
MULTI = False
# presumably the number of worker processes for the multiprocessing pool --
# not referenced in the code visible here; TODO confirm
PROCESSES = 8

# When true, tree_strap_train trains through alpha_beta_train and
# ab_weight_updates instead of its own root-only update.
AB_TRAIN = True
# Default search depth of tree_strap_train.
TRAIN_DEPTH = 4

# Length of the feature vector, measured on a default-constructed State with
# a fresh cache dict.
num_features = len(Φ(State(), {}))

# Step size multiplying every weight update.
α = 0.000001
# NOTE(review): handed to ab_weight_updates; looks like a per-depth decay
# factor -- confirm against that function.
λ = 0.5
# Largest absolute change allowed for a single weight in one update.
MAX_CHANGE = 0.01


def tree_strap_train(θo, θd, θm, θe, depth=TRAIN_DEPTH):
    state = State()
    #memoised_features = {} if MULTI else None

    memoised_features = {}
    random_turns = np.random.choice([0] * 0 + [2] * 0 + [6] * 2 + [8] * 4 +
                                    [16] * 4 + [32] * 8)
    # See if each player will use book
    X_use_book = np.random.choice([0, 0, 0, 1])
Пример #8
0
def Φ(state, memoized_states={}):
    """Compute the feature vector of ``state`` as a 1-D numpy array.

    Every feature is evaluated for player X (positive board entries) and for
    player O (negative entries). The vector holds all (X, O) pairs followed
    by the X - O difference of each pair.

    Args:
        state: position to describe. ``state.board`` is indexed both as
            ``board[x][y]`` with 0 <= x, y < 8 and with boolean masks, and
            ``state`` is used as a dictionary key, so it must be hashable.
        memoized_states: cache mapping states to feature vectors. The default
            dict is created once, so it is shared by every call that omits
            the argument; pass your own dict to control the cache lifetime.

    Returns:
        The feature array. The same object is kept in the cache, so callers
        should not modify it in place.
    """
    if state in memoized_states:
        return memoized_states[state]

    X, O = 1, 0
    board = state.board
    opp_b = State(-1 * board).board
    X_stacks = [(x, y) for x, y in ALL if board[x][y] > 0]
    O_stacks = [(x, y) for x, y in ALL if board[x][y] < 0]
    # Offsets of the eight neighbouring squares (orthogonal and diagonal).
    ADJACENT = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0),
                (1, 1))

    def stacks_of(player):
        ''' List of (x, y) positions of the stacks owned by the player '''
        return X_stacks if player == X else O_stacks

    def colour_of(player):
        ''' Sign of the player's entries on the board '''
        return 1 if player == X else -1

    def on_board(x, y):
        ''' True when (x, y) lies on the 8x8 board '''
        return 0 <= x < 8 and 0 <= y < 8

    def neighbouring_stacks(player, sign):
        '''
        Squares next to one of the player's stacks that hold a stack of the
        player (sign = 1) or of the opponent (sign = -1)
        '''
        wanted = colour_of(player) * sign
        found = set()
        for x, y in stacks_of(player):
            for dx, dy in ADJACENT:
                nx, ny = x + dx, y + dy
                if on_board(nx, ny) and board[nx][ny] * wanted > 0:
                    found.add((nx, ny))
        return found

    def column_values(column):
        ''' The eight board entries of a column as a numpy array '''
        return np.array([board[row][column] for row in range(8)])

    def largest_connected_cluster(player):
        '''
        largest connected cluster in terms of number of stacks
        '''
        NORM = 12
        colour = colour_of(player)
        unvisited = set(stacks_of(player))
        num_stacks = len(unvisited)

        largest = 0
        while unvisited:
            # Flood-fill the cluster containing an arbitrary unvisited stack.
            start = next(iter(unvisited))
            cluster = {start}
            frontier = [start]
            while frontier:
                x, y = frontier.pop()
                for dx, dy in ADJACENT:
                    spot = (x + dx, y + dy)
                    if (spot not in cluster and on_board(*spot)
                            and board[spot[0]][spot[1]] * colour > 0):
                        cluster.add(spot)
                        frontier.append(spot)
            unvisited -= cluster

            if len(cluster) > largest:
                largest = len(cluster)
                # A cluster holding at least half of the stacks cannot be
                # beaten by any other cluster, so the search can stop.
                if largest >= num_stacks / 2:
                    break

        return largest / NORM

    def largest_almost_connected_cluster_stacks(player, num_pieces=False):
        '''
        number of stacks (opt pieces) in an extended cluster
        (vulnerable to one opposing stack in the right spot)
        '''
        NORM = 12
        own = set(stacks_of(player))
        # Squares next to the player's stacks that the player does not occupy.
        # (Positions are tuples; the membership test must use tuples too.)
        check_spots = {(x + dx, y + dy)
                       for x, y in own
                       for dx, dy in ADJACENT
                       if on_board(x + dx, y + dy)
                       and (x + dx, y + dy) not in own}

        measure = pieces if num_pieces else stacks
        before = measure(player)

        max_lost = 0
        for spot in check_spots:
            result = state.result(('BOOM', spot))
            lost = before - measure(player, board=result.board)
            if lost > max_lost:
                max_lost = lost
        return max_lost / NORM

    def largest_almost_connected_cluster_pieces(player, num_pieces=False):
        '''
        number of pieces in an extended cluster
        (vulnerable to one opposing stack in the right spot)
        '''
        # other function is already normalised correctly
        NORM = 1
        return largest_almost_connected_cluster_stacks(player, True) / NORM

    def piece_centrality(player, ring):
        ''' Overall centrality in terms of where pieces are '''
        NORM = 12
        # Board entries are signed, so multiply by the colour to count pieces.
        count = sum(board[x][y] for x, y in stacks_of(player)
                    if (x, y) in ring)
        return count * colour_of(player) / NORM

    def stack_centrality(player, ring):
        ''' Overall centrality in terms of where stacks are (ignores that bigger stacks have more pieces) '''
        NORM = 12
        count = sum(1 for pos in stacks_of(player) if pos in ring)
        return count / NORM

    def spacing(player):
        ''' Not implemented; returns None and is not part of the vector '''

    def mobility(player):
        ''' How many different squares the player can move onto '''
        NORM = 1
        player_stacks = stacks_of(player)
        if not player_stacks:
            # No stacks left: avoid dividing by zero below.
            return 0.0
        colour = colour_of(player)

        contribution = 0
        for x, y in player_stacks:
            stack_size = abs(board[x][y])
            num_spots = 0
            for step in range(1, stack_size + 1):
                targets = ((x + step, y), (x - step, y), (x, y + step),
                           (x, y - step))
                for nx, ny in targets:
                    # A target counts when it is empty or holds an own stack.
                    if on_board(nx, ny) and board[nx][ny] * colour >= 0:
                        num_spots += 1

            # contribution += num spots that piece can move / num_spaces it could move if free
            contribution += num_spots / (4 * stack_size)
        return contribution / len(player_stacks) / NORM

    def control(player):
        ''' Blowing up all pieces now, how many squares are touched

        Not implemented; returns None and is not part of the vector '''

    def control2(player):
        ''' Moving and then blowing up, how many squares are touched

        Not implemented; returns None and is not part of the vector '''

    def best_trade(player):
        ''' piece advantage of best trade

        Not implemented; returns None and is not part of the vector '''

    def av_cluster_size(player):
        ''' Not implemented; returns None and is not part of the vector '''

    def pieces(player, board=board):
        '''
        Returns the number of pieces on a board for the given player,
        divided by NORM.
        Defaults to the board of the current state, can pass in a different board.
        '''
        NORM = 12
        if player == X:
            return board[board > 0].sum() / NORM
        return -board[board < 0].sum() / NORM

    def stacks(player, board=board):
        '''
        Takes a player and returns the number of stacks that player has,
        divided by NORM.
        Defaults to the board of the current state, can pass in a different board.
        '''
        NORM = 12
        if player == X:
            return (board > 0).sum() / NORM
        return (board < 0).sum() / NORM

    def actions(player):
        ''' Returns the number of actions the player has, divided by NORM '''
        NORM = 130
        if player == X:
            return len(State(board).actions()) / NORM
        return len(State(opp_b).actions()) / NORM

    def connectivity(player):
        ''' Number of own stacks that touch another own stack '''
        NORM = 8
        return len(neighbouring_stacks(player, 1)) / NORM

    def threat(player):
        ''' Number of opposing stacks that touch one of the player's stacks '''
        NORM = 8
        return len(neighbouring_stacks(player, -1)) / NORM

    def column_piece_count(player, column):
        ''' How many pieces are in the column '''
        NORM = 24
        col = column_values(column)
        if player == X:
            return col[col > 0].sum() / NORM
        return -col[col < 0].sum() / NORM

    def column_stack_count(player, column):
        ''' How many stacks the player has in the column '''
        NORM = 8
        col = column_values(column)
        if player == X:
            return (col > 0).sum() / NORM
        return (col < 0).sum() / NORM

    def av_stack_size(player):
        ''' Average number of pieces per stack, divided by NORM '''
        NORM = 12
        num_stacks = stacks(player)
        if num_stacks == 0:
            # Without stacks the ratio would be 0/0 and put NaN in the vector.
            return 0.0
        return pieces(player) / num_stacks / NORM

    # Ideas for further features:
    # Distance to opponent
    # Measure of closeness or spread
    # Board position
    # Closeness to centre

    # Features of one argument (player), of (player, ring) and of
    # (player, column). The order below fixes the layout of the vector.
    f1s = [
        largest_connected_cluster,  #largest_almost_connected_cluster_stacks, largest_almost_connected_cluster_pieces,
        mobility,
        pieces,
        stacks,
        actions,
        connectivity,
        threat,
        av_stack_size
    ]
    f2s = [piece_centrality, stack_centrality]
    f3s = [column_piece_count, column_stack_count]

    features = [f(player) for f in f1s for player in [X, O]] + \
               [f(player, ring) for f in f2s for ring in RINGS for player in [X, O]] + \
               [f(player, col) for f in f3s for col in range(8) for player in [X, O]]
    # Consecutive entries are (X, O) pairs; append X - O for every pair.
    diffs = [features[i] - features[i + 1]
             for i in range(0, len(features), 2)]
    features = np.array(features + diffs)

    memoized_states[state] = features
    return features
Пример #9
0
 def actions(player):
     ''' Number of actions available to the player, divided by NORM '''
     NORM = 130
     # X is evaluated on `board`, the other player on `opp_b`
     # (presumably the same position seen from the opponent's side).
     position = board if player == X else opp_b
     return len(State(position).actions()) / NORM
Пример #10
0
from player import State, ALL, MOVE, INF
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict as dd
from features import Φ, ALL_STACKS, RINGS

# Default search depth used by the training function below.
TRAIN_DEPTH = 2

# Length of the feature vector, measured on a default-constructed State.
num_features = len(Φ(State()))

def H(features, θ):
    """Return the heuristic value of a feature vector under weights ``θ``.

    The value is the dot product of ``features`` and ``θ``, limited to the
    range [-0.99*INF, 0.99*INF].
    """
    bound = 0.99 * INF
    score = np.dot(features, θ)
    if score > bound:
        return bound
    if score < -bound:
        return -bound
    return score

# Step size multiplying every weight update.
α = 0.000001*3
# NOTE(review): looks like the decay applied per ply of search depth in the
# weight update -- confirm against the training loop.
λ = 0.5
# Largest absolute change allowed for a single weight in one update.
MAX_CHANGE = 0.1
def tree_strap_train(θo, θm, θe, depth=TRAIN_DEPTH):
    OPN, MID, END = 0, 1, 2
    state = State()
    random_turns = np.random.choice([0] + [2]*2 + [4]*4 + [8]*8 + 16*[16] + 32*[32])
    while (not state.terminal_test()):
        print(f'Turn number {state.turn}')
        print(state)
        print()
Пример #11
0
from collections import defaultdict as dd
from features import Φ, ALL_STACKS, RINGS, H
from ab_treestrap_train import alpha_beta_train, ab_weight_updates
from opening import opening_book
from weight import weight1

from multiprocessing import Pool, Manager
import gc

# When true, tree_strap_train obtains its training value from speedy_minimax.
MULTI = False
# presumably the number of worker processes for the multiprocessing pool --
# not referenced in the code visible here; TODO confirm
PROCESSES = 8

# When true, tree_strap_train trains through alpha_beta_train and
# ab_weight_updates instead of its own root-only update.
AB_TRAIN = True
# Default search depth of tree_strap_train.
TRAIN_DEPTH = 4

# Length of the feature vector, measured on a default-constructed State.
num_features = len(Φ(State()))

# Step size multiplying every weight update.
α = 0.000001
# NOTE(review): handed to ab_weight_updates; looks like a per-depth decay
# factor -- confirm against that function.
λ = 0.5
# Largest absolute change allowed for a single weight in one update.
MAX_CHANGE = 0.01


def tree_strap_train(θo, θd, θm, θe, depth=TRAIN_DEPTH):
    state = State()
    #memoised_features = {} if MULTI else None

    memoised_features = {}
    random_turns = np.random.choice([0] * 0 + [2] * 0 + [6] * 2 + [8] * 4 +
                                    [16] * 4 + [32] * 8)
    # See if each player will use book
    X_use_book = np.random.choice([0, 0, 0, 1])
Пример #12
0
def _chooseAlgorithm():
    """Ask on the console which search the computer uses; return 1 or 2."""
    while True:
        print("Please choose an algorithm that the computer should use:")
        print("1 - Mini-Max")
        print("2 - Alpha-Beta")
        try:
            alg_choice = int(input())
        except ValueError:
            # Non-numeric input is handled like any other invalid choice.
            alg_choice = -1
        if alg_choice == 1:
            print(
                "\033[1;31;17mThe Computer\033[0m uses the Mini-Max Algorithm."
            )
            return alg_choice
        if alg_choice == 2:
            print(
                "\033[1;34;17mThe Computer\033[0m uses the Alpha-Beta Algorithm."
            )
            return alg_choice
        print("Invalid choice. Please type 1 or 2.")


def _humanTurn(board, player_1):
    """Let the person pick a pit and apply the move, or skip when stuck."""
    if isOutOfMoves(board, Constants.P1_PITS):
        print(
            "\033[1;31;17mThe player\033[0m is out of moves, skip to the Computer:"
        )
        return

    pit_choice = -1
    pit_valid = False
    while not pit_valid:
        print("\033[1;31;17mThe player\033[0m's turn. Please choose a pit.")
        try:
            pit_choice = int(input())
        except ValueError:
            # Non-numeric input: ask again instead of crashing.
            print("Invalid choice, please try again.")
            continue
        if isValidPit(board, pit_choice, Constants.P1_PITS):
            pit_valid = True
        else:
            print("Invalid choice, please try again.")

    print("\033[1;31;17mThe player\033[0m chose to move the stones in pit #" +
          str(pit_choice) + ":")

    move = State(None, 0, pit_choice, board[pit_choice].getNumStones(), None)
    # The player object works on plain stone counts, not on pit objects.
    tempBoard = [pit.getNumStones() for pit in board]
    newBoard = player_1.updateBoard(move, tempBoard)
    move.setBoard(newBoard)
    updateBoard(board, move)
    printBoard(board)


def _computerTurn(board, player_2, alg_choice):
    """Let the computer move, or skip when stuck.

    Returns:
        The node count reported by the search, or 0 when the turn is skipped.
    """
    if isOutOfMoves(board, Constants.P2_PITS):
        print(
            "\033[1;34;17mThe Computer\033[0m is out of moves, skip to the player:"
        )
        return 0

    print("\033[1;34;17mThe Computer\033[0m's turn.")
    if alg_choice == 1:
        move, node_count = player_2.minimax_decision(board)
    elif alg_choice == 2:
        move, node_count = player_2.alpha_beta_search(board)
    else:
        raise ValueError("alg_choice must be 1 or 2")
    print("\033[1;34;17mThe Computer\033[0m chose to move the stones in pit #" +
          str(move.getPitIndex()) + ":")
    updateBoard(board, move)
    printBoard(board)
    return node_count


def startGamePvC():
    """Run one console game of a person (player 1) against the computer.

    Asks for the computer's search algorithm, sets the board up, picks the
    starting side at random and alternates turns until neither side has a
    move left. The winner is decided by the stones in each side's two
    pockets and announced on the console. Nothing is returned.
    """
    board = []
    player_1 = Player(1)
    player_2 = Player(2)
    # Total nodes reported by the computer's searches (collected, not shown).
    p2_nodes = 0

    alg_choice = _chooseAlgorithm()

    #num_stones = randint(1, Constants.MAX_STONES)
    num_stones = Constants.NUM_STONES
    initGame(board, player_1, player_2, num_stones)
    printBoard(board)

    rand = randint(0, 1)
    if rand == 0:
        print("\033[1;31;17mThe player\033[0m makes the first move.")
    else:
        print("\033[1;34;17mThe Computer\033[0m makes the first move.")

    human_to_move = rand == 0
    while True:
        if human_to_move:
            _humanTurn(board, player_1)
        else:
            p2_nodes = p2_nodes + _computerTurn(board, player_2, alg_choice)

        # The game ends as soon as neither side can move.
        if isOutOfMoves(board, Constants.P1_PITS) and isOutOfMoves(
                board, Constants.P2_PITS):
            break
        human_to_move = not human_to_move

    p1_score = board[Constants.P1_POCKETS[0]].getNumStones() + board[
        Constants.P1_POCKETS[1]].getNumStones()
    p2_score = board[Constants.P2_POCKETS[0]].getNumStones() + board[
        Constants.P2_POCKETS[1]].getNumStones()

    if p1_score > p2_score:
        print("\033[1;31;17mThe Player\033[0m Won!")
    elif p2_score > p1_score:
        print("\033[1;34;17mThe Computer\033[0m Won!")
    else:
        print("It's a tie!")