Example #1
def tree_strap_train(θo, θd, θm, θe, depth=TRAIN_DEPTH):
    state = State()
    # Cache of computed feature vectors, shared by the searches in this game.
    memoised_features = {}
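    # Number of random exploration moves to play at the start of this game,
    # drawn from a weighted distribution (the zero-count entries are disabled).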
    random_turns = np.random.choice([0] * 0 + [2] * 0 + [6] * 2 + [8] * 4 +
                                    [16] * 4 + [32] * 8)
    # Decide independently (with probability 1/4) whether each player consults
    # the opening book this game.
    X_use_book = np.random.choice([0, 0, 0, 1])
    O_use_book = np.random.choice([0, 0, 0, 1])

    while not state.training_terminal_test():
        print(f'Turn number {state.turn}')
        print(state)
        print()
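        # Use the weight vector matching the current stage of the game.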
        if state.stage[0] == OPN:
            θ = θo
        elif state.stage[0] == DEV:
            θ = θd
        elif state.stage[0] == MID:
            θ = θm
        else:
            θ = θe
            #depth = 2*TRAIN_DEPTH

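        # If this player is using the opening book and the position is in it,
        # play the book move.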
        if ((state.turn % 2 and X_use_book) or
            (not state.turn % 2 and O_use_book)) and (str(state.board)
                                                      in opening_book):
            state = state.result(tuple(opening_book[str(state.board)]))

        elif state.turn < random_turns:
            # Exploration: play a uniformly random legal move.
            num_actions = len(state.actions(False))
            state = state.result(
                state.actions(False)[np.random.choice(num_actions)])
        else:
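            # Search the current position to get a value V (and, for some
            # configurations, the set of states the search visited).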
            if MULTI:
                searched_states = set()
                V = speedy_minimax(state,
                                   depth,
                                   θ,
                                   searched_states,
                                   first=True,
                                   memoised_states=memoised_features)[0]
            elif not AB_TRAIN:
                searched_states = []
                V = negamax(state, -10 * INF, 10 * INF, depth, θ,
                            memoised_features)

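            # Weight update: either from the full alpha-beta search trace
            # (AB_TRAIN) or as a single step toward the root search value V.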
            if AB_TRAIN:
                searched_states = []
                alpha_beta_train(state, θ, searched_states, TRAIN_DEPTH,
                                 memoised_features)
                ab_weight_updates(searched_states, θ, depth, α, λ, MAX_CHANGE)
            else:
                Δθ = np.zeros(num_features)
                #for s, vs, hs, features, d in searched_states:
                #    # updates should only happen for states that match the player to play
                #    if not d % 2:
                #        features = np.frombuffer(features)
                #        #𝛿 = V(s) - H(features, θ)
                #        𝛿 = vs - hs
                #        Δθ += α*𝛿*features*λ**(depth-d)
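                # 𝛿 is the error between the search value V and the current
                # evaluation H of the root position.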
                if V != 0:
                    features = Φ(state, memoised_features)
                    h = H(features, θ)
                    𝛿 = V - h
                    Δθ += α * 𝛿 * features

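                # Clip each weight change to ±MAX_CHANGE before applying it.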
                for i in range(num_features):
                    if Δθ[i] > MAX_CHANGE:
                        Δθ[i] = MAX_CHANGE
                    elif Δθ[i] < -MAX_CHANGE:
                        Δθ[i] = -MAX_CHANGE
                θ += Δθ

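            # Re-search each child with the updated weights and play the move
            # with the best negamax value.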
            best_action = None
            alpha, beta = -4 * INF, 4 * INF
            for a in state.actions():
                child = state.result(a)
                nmax = -negamax(child, -beta, -alpha, depth - 1, θ,
                                memoised_features)
                if nmax > alpha:
                    alpha = nmax
                    best_action = a

            state = state.result(best_action)
            print(alpha)

    print('Terminal State:')
    print(state)
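    # Drop the per-game feature cache so it can be garbage collected.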
    memoised_features = None
    gc.collect()
    return θo, θd, θm, θe
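
# Hedged sketch (an assumption, not from the source): the update
# Δθ += α*𝛿*features above is the gradient step for a linear evaluation,
# so H is presumably a dot product of the feature vector with the stage
# weights θ (numpy assumed imported as np, as in the examples).
def H(features, θ):
    # Linear evaluation: weighted sum of the features under weights θ.
    return float(np.dot(features, θ))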
Example #2
def play(θo, θm, θe, depth=TRAIN_DEPTH):
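    # Interactive game: the agent plays one randomly chosen side against a
    # human who types move indices, and keeps updating its weights from the
    # searches it runs.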
    OPN, MID, END = 0, 1, 2
    state = State()

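    # Randomly decide which side (first or second mover) the agent plays.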
    first = np.random.choice([0, 1])

    random_turns = 0  #np.random.choice([0] + [2]*2 + [4]*4 + [8]*8 + 16*[16] + 32*[32])
    while not state.terminal_test():
        print(f'Turn number {state.turn}')
        print_board(state.board)
        print()

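        # Agent's turn: choose the weight vector from the number of friendly
        # pieces still on the board.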
        if (state.turn + first) % 2:
            if state.board[state.board > 0].sum() == 12:
                θ = θo
            elif state.board[state.board > 0].sum() > 5:
                θ = θm
            else:
                θ = θe

            state.history[state] += 1

            if state.turn < random_turns:
                # Exploration: play a uniformly random legal move.
                num_actions = len(state.actions(False))
                state = state.result(
                    state.actions(False)[np.random.choice(num_actions)])
            else:
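                # Search, update θ from the states the search visited, then
                # play the best child move.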
                searched_states = []
                V = minimax(State(state.board), depth, θ, searched_states)

                Δθ = np.zeros(num_features)
                for s, vs, hs, features, d in searched_states:
                    #𝛿 = V(s) - H(features, θ)
                    𝛿 = vs - hs
                    Δθ += α * 𝛿 * features * λ**(depth - d)

                for i in range(num_features):
                    if Δθ[i] > MAX_CHANGE:
                        Δθ[i] = MAX_CHANGE
                    elif Δθ[i] < -MAX_CHANGE:
                        Δθ[i] = -MAX_CHANGE
                θ += Δθ

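                # Score each legal move with a negamax search of the resulting
                # position from the opponent's perspective.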
                actions = []
                for a in state.actions():
                    child = state.result(a)
                    actions.append((-negamax(State(-1 * child.board), -INF,
                                             INF, depth - 1, θ), a))

                state = state.result(max(actions)[1])
        else:
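            # Human's turn: list the legal moves with indices and read a choice
            # from stdin.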
            print(actions_with_indices(translate_actions(state.actions())))
            i = int(input())
            state = state.result(state.actions()[i])

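        # Flip the board so the side to move is always the positive player,
        # then advance the turn counter.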
        state.board *= -1
        state.turn += 1
    print(state)
    print('Game over!')
    return θo, θm, θe
Example #3
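    # Fragment of a play/training loop like the ones above: stage selection by
    # piece count, then a search-driven (tree-strap style) weight update.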
    while not state.terminal_test():
        print(f'Turn number {state.turn}')
        print(state)
        print()

        if state.board[state.board > 0].sum() == 12:
            θ = θo
        elif state.board[state.board > 0].sum() > 5: 
            θ = θm
        else:
            θ = θe

        state.history[state] += 1

        if state.turn < random_turns:
            # Exploration: play a uniformly random legal move.
            num_actions = len(state.actions(False))
            state = state.result(
                state.actions(False)[np.random.choice(num_actions)])
        else:
            searched_states = []
            V = minimax(State(state.board), depth, θ, searched_states)

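            # For every state visited by the search, move its evaluation toward
            # the backed-up search value, discounted by λ**(depth - d).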
            Δθ = np.zeros(num_features)
            for s, vs, hs, features, d in searched_states:
                #𝛿 = V(s) - H(features, θ)
                𝛿 = vs - hs
                Δθ += α*𝛿*features*λ**(depth-d)
                #s.board *= -1
                #flipped_features = Φ(s)
                #𝛿 = -(vs - hs) THIS IS ALL WRONG BTW, RECALCULATE V AND H
                #Δθ += α*𝛿*flipped_features*λ**(depth-d)