Example #1
def play(θo, θm, θe, depth=TRAIN_DEPTH):
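    """Self-play one training game: the engine moves on its turns (updating
    θ with the TreeStrap-style rule below), while the opponent's moves are
    read from stdin. Returns the updated opening/middle/endgame weights."""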
    OPN, MID, END = 0, 1, 2
    state = State()

    first = np.random.choice([0, 1])  # randomly decide which side moves first

    random_turns = 0  # optionally randomize the opening, e.g. np.random.choice([0] + [2]*2 + [4]*4 + [8]*8 + [16]*16 + [32]*32)
    while not state.terminal_test():
        print(f'Turn number {state.turn}')
        print_board(state.board)
        print()

        if (state.turn + first) % 2:  # engine's turn
            # pick the weight vector for the current game phase
            if state.board[state.board > 0].sum() == 12:    # full material: opening
                θ = θo
            elif state.board[state.board > 0].sum() > 5:    # middlegame
                θ = θm
            else:                                           # endgame
                θ = θe

            state.history[state] += 1

            if state.turn < random_turns:
                # exploration: play a uniformly random move
                num_actions = len(state.actions(False))
                state = state.result(
                    state.actions(False)[np.random.choice(num_actions)])
            else:
                # the search fills searched_states with tuples of
                # (state, search value, heuristic value, features, depth)
                searched_states = []
                V = minimax(State(state.board), depth, θ, searched_states)

                # TreeStrap-style update: move θ toward the search value of
                # every node visited, weighted by λ**(depth - d)
                Δθ = np.zeros(num_features)
                for s, vs, hs, features, d in searched_states:
                    𝛿 = vs - hs  # i.e. 𝛿 = V(s) - H(features, θ)
                    Δθ += α * 𝛿 * features * λ**(depth - d)

                # clip each component of the update to ±MAX_CHANGE, then apply
                Δθ = np.clip(Δθ, -MAX_CHANGE, MAX_CHANGE)
                θ += Δθ

                # choose the move whose child has the best negamax value;
                # the child board is negated so evaluation is side-to-move relative
                actions = []
                for a in state.actions():
                    child = state.result(a)
                    actions.append((-negamax(State(-1 * child.board), -INF,
                                             INF, depth - 1, θ), a))

                state = state.result(max(actions, key=lambda va: va[0])[1])
        else:
            # opponent's turn: show the legal moves and read a move index from stdin
            print(actions_with_indices(translate_actions(state.actions())))
            i = int(input())
            state = state.result(state.actions()[i])

        state.board *= -1  # flip the board so the mover's pieces are always positive
        state.turn += 1
    print(state)
    print('Game over!')
    return θo, θm, θe
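
A minimal sketch of how play might be driven over many self-play games; num_features, TRAIN_DEPTH, and the State/minimax/negamax machinery are assumed to come from the surrounding project, and NUM_GAMES is a hypothetical constant:

import numpy as np

NUM_GAMES = 100  # hypothetical number of training games

θo = np.zeros(num_features)  # opening weights
θm = np.zeros(num_features)  # middlegame weights
θe = np.zeros(num_features)  # endgame weights
for _ in range(NUM_GAMES):
    θo, θm, θe = play(θo, θm, θe, depth=TRAIN_DEPTH)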
Example #2
def H(features, θ):
    # linear evaluation, clamped to ±0.99·INF so heuristic scores
    # stay strictly below the terminal score INF
    h = np.dot(features, θ)
    return np.clip(h, -0.99*INF, 0.99*INF)
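
A quick sanity check of H; INF here is illustrative only, standing in for the project's own large terminal constant:

INF = 10**6  # illustrative; the project defines its own INF
print(H(np.array([1.0, 0.5, -2.0]), np.array([0.1, 0.2, 0.3])))  # 1·0.1 + 0.5·0.2 − 2·0.3 = −0.4
print(H(np.array([INF, 0.0, 0.0]), np.array([2.0, 0.0, 0.0])))   # clamped to 0.99·INF = 990000.0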

α = 0.000001*3     # learning rate (3e-6)
λ = 0.5            # per-ply discount in the TreeStrap update
MAX_CHANGE = 0.1   # per-weight cap on a single update step
def tree_strap_train(θo, θm, θe, depth=TRAIN_DEPTH):
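    """Self-play training game: the engine plays both sides, choosing the
    phase-appropriate weight vector on every turn."""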
    OPN, MID, END = 0, 1, 2
    state = State()
    # play the first random_turns moves at random; longer random openings
    # are proportionally more likely under this weighted choice
    random_turns = np.random.choice([0] + [2]*2 + [4]*4 + [8]*8 + [16]*16 + [32]*32)
    while not state.terminal_test():
        print(f'Turn number {state.turn}')
        print(state)
        print()

        # pick the weight vector for the current game phase
        if state.board[state.board > 0].sum() == 12:    # full material: opening
            θ = θo
        elif state.board[state.board > 0].sum() > 5:    # middlegame
            θ = θm
        else:                                           # endgame
            θ = θe

        state.history[state] += 1

        if state.turn < random_turns:
            num_actions = len(state.actions(False))