def _phase_weights(state, θo, θm, θe):
    """Return the weight vector that matches the positive material on the board.

    The sum of the positive board entries selects the vector: exactly 12
    picks ``θo``, more than 5 picks ``θm``, anything else picks ``θe``
    (presumably opening / middlegame / endgame weights -- confirm with the
    code that builds them).
    """
    material = state.board[state.board > 0].sum()
    if material == 12:
        return θo
    if material > 5:
        return θm
    return θe


def _search_update(state, θ, depth):
    """Nudge ``θ`` toward the search values of the positions minimax visited.

    ``minimax`` is called for its side effect of filling ``searched_states``
    with ``(state, search value, heuristic value, features, depth)`` tuples.
    Each tuple contributes ``α * (search - heuristic) * features``, scaled by
    ``λ ** (depth - d)``.  The summed step is clamped per component to
    ``[-MAX_CHANGE, MAX_CHANGE]`` before it is added to ``θ``.

    Returns ``θ`` after the ``+=``; for a NumPy array this is the same
    object, updated in place.
    """
    searched_states = []
    # Return value is not needed here; only the collected records are.
    minimax(State(state.board), depth, θ, searched_states)

    Δθ = np.zeros(num_features)
    for _, vs, hs, features, d in searched_states:
        δ = vs - hs
        Δθ += α * δ * features * λ ** (depth - d)

    θ += np.clip(Δθ, -MAX_CHANGE, MAX_CHANGE)
    return θ


def _read_action_index(num_actions):
    """Read from stdin until the user enters an index in ``[0, num_actions)``.

    Raises:
        IndexError: if there is nothing to choose from, which would otherwise
            make the prompt loop forever.
    """
    if num_actions <= 0:
        raise IndexError('no legal actions to choose from')
    while True:
        try:
            choice = int(input())
        except ValueError:
            choice = -1  # fall through to the shared re-prompt below
        # Negative values are rejected too: they would silently index from
        # the end of the action list.
        if 0 <= choice < num_actions:
            return choice
        print(f'Please enter a whole number from 0 to {num_actions - 1}.')


def play(θo, θm, θe, depth=TRAIN_DEPTH):
    """Play one interactive game against a human while updating the weights.

    A coin flip (``first``) decides on which turn parity the engine moves;
    the human moves on the other turns by typing an action index.  On each
    engine turn the weight vector for the current material is chosen, updated
    from a depth-``depth`` minimax search, and then used by ``negamax`` to
    pick the move.

    Args:
        θo, θm, θe: weight vectors selected by material count; updated with
            ``+=`` during the game (in place when they are NumPy arrays).
        depth: search depth for the update; move selection searches
            ``depth - 1`` below each child.

    Returns:
        The tuple ``(θo, θm, θe)``.
    """
    state = State()
    first = np.random.choice([0, 1])
    # Number of opening engine moves to play at random.  Disabled here; the
    # schedule previously tried was
    # np.random.choice([0] + [2]*2 + [4]*4 + [8]*8 + 16*[16] + 32*[32]).
    random_turns = 0

    while not state.terminal_test():
        print(f'Turn number {state.turn}')
        print_board(state.board)
        print()

        if (state.turn + first) % 2:
            # Engine's turn.
            θ = _phase_weights(state, θo, θm, θe)
            state.history[state] += 1

            if state.turn < random_turns:
                legal = state.actions(False)
                state = state.result(legal[np.random.choice(len(legal))])
            else:
                θ = _search_update(state, θ, depth)
                # Score each child from the opponent's side (board negated)
                # and negate the result to get the value for the mover.
                # NOTE(review): on equal scores max() falls back to comparing
                # the action objects themselves -- confirm they are orderable.
                scored = [
                    (-negamax(State(-1 * state.result(a).board),
                              -INF, INF, depth - 1, θ), a)
                    for a in state.actions()
                ]
                state = state.result(max(scored)[1])
        else:
            # Human's turn: show the indexed action list and read a choice.
            legal = state.actions()
            print(actions_with_indices(translate_actions(legal)))
            state = state.result(legal[_read_action_index(len(legal))])

        # Flip the board's sign after every move (presumably so the side to
        # move always owns the positive entries -- confirm against State).
        state.board *= -1
        state.turn += 1

    print(state)
    print('Game over!')
    return θo, θm, θe
def H(features, θ): h = np.dot(features, θ) if h > 0.99*INF: return 0.99*INF if h < -0.99*INF: return -0.99*INF return h α = 0.000001*3 λ = 0.5 MAX_CHANGE = 0.1 def tree_strap_train(θo, θm, θe, depth=TRAIN_DEPTH): OPN, MID, END = 0, 1, 2 state = State() random_turns = np.random.choice([0] + [2]*2 + [4]*4 + [8]*8 + 16*[16] + 32*[32]) while (not state.terminal_test()): print(f'Turn number {state.turn}') print(state) print() if state.board[state.board > 0].sum() == 12: θ = θo elif state.board[state.board > 0].sum() > 5: θ = θm else: θ = θe state.history[state] += 1 if state.turn < random_turns: num_actions = len(state.actions(False))