def main():
    # Flip the sign of every piece so that the side to move is positive.
    # Assumes module-level s (a State), θ (a weight vector), negamax2,
    # State and INF.
    s1 = State(s.board * -1)
    print()
    print(s1)
    print()
    # Root of the negamax search: negate each child's score and track the
    # best action alongside the alpha bound.
    depth = 2
    best_action = None
    alpha, beta = -4 * INF, 4 * INF
    for a in s1.actions():
        child = s1.result(a)
        nmax = -negamax2(child, -beta, -alpha, depth - 1, θ)
        if nmax > alpha:
            alpha = nmax
            best_action = a
    print(best_action)
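# A self-contained sanity check of the root pattern used in main() above:
# negamax with alpha-beta over a hand-rolled toy tree. Everything here
# (TOY_TREE, toy_negamax) is hypothetical scaffolding, not part of the real
# engine; each leaf value is the score from the perspective of the player
# to move at that leaf.
TOY_TREE = {
    'root': ['a', 'b'],
    'a': ['a1', 'a2'],
    'b': ['b1', 'b2'],
    'a1': 3, 'a2': -2, 'b1': -4, 'b2': 1,
}

def toy_negamax(node, alpha, beta):
    sub = TOY_TREE[node]
    if not isinstance(sub, list):
        return sub  # leaf: value for the player to move at this node
    v = float('-inf')
    for c in sub:
        # The child's best score is from the opponent's perspective, so it
        # is negated, and the (alpha, beta) window is negated and swapped.
        v = max(v, -toy_negamax(c, -beta, -alpha))
        if v >= beta:
            return v  # beta cutoff
        alpha = max(alpha, v)
    return v

# Root loop, mirroring main(): keep the best action next to the alpha bound.
best, alpha, beta = None, float('-inf'), float('inf')
for child in TOY_TREE['root']:
    score = -toy_negamax(child, -beta, -alpha)
    if score > alpha:
        alpha, best = score, child
print(best, alpha)  # -> a -2 (both moves lose material; 'a' loses least)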
class BasePlayer:
    def __init__(self, colour):
        # Per-game clock: start with a small padding, then charge every
        # public method against the (assumed) 60-second budget.
        self.time = timedelta(seconds=0.01)
        t = datetime.now()
        self.colour = colour
        self.state = State()
        self.n_v_two = None
        self.depth = DEPTH
        # Stage-specific evaluation weights: opening, development, midgame,
        # endgame.
        self.θo = np.load(path + 'o16.npy')
        self.θd = np.load(path + 'd16.npy')
        self.θm = np.load(path + 'm16.npy')
        self.θe = np.load(path + 'e16.npy')
        self.time += datetime.now() - t

    def action(self):
        t = datetime.now()
        # First eight turns: try the opening book before searching.
        if self.state.turn < 8:
            try:
                if str(self.state.board) in opening_book:
                    self.time += datetime.now() - t
                    return tuple(opening_book[str(self.state.board)])
            except Exception:
                pass  # fall through to the search if the book lookup fails
        # n v two endgame: exactly two opponent pieces left and we still
        # outnumber them.
        if (self.state.board[self.state.board < 0].sum() == -2
                and self.state.board[self.state.board > 0].sum() > 2):
            if self.n_v_two is None:
                self.n_v_two = NvTwo()
            return self.format_action(self.n_v_two.move(self.state))
        # n v one endgame, or the opponent hanging out in one stack.
        if (len(self.state.board[self.state.board < 0]) == 1
                and self.state.board[self.state.board > 0].sum() > 1):
            return self.format_action(n_v_one(self.state))
        # Pick the weight vector for the current game stage.
        if self.state.stage[0] == OPN:
            θ = self.θo
        elif self.state.stage[0] == DEV:
            θ = self.θd
        elif self.state.stage[0] == MID:
            θ = self.θm
        else:
            θ = self.θe
        # Clock management: almost out of time, play the first legal action;
        # running low, drop the search to depth 1.
        if timedelta(seconds=59.6) < self.time:
            self.time += datetime.now() - t
            return self.format_action(self.state.actions()[0])
        if timedelta(seconds=56) < self.time:
            self.depth = 1
        depth = self.depth
        # Root negamax with alpha-beta: track the best action beside alpha.
        best_action = None
        alpha, beta = -4 * INF, 4 * INF
        for a in self.state.actions():
            child = self.state.result(a)
            nmax = -self.negamax(child, -beta, -alpha, depth - 1, θ)
            if nmax > alpha:
                alpha = nmax
                best_action = a
        self.time += datetime.now() - t
        return self.format_action(best_action)

    def update(self, colour, action):
        t = datetime.now()
        self.state = self.state.result(action)
        self.time += datetime.now() - t

    def format_action(self, action):
        # Convert an internal action tuple into the referee's format.
        if action[0] == BOOM:
            move, orig = action
            statement = (BOOM, orig)
        else:
            move, n, orig, dest = action
            statement = (MOVE, n, orig, dest)
        return statement

    def negamax(self, state, alpha, beta, depth, θ):
        if state.terminal_test():
            return state.utility()
        if depth == 0:
            return H(Φ(state), θ)  # heuristic evaluation at the horizon
        v = -INF
        for a in state.actions():
            child = state.result(a)
            v = max(v, -self.negamax(child, -beta, -alpha, depth - 1, θ))
            if v >= beta:
                return v  # beta cutoff
            alpha = max(alpha, v)
        return v
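# A minimal sketch of how a driver might exercise the player above. The
# two-player loop, colour strings, and turn cap are assumptions about the
# surrounding referee, not part of this module.
def play_game(max_turns=250):
    players = [BasePlayer('white'), BasePlayer('black')]
    for turn in range(max_turns):
        mover = players[turn % 2]
        a = mover.action()
        # Every player (including the mover) is told about every action.
        for p in players:
            p.update(mover.colour, a)
        if mover.state.terminal_test():
            return mover.state.utility()
    return 0  # draw by turn limit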
def main2():
    # Quick check: list the legal actions after flipping the board sign.
    s1 = State(s.board * -1)
    print(s1.actions())
class BasePlayer:
    # Variant of the player above: no clock management, different weight
    # files, a doubled search depth, and a hard failure if the opening-book
    # lookup breaks.
    def __init__(self, colour):
        self.colour = colour
        self.state = State()
        self.n_v_two = None
        self.θo = np.load(path + 'w_opn-ab3.npy')
        self.θd = np.load(path + 'w_dev-ab3.npy')
        self.θm = np.load(path + 'w_mid-ab3.npy')
        self.θe = np.load(path + 'w_end-ab3.npy')

    def action(self):
        # First eight turns: consult the opening book.
        if self.state.turn < 8:
            try:
                if str(self.state.board) in opening_book:
                    return tuple(opening_book[str(self.state.board)])
            except Exception:
                print('Failed to get opening move')
                assert False
        # n v two endgame
        if (self.state.board[self.state.board < 0].sum() == -2
                and self.state.board[self.state.board > 0].sum() > 2):
            if self.n_v_two is None:
                self.n_v_two = NvTwo()
            return self.format_action(self.n_v_two.move(self.state))
        # n v one endgame
        if (len(self.state.board[self.state.board < 0]) == 1
                and self.state.board[self.state.board > 0].sum() > 1):
            return self.format_action(n_v_one(self.state))
        depth = DEPTH
        if self.state.stage[0] == OPN:
            θ = self.θo
        elif self.state.stage[0] == DEV:
            θ = self.θd
        elif self.state.stage[0] == MID:
            θ = self.θm
        else:
            θ = self.θe
        depth = 2 * depth  # this variant searches twice as deep
        best_action = None
        alpha, beta = -4 * INF, 4 * INF
        for a in self.state.actions():
            child = self.state.result(a)
            nmax = -self.negamax(child, -beta, -alpha, depth - 1, θ)
            if nmax > alpha:
                alpha = nmax
                best_action = a
        return self.format_action(best_action)

    def update(self, colour, action):
        self.state = self.state.result(action)
        # invert the sign of the pieces so that positive has the next move
        # self.state.board = -1 * self.state.board

    def format_action(self, action):
        if action[0] == BOOM:
            move, orig = action
            statement = (BOOM, orig)
        else:
            move, n, orig, dest = action
            statement = (MOVE, n, orig, dest)
        return statement

    def negamax(self, state, alpha, beta, depth, θ):
        if state.terminal_test():
            return state.utility()
        if depth == 0:
            return H(Φ(state), θ)
        v = -INF
        for a in state.actions():
            child = state.result(a)
            v = max(v, -self.negamax(child, -beta, -alpha, depth - 1, θ))
            if v >= beta:
                return v
            alpha = max(alpha, v)
        return v
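# H and Φ are imported from elsewhere in this project. As a rough, purely
# illustrative sketch of their assumed shapes: Φ maps a State to a feature
# vector and H scores it against one of the stage-specific θ weight vectors,
# e.g. as a squashed linear model. The two features and the tanh below are
# invented for illustration only.
import numpy as np

def Φ_sketch(state):
    board = state.board
    return np.array([
        board[board > 0].sum(),    # own material (stack sizes summed)
        -board[board < 0].sum(),   # opponent material
    ], dtype=float)

def H_sketch(features, θ):
    return float(np.tanh(features @ θ))  # assumes len(θ) == len(features)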