def getNewPieces(pieces, playerMove):
    """Apply the player's move (and, if possible, a minimax reply) to a board.

    Args:
        pieces: flat list of 9 cells in row-major order; None marks empty.
        playerMove: flat index 0-8 of the player's ('x') move.

    Returns:
        The resulting board, flattened back to a list of 9 cells.
    """
    ## Get XY position of a flat move index. TicTacToeState uses (x, y)
    ## coordinates; for a 3x3 board, flat index = 3 * row + col, so the
    ## original 9-branch lookup table is exactly divmod. A list is returned
    ## to keep the original action type.
    def getXY(index):
        return list(divmod(index, 3))

    ## Build Board from the flat piece list.
    board = [[None] * 3 for _ in range(3)]
    for i, piece in enumerate(pieces):
        row, col = getXY(i)
        board[row][col] = piece

    ## Player Move
    action = getXY(playerMove)
    gameState = TicTacToeState(board, 'x')
    gameState = gameState.generateSuccessor(action)

    ## Computer Move — only attempted when the pre-move board had an empty
    ## cell; the agent may still return None (e.g. board now full).
    if None in pieces:
        player = MinimaxAgent()
        action = player.getAction(gameState)
        if action is not None:
            gameState = gameState.generateSuccessor(action)

    return gameState.board[0] + gameState.board[1] + gameState.board[2]
def evaluate(game):
    """Replay *game* through a minimax agent and collect training pairs.

    For each position where the agent's move distribution is usable, records
    the featurized state and the distribution converted to probabilities.
    Writes one "=" per processed move as a progress indicator.

    Returns:
        (states, probs) as two numpy arrays of equal length.
    """
    state = State()
    agent = MinimaxAgent(max_depth=6, max_width=6)
    features, probs = [], []
    for move_x, move_y in game:
        dist = agent._get_dist(state)
        # Record only when the distribution is not a lone entry with a
        # negative coordinate (apparently a sentinel — confirm in agent).
        usable = len(dist) != 1 or (dist[0][0] >= 0 and dist[0][1] >= 0)
        if usable:
            features.append(state.featurize())
            probs.append(util.dist_to_prob(dist))
        state.move(move_x, move_y)
        sys.stdout.write("=")
        sys.stdout.flush()
    return (np.array(features), np.array(probs))
def __init__(self, master):
    """Set up the board frame: load piece images, create the game state and
    the minimax agent, build the widgets, and show initial recommendations."""
    tk.Frame.__init__(self, master)
    self.button = []
    self.frames = []
    self.state = State()
    # Piece images live in an "img" directory next to this module; the list
    # index appears to match the cell value (0 empty, then naught, cross) —
    # confirm against the board representation.
    img_dir = path.join(path.dirname(__file__), "img")
    self.image = [
        tk.PhotoImage(file=path.join(img_dir, name))
        for name in ("empty.gif", "naught.gif", "cross.gif")
    ]
    self.agent = MinimaxAgent()
    self.last = None
    self.pack()
    self.create_widgets()
    self.recommend()
class Application(tk.Frame):
    """A SIZE x SIZE board GUI backed by a minimax agent.

    Each cell is a framed label; clicking plays a move, and the agent's
    scores for candidate moves are printed on the empty cells.
    """

    # Board side length; widgets are stored row-major in self.frames /
    # self.button, so cell (x, y) lives at flat index x * SIZE + y.
    SIZE = 15

    def _flat(self, x, y):
        """Row-major widget index for board cell (x, y)."""
        return np.ravel_multi_index((x, y), dims=(self.SIZE, self.SIZE))

    def __init__(self, master):
        """Load piece images, create state and agent, build widgets."""
        tk.Frame.__init__(self, master)
        self.button = list()
        self.frames = list()
        self.state = State()
        root = path.join(path.dirname(__file__), "img")
        self.image = [
            tk.PhotoImage(file=path.join(root, "empty.gif")),
            tk.PhotoImage(file=path.join(root, "naught.gif")),
            tk.PhotoImage(file=path.join(root, "cross.gif")),
        ]
        self.agent = MinimaxAgent()
        self.last = None  # actions annotated by the previous recommend()
        self.pack()
        self.create_widgets()
        self.recommend()

    def recommend(self):
        """Ask the agent to score candidate moves and display the scores."""
        t = time()
        actions = self.agent.get_score(self.state)
        print("time elapsed: %f seconds" % (time() - t))
        # Restore the images on cells annotated by the previous call.
        if self.last is not None:
            for x, y, _ in self.last:
                button = self.button[self._flat(x, y)]
                button.config(image=self.image[self.state.board[x, y]])
        # Show each candidate's score as text instead of an image.
        for x, y, v in actions:
            button = self.button[self._flat(x, y)]
            button.config(image="", text="%.02f" % v)
        self.last = actions

    def highlight(self, x, y):
        """Outline in blue the cells the state reports for (x, y)."""
        for i, j in self.state.highlight(x, y):
            self.frames[self._flat(i, j)].config(padx=1, pady=1, bg="blue")

    def click(self, i, j):
        """Return the <Button-1> handler bound to cell (i, j)."""
        def respond(e):
            # Ignore clicks after the game ended or on occupied cells.
            if not self.state.end and self.state.board[i, j] == 0:
                self.button[self._flat(i, j)].config(image=self.image[self.state.player])
                self.state.move(i, j)
                if self.state.end:
                    if self.state.features["win-o"] + self.state.features["win-x"] > 0:
                        self.highlight(i, j)
                    else:
                        # Game ended with no winning line (presumably a
                        # draw): mark the final cell in red.
                        self.frames[self._flat(i, j)].config(padx=1, pady=1, bg="red")
                else:
                    self.recommend()
        return respond

    def create_widgets(self):
        """Lay out the SIZE x SIZE grid of framed, clickable cell labels."""
        for i in range(self.SIZE):
            for j in range(self.SIZE):
                f = tk.Frame(self, height=50, width=50)
                f.pack_propagate(0)
                f.grid(row=i, column=j, padx=0, pady=0)
                self.frames.append(f)
                b = tk.Label(f, image=self.image[0], bg="yellow")
                b.pack(fill=tk.BOTH, expand=1)
                b.bind("<Button-1>", self.click(i, j))
                self.button.append(b)
else: print('hell no') #test getLegalActions print(state.getLegalActions(1)) #test generateSuccessor newState = state.generateSuccessor(1, 1, 'switch') print(newState.currAgent) newState = state.generateSuccessor(1, 'thunderbolt', 'moves') print(newState.opp) ''' #test minimax agent alg = MinimaxAgent(3) ''' action, movType = alg.getAction(state) print('my action: ', action, movType) state = state.generateSuccessor(1, action, movType) enemyaction, movType = alg.getEnemyAction(state) print('opp action: ', enemyaction, movType) state = state.generateSuccessor(-1, enemyaction, movType) ''' while not state.isEnd(): action, movType = alg.getAction(state) state = state.generateSuccessor(1, action, movType) print(action, movType) if state.isEnd(): break
if gui_active: win.updateSprites(state) win.refresh() if verbose > 0: state.printGrid(game.grid_size) return state if __name__ == "__main__": if len(sys.argv) > 1: max_iter = int(sys.argv[1]) else: max_iter = None minimax_agent = MinimaxAgent(depth=lambda s,a: 2) alphabeta_agent = AlphaBetaAgent(depth=lambda s,a: survivorDfunc(s, a, 4, 0.5), evalFn=greedyEvaluationFunction) expectimax_agent = ExpectimaxAgent(depth=lambda s,a: cowardCenterDepthFunction(s, a, 2), evalFn=greedyEvaluationFunction) strategies = [smartGreedyStrategy, opportunistStrategy, alphabeta_agent.getAction] # add a human player # strategies = [humanStrategy, smartGreedyStrategy, opportunistStrategy, alphabeta_agent.getAction] # add an RL agent featureExtractor = FeatureExtractor(len(strategies), grid_size = 20, radius_ = 10) rlStrategy = load_rl_strategy("nn-nn1-r10-1b.p", strategies, featureExtractor, discount = 0.9, q_type = "nn") strategies.append(rlStrategy) controller(strategies, 20, max_iter = max_iter, gui_active = True, verbose = 0, game_speed = 10)
""" battle arena between agents """ import argparse import numpy as np import tensorflow as tf from time import time from state import State from minimax import MinimaxAgent from mcts_agent import MCTSAgent NUM_GAMES = 2 with tf.Session() as sess: mcts = MCTSAgent(sess, "dualsup", chkpnt=3000) agent = MinimaxAgent() print("ARENA: %s-%d VERSES %s-%d" % (mcts.model_name, mcts.chkpnt, "minimax", 0)) stat = np.zeros(shape=(2, 2), dtype=np.int) for i in range(NUM_GAMES): t = time() s = State() a_is_black = (i % 2 == 0) while not s.end and len(s.history) < 225: if a_is_black == (s.player > 0): s.move(*mcts.get_action(s, deterministic=True)) mcts.update(s) else: s.move(*agent.get_action(s)) mcts.update(s) mcts.refresh()
from os import path, listdir from sys import stdout, argv from minimax import MinimaxAgent if __name__ == '__main__': if len(argv) != 3: print("Usage: python battle_minimax.py [total] [multiple]") else: total = int(argv[1]) rest = int(argv[2]) latest = -1 for f in listdir(path.join(path.dirname(__file__), "data", "minimax")): if f.endswith(".pkl"): latest = max(latest, int(f.split(".")[0])) agent = MinimaxAgent(max_depth=6, max_width=8) names = ["draw", "black", "white"] for i in count(latest + 1): if i % total == rest: print("[INFO] game %d begin" % i) begin = time() state = State() while len(state.history) != 225 and not state.end: x, y = agent.get_action(state) state.move(x, y) stdout.write(".") stdout.flush() winner = state.player if state.end else 0 with open( path.join(path.dirname(__file__), "data", "minimax", "%d.pkl" % i), "wb") as out:
"x:0": board }).reshape(225) y = np.exp(y) y = y / y.sum() self.dist_queue.put(y) def get_action(self, state): if len(state.history) == 0: return (7, 7) self.state_queue.put(state) prob = self.dist_queue.get() return np.unravel_index(np.random.choice(225, p=prob), dims=(15, 15)) agents = { "minimax": lambda which: MinimaxAgent(max_depth=6, max_width=8), "monet": lambda which: NetAgent(which), } players = [0, agents[argv[1]]("black"), agents[argv[2]]("white")] names = ["", "black", "white"] state = State() while len(state.history) != 225 and not state.end: t = time() x, y = players[state.player].get_action(state) print("%s [%g seconds]" % (names[state.player], time() - t)) state.move(x, y) winner = state.player if state.end else 0 with open( path.join(path.dirname(__file__), "data", "battle-%s-%s.pkl" % (argv[1], argv[2])), "wb") as out:
def create_widgets(self): for i in range(15): for j in range(15): f = tk.Frame(self, height=50, width=50) f.pack_propagate(0) f.grid(row=i, column=j, padx=0, pady=0) self.frames.append(f) b = tk.Label(f, image=self.image[0], bg="yellow") b.pack(fill=tk.BOTH, expand=1) b.bind("<Button-1>", self.click(i, j)) self.button.append(b) root = tk.Tk() root.wm_title("Alpha Gomoku") root.attributes("-topmost", True) with tf.Session() as sess: parser = argparse.ArgumentParser() parser.add_argument("model_name", type=str) parser.add_argument("--chkpnt", "-c", type=int) parser.add_argument("--ensemble", "-e", action="store_true") args = parser.parse_args() if args.model_name == "minimax": agent = MinimaxAgent(max_depth=6, max_width=6) elif args.model_name == "mininet": agent = MCTSMinimaxAgent(sess, "supervised", chkpnt=args.chkpnt) else: agent = Agent(sess, args.model_name, chkpnt=args.chkpnt) app = Application(agent, root, ensemble=args.ensemble) app.mainloop()