def __init__(self, player, nb_rows, nb_cols, timelimit):
    """Set up an agent that combines a Monte Carlo search tree with heuristics.

    :param player: Player number, 1 or 2
    :param nb_rows: Rows in grid
    :param nb_cols: Columns in grid
    :param timelimit: Maximum time allowed to send a next action.
    """
    # Identity and game-level settings.
    self.player = {player}
    self.timelimit = timelimit
    self.ended = False

    # Game state: the board first, then the search tree and the cursor
    # into its node table.
    self.board = Strings_board(nb_rows, nb_cols)
    self.tree = MonteCarloSearchTree(nb_rows, nb_cols)
    self.nodes = self.tree.tree['nodes']

    # Bookkeeping about the moves seen and how they were chosen.
    self.moves = []
    self.mctsmoves = 0
    self.heuristicmoves = 0
    self.mcts = True
    self.times_for_move = []

    # Every odd integer below 120: [1, 3, ..., 119].
    self.odds = list(range(1, 120, 2))
def __init__(self, player, nb_rows, nb_cols, timelimit):
    """Set up an agent that keeps two board representations in sync.

    :param player: Player number, 1 or 2
    :param nb_rows: Rows in grid
    :param nb_cols: Columns in grid
    :param timelimit: Maximum time allowed to send a next action.
    """
    # Identity and game-level settings.
    self.player = {player}
    self.timelimit = timelimit
    self.ended = False

    # Primary board, plus a second board built with one extra row and column.
    self.board = Strings_board(nb_rows, nb_cols)
    self.depth = 5
    self.board2 = Coins_strings_board(nb_rows + 1, nb_cols + 1)

    # Progress counters: total playable lines at the start, and lines played.
    self.maxnumberofmoves = len(self.board.get_potential_moves())
    self.numberofmovesdone = 0

    # Even and odd integers below 120: [0, 2, ..., 118] and [1, 3, ..., 119].
    self.evens = list(range(0, 120, 2))
    self.odds = list(range(1, 120, 2))
def __init__(self, player, nb_rows, nb_cols, timelimit):
    """Set up a minimal agent holding only the board and an odd-number table.

    :param player: Player number, 1 or 2
    :param nb_rows: Rows in grid
    :param nb_cols: Columns in grid
    :param timelimit: Maximum time allowed to send a next action.
    """
    self.player = {player}
    self.timelimit = timelimit
    self.ended = False
    self.board = Strings_board(nb_rows, nb_cols)

    # Every odd integer below 120: [1, 3, ..., 119].
    self.odds = list(range(1, 120, 2))
class DotsAndBoxesAgent:
    """
    A DotsAndBoxesAgent object should implement the following methods:
    - __init__
    - add_player
    - register_action
    - next_action
    - end_game

    This variant first asks a Monte Carlo search tree for a move and falls
    back to ``heuristics.find_good_move`` when the tree has no usable answer
    or the time limit is hit.

    Board coordinates: a horizontal line in game row ``r`` is stored in
    board row ``2 * r`` and a vertical line in board row ``2 * r + 1``; the
    column is used unchanged.
    """

    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.board = Strings_board(nb_rows, nb_cols)
        self.tree = MonteCarloSearchTree(nb_rows, nb_cols)
        # Cursor into the tree; advanced in register_action as moves arrive.
        self.nodes = self.tree.tree['nodes']
        # Odd integers below 120. No longer used for coordinate conversion
        # inside this class; kept so external readers of the attribute
        # still find it.
        self.odds = list(range(1, 120, 2))
        # Every move played so far, encoded as "row,column,orientation".
        self.moves = []
        self.mctsmoves = 0
        self.heuristicmoves = 0
        # Becomes False the first time the heuristic is used; from then on
        # the tree is never trusted again.
        self.mcts = True
        self.times_for_move = []

    def add_player(self, player):
        """Use the same agent for multiple players."""
        self.player.add(player)

    def register_action(self, row, column, orientation, player):
        """Register action played in game.

        :param row: Game row of the played line
        :param column: Game column of the played line
        :param orientation: "v" or "h"
        :param player: 1 or 2 (not used by this agent)
        """
        # Arithmetic instead of the self.odds table, so boards larger than
        # 60 rows work as well.
        board_row = row * 2 if orientation == "h" else row * 2 + 1
        self.board.fill_line(board_row, column)

        move = "{},{},{}".format(row, column, orientation)
        self.moves.append(move)
        node = self.tree.fill_line(self.nodes, move)
        # Presumably fill_line returns False when the move is not in the
        # tree; in that case the cursor stays where it is.
        if node != False:
            self.nodes = node['children']

    def next_action(self):
        """Return the next action this agent wants to perform.

        :return: (row, column, orientation) with integer row and column,
                 or None when the board is full
        """
        start_time = time.time()
        free_lines = self.board.get_potential_moves()
        if len(free_lines) == 0:
            # Board full
            return None

        signal.alarm(self.timelimit)
        try:
            (s, _) = self.tree.get_best_move_for_set(self.moves.copy())
            if isinstance(s, str) and self.mcts and s not in self.moves:
                self.mctsmoves += 1
                r, c, o = s.split(",")
                # The tree stores moves as text; convert so both branches
                # return the same types.
                action = (int(r), int(c), o)
            else:
                action = self._heuristic_action()
        except TimeoutException:
            action = self._heuristic_action()
        finally:
            # Always disarm the timer, even if an unexpected error escapes.
            signal.alarm(0)

        self.times_for_move.append(time.time() - start_time)
        return action

    def _heuristic_action(self):
        """Pick a move with the heuristic and permanently disable the tree."""
        (a, b) = heuristics.find_good_move(self.board)
        self.mcts = False
        self.heuristicmoves += 1
        # Even board rows hold horizontal lines, odd rows vertical ones;
        # in both cases the game row is the board row halved.
        orientation = "h" if a % 2 == 0 else "v"
        return int(a // 2), b, orientation

    def end_game(self):
        """Print the average time per move (if any was timed) and mark the game ended."""
        if self.times_for_move:
            total = sum(self.times_for_move)
            # The total is truncated to whole seconds before averaging,
            # as the original report did.
            print("avg time v3 =", int(total) / len(self.times_for_move))
        self.ended = True
class DotsAndBoxesAgent:
    """
    A DotsAndBoxesAgent object should implement the following methods:
    - __init__
    - add_player
    - register_action
    - next_action
    - end_game

    This variant picks every move with ``heuristics.find_good_move``.

    Board coordinates: a horizontal line in game row ``r`` is stored in
    board row ``2 * r`` and a vertical line in board row ``2 * r + 1``; the
    column is used unchanged.
    """

    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.board = Strings_board(nb_rows, nb_cols)
        self.times_for_move = []
        # Odd integers below 120. No longer used for coordinate conversion
        # inside this class; kept so external readers of the attribute
        # still find it.
        self.odds = list(range(1, 120, 2))

    def add_player(self, player):
        """Use the same agent for multiple players."""
        self.player.add(player)

    def register_action(self, row, column, orientation, player):
        """Register action played in game.

        :param row: Game row of the played line
        :param column: Game column of the played line
        :param orientation: "v" or "h"
        :param player: 1 or 2 (not used by this agent)
        """
        # Arithmetic instead of the self.odds table, so boards larger than
        # 60 rows work as well.
        board_row = row * 2 if orientation == "h" else row * 2 + 1
        self.board.fill_line(board_row, column)

    def next_action(self):
        """Return the next action this agent wants to perform.

        :return: (row, column, orientation), or None when the board is full
        """
        start_time = time.time()
        free_lines = self.board.get_potential_moves()
        if len(free_lines) == 0:
            # Board full
            return None

        (a, b) = heuristics.find_good_move(self.board)
        # Even board rows hold horizontal lines, odd rows vertical ones;
        # in both cases the game row is the board row halved.
        orientation = "h" if a % 2 == 0 else "v"
        action = (int(a // 2), b, orientation)

        self.times_for_move.append(time.time() - start_time)
        return action

    def end_game(self):
        """Print the average time per move (if any was timed) and mark the game ended."""
        if self.times_for_move:
            total = sum(self.times_for_move)
            # The total is truncated to whole seconds before averaging,
            # as the original report did.
            print("avg time v3 =", int(total) / len(self.times_for_move))
        self.ended = True
class DotsAndBoxesAgent:
    """
    A DotsAndBoxesAgent object should implement the following methods:
    - __init__
    - add_player
    - register_action
    - next_action
    - end_game

    This variant queries a Monte Carlo search tree with every ordering of
    the moves played so far and falls back to ``heuristics.find_good_move``
    when no ordering yields a usable move.

    Board coordinates: a horizontal line in game row ``r`` is stored in
    board row ``2 * r`` and a vertical line in board row ``2 * r + 1``; the
    column is used unchanged.
    """

    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.board = Strings_board(nb_rows, nb_cols)
        self.tree = MonteCarloSearchTree(nb_rows, nb_cols)
        self.nodes = self.tree.tree['nodes']
        # Odd integers below 120. No longer used for coordinate conversion
        # inside this class; kept so external readers of the attribute
        # still find it.
        self.odds = list(range(1, 120, 2))
        # Every move played so far, encoded as "row,column,orientation".
        self.moves = []
        self.mctsmoves = 0
        self.heuristicmoves = 0
        # Becomes False the first time the heuristic is used.
        self.mcts = True
        # Move orderings for which the tree gave no usable answer; any
        # ordering starting with one of these is skipped later on.
        self.shouldnotstartwith = []

    def add_player(self, player):
        """Use the same agent for multiple players."""
        self.player.add(player)

    def register_action(self, row, column, orientation, player):
        """Register action played in game.

        :param row: Game row of the played line
        :param column: Game column of the played line
        :param orientation: "v" or "h"
        :param player: 1 or 2 (not used by this agent)
        """
        # Arithmetic instead of the self.odds table, so boards larger than
        # 60 rows work as well.
        board_row = row * 2 if orientation == "h" else row * 2 + 1
        self.board.fill_line(board_row, column)
        self.moves.append("{},{},{}".format(row, column, orientation))
        print(self.moves)
        # self.nodes is not advanced here: next_action queries the tree
        # with complete move sequences instead of walking a cursor.

    def next_action(self):
        """Return the next action this agent wants to perform.

        :return: (row, column, orientation) with integer row and column,
                 or None when the board is full
        """
        free_lines = self.board.get_potential_moves()
        if len(free_lines) == 0:
            # Board full
            return None

        if self.mcts:
            # NOTE(review): this enumerates all len(moves)! orderings of the
            # history, which grows factorially with the number of moves.
            for perm in it.permutations(self.moves, len(self.moves)):
                ordering = list(perm)
                if any(ordering[:len(seq)] == seq
                       for seq in self.shouldnotstartwith):
                    continue
                # Hand the tree its own copy so it cannot alter `ordering`.
                (newmove, rate) = self.tree.get_best_move_for_set(
                    list(ordering))
                if (isinstance(newmove, str) and rate > 0
                        and newmove not in self.moves):
                    self.mctsmoves += 1
                    r, c, o = newmove.split(",")
                    # The tree stores moves as text; convert so both
                    # branches return the same types.
                    return int(r), int(c), o
                self.shouldnotstartwith.append(ordering)
                print("SHOULDNTO", self.shouldnotstartwith)
            print("MCTS POWER")

        # Reaching this point means the tree produced nothing usable.
        return self._heuristic_action()

    def _heuristic_action(self):
        """Pick a move with the heuristic and permanently disable the tree."""
        (a, b) = heuristics.find_good_move(self.board)
        self.mcts = False
        self.heuristicmoves += 1
        # Even board rows hold horizontal lines, odd rows vertical ones;
        # in both cases the game row is the board row halved.
        orientation = "h" if a % 2 == 0 else "v"
        return int(a // 2), b, orientation

    def end_game(self):
        """Print how many moves each strategy chose and mark the game ended."""
        print("HEURISTIC MOVES:", self.heuristicmoves)
        print("MCTS MOVES:", self.mctsmoves)
        self.ended = True
class DotsAndBoxesAgent:
    """
    A DotsAndBoxesAgent object should implement the following methods:
    - __init__
    - add_player
    - register_action
    - next_action
    - end_game

    This variant plays heuristic moves while fewer than 60% of the lines
    have been drawn and switches to an alpha-beta search afterwards, falling
    back to the heuristic when the search exceeds the time limit.

    Two boards are kept in sync. On ``self.board`` a horizontal line in game
    row ``r`` lives in row ``2 * r`` and a vertical one in row ``2 * r + 1``,
    with the column unchanged. On ``self.board2`` a horizontal line is at
    ``(2 * r, 2 * c + 1)`` and a vertical line at ``(2 * r + 1, 2 * c)``.
    """

    def __init__(self, player, nb_rows, nb_cols, timelimit):
        """Create Dots and Boxes agent.

        :param player: Player number, 1 or 2
        :param nb_rows: Rows in grid
        :param nb_cols: Columns in grid
        :param timelimit: Maximum time allowed to send a next action.
        """
        self.player = {player}
        self.timelimit = timelimit
        self.ended = False
        self.board = Strings_board(nb_rows, nb_cols)
        # NOTE(review): depth is adjusted after every search, but the search
        # itself is invoked with a literal depth of 10 -- confirm intent.
        self.depth = 5
        self.board2 = Coins_strings_board(nb_rows + 1, nb_cols + 1)
        # Even / odd integers below 120. No longer used for coordinate
        # conversion inside this class; kept so external readers of the
        # attributes still find them.
        self.evens = list(range(0, 120, 2))
        self.odds = list(range(1, 120, 2))
        self.maxnumberofmoves = len(self.board.get_potential_moves())
        self.numberofmovesdone = 0

    def add_player(self, player):
        """Use the same agent for multiple players."""
        self.player.add(player)

    def register_action(self, row, column, orientation, player):
        """Register action played in game.

        :param row: Game row of the played line
        :param column: Game column of the played line
        :param orientation: "v" or "h"
        :param player: 1 or 2
        """
        # Arithmetic instead of the odds/evens tables, so boards larger
        # than 60 rows or columns work as well.
        if orientation == "h":
            board_row = row * 2
            board2_col = column * 2 + 1
        else:
            board_row = row * 2 + 1
            board2_col = column * 2
        self.board.fill_line(board_row, column)
        self.board2.fill_line(board_row, board2_col, player)
        self.numberofmovesdone += 1

    def next_action(self):
        """Return the next action this agent wants to perform.

        :return: (row, column, orientation), or None when the board is full
        """
        free_lines = self.board.get_potential_moves()
        if len(free_lines) == 0:
            # Board full
            return None

        # Opening and middle game: heuristic only, no timer needed.
        if self.numberofmovesdone / self.maxnumberofmoves * 100 < 60:
            action = self._heuristic_action()
            signal.alarm(0)
            return action

        # Start the timer. Once the time limit is over, a SIGALRM signal is
        # sent; the except clause below catches the resulting
        # TimeoutException.
        signal.alarm(self.timelimit)
        try:
            # NOTE(review): with several registered players the element
            # picked from the set is arbitrary.
            (a, b, _) = abv1.alphabeta(
                self.board2, 10, player=list(self.player)[0])
        except TimeoutException:
            self.depth -= 1
            return self._heuristic_action()
        finally:
            # Always disarm the timer, even if an unexpected error escapes.
            signal.alarm(0)

        self.depth += 1
        # On board2 an even row means a horizontal line; either way the game
        # row and column are the board2 coordinates halved.
        orientation = "h" if a % 2 == 0 else "v"
        return (int(a // 2), int(b // 2), orientation)

    def _heuristic_action(self):
        """Ask the heuristic for a line on self.board and convert it to an action."""
        (a, b) = heuristics.heuristic(self.board)
        # Even board rows hold horizontal lines, odd rows vertical ones;
        # in both cases the game row is the board row halved.
        orientation = "h" if a % 2 == 0 else "v"
        return int(a // 2), b, orientation

    def end_game(self):
        """Mark the game as ended."""
        self.ended = True
def train(nb_rows, nb_cols):
    """Build Q-value and best-move lookup tables by enumerating board states.

    Every entry of ``board.board`` is first set to True. Then, for each
    level ``m``, every combination of ``m`` edges is switched to False,
    evaluated against the tables of level ``m - 1``, and switched back.

    :param nb_rows: Rows in grid (passed to ``Strings_board``)
    :param nb_cols: Columns in grid (passed to ``Strings_board``)
    :return: ``(Q, Qedge)``. ``Q[m][score]`` is a dict from a stringified
             board number to a Q-value; ``Qedge[m]`` is a dict from a
             stringified board number to the best edge ``(i, j)``.

    NOTE(review): ``RW`` and ``gamma`` are not defined in this function;
    presumably a module-level reward weight and discount factor -- confirm.
    NOTE(review): when ``nb_rows * nb_cols`` is 0 the main loop never runs
    and ``Q`` / ``Qedge`` are unbound at the return statement.
    """
    board = Strings_board(nb_rows, nb_cols)
    edges = []
    # Mark every edge True and remember its coordinates.
    for i, row in enumerate(board.board):
        for j, val in enumerate(row):
            board.board[i][j] = True
            edges.append((i, j))
    # NOTE(review): m runs over nb_rows * nb_cols levels, not over
    # len(edges) -- confirm this is the intended number of levels.
    for m in range(nb_rows * nb_cols):
        # Every way of choosing m edges; each choice is one board state.
        board_states = combinations(edges, m)
        if m == 0:
            # end states: no edge is False, only the score varies
            board_num = str(qlearning.board2num(board.board))
            max_score = (nb_rows * nb_cols)
            Qedge = [dict()]
            Q = [[dict() for i in range(max_score + 1)]]
            for score in range(max_score + 1):
                Q[m][score][board_num] = RW * (score - (max_score // 2))
        else:
            # initiate row in Q-value lookup table
            Q.append(
                [dict() for i in range(max_score - int(np.floor(m // 4)))])
            Qedge.append(dict())
            for board_state in board_states:
                # construct board from board state
                for edge in board_state:
                    i, j = edge
                    board.board[i][j] = False
                # find out how many squares are already filled
                total_score = max_score - len(board.get_potential_moves())
                # find the best move - it must be the same, regardless of score
                # we will consider it to be total_score
                potential_moves = []
                for i, row in enumerate(board.board):
                    for j, val in enumerate(row):
                        if val == False:
                            gain = board.check_surrounding_squares((i, j), 3)
                            # move remembers edge inserted and score gain,
                            potential_moves.append(((i, j), gain))
                # map potential_moves on Q-values
                qmax = -np.inf
                board_num = qlearning.board2num(board.board)
                for move in potential_moves:
                    edge, gain = move
                    edge_num = qlearning.edge2num(edge)
                    # The successor state is looked up one level down under
                    # the key board_num + edge_num. A scoring move keeps the
                    # sign; otherwise the value is negated (presumably
                    # because the turn passes to the opponent -- confirm).
                    if gain:
                        qval = gamma * Q[m - 1][total_score + gain][str(board_num + edge_num)]
                    else:
                        qval = -gamma * Q[m - 1][0][str(board_num + edge_num)]
                    if qmax < qval:
                        qmax = qval
                        best_edge = edge
                # insert Q-values
                for score_state in range(total_score + 1):
                    Q[m][score_state][str(
                        board_num)] = qmax - RW * (total_score - score_state)
                # insert best move
                # NOTE(review): best_edge is only assigned when some move
                # beats -inf; otherwise it is unbound or left over from a
                # previous board state.
                Qedge[m][str(board_num)] = best_edge
                # Restore the board before handling the next state.
                for edge in board_state:
                    i, j = edge
                    board.board[i][j] = True
    return (Q, Qedge)