def __init__(self, s_model=None, f_model=None):
    """Create a player with a fresh board and its two prediction models.

    Args:
        s_model: Model stored as ``_strong_model``; when ``None`` the
            module-level ``s_base_model`` is used.
        f_model: Model stored as ``_fast_model``; when ``None`` the
            module-level ``f_base_model`` is used.
    """
    self._board = Board()
    self._mycolor = None
    # Identity test: `== None` would invoke the model's own __eq__, which is
    # not guaranteed to return a plain bool for framework objects.
    self._strong_model = s_base_model if s_model is None else s_model
    self._fast_model = f_base_model if f_model is None else f_model
def rollout(self): """Play according to model_stone until the end returns result""" # make prediction for every slot prediction = node.get_play_stone(self.board) # check legal moves child_gnugo_board = deepcopy(self.gnugo_board) legal_moves = self.gnugo_board.legal_moves() # convert playable coords to board board_legal_moves = np.zeros((9, 9), dtype=bool) for move in legal_moves: x, y = name_to_coord(Board.flat_to_name(move)) board_legal_moves[x, y] = 1 # combine predictions with legal moves legal_predict = np.reshape(prediction[0], (9, 9)) * board_legal_moves # take best and push index_best = np.argmax(legal_predict) x = index_best % 9 # 9 should be replaced by board_size y = index_best // 9 # 9 should be replaced by board_size child_board = deepcopy(self.board) child_board[x, y][0] = 1 tmp_move = Board.name_to_flat(Board.coord_to_name((x, y))) if tmp_move in legal_moves: child_gnugo_board.push(tmp_move) child = node(self, child_board, child_gnugo_board, tmp_move) self.children.append(child) else: tmp_move = np.random.choice(legal_moves) child_gnugo_board.push(tmp_move) child = node(self, child_board, child_gnugo_board, tmp_move) self.children.append(child) # continue rollouts while not game_over game_over = child_gnugo_board.is_game_over() if not game_over: return not child.rollout() else: result = child_gnugo_board.result() if result == "1/2-1/2": return 0 return 1 if result == "0-1" else -1 # black wins if True
def explore_node(self):
    """Create one child node for every legal move of this node.

    Every child receives its own copy of the local board (with the played
    point set to 1) and its own copy of the gnugo board with only that
    child's move pushed. Children are appended to ``self.children``.
    """
    for move in self.gnugo_board.legal_moves():
        x, y = name_to_coord(Board.flat_to_name(move))

        # make move on a private copy of the local board
        child_board = deepcopy(self.board)
        child_board[x, y] = 1

        # Copy the engine board per child: pushing every move onto a single
        # shared copy would leave all children pointing at one board that
        # contains the whole sequence of sibling moves.
        child_gnugo_board = deepcopy(self.gnugo_board)
        child_gnugo_board.push(move)

        child = node(self, child_board, child_gnugo_board, move)
        self.children.append(child)
        # NOTE(review): the original indentation of this counter was lost in
        # the source; it is assumed to count once per created child — confirm.
        self.nb_child_visit += 1
def __init__(self):
    """Start with no assigned colour and a freshly constructed board."""
    self._mycolor = None
    self._board = Board()
def __init__(self):
    """Set up the board and the bookkeeping used by the timed search."""
    # Game state.
    self.board = Board()
    self.mycolor = None
    # Search state: time budget (in the units returned by the clock used by
    # the search), the start timestamp, and the position cache.
    self.max_time = 7.4
    self.start_time = 0
    self.transposition_table = dict()
class myPlayer(PlayerInterface):
    """Player using iterative-deepening minimax with alpha-beta pruning.

    The search deepens one ply at a time until ``max_time`` has elapsed and
    plays the move found by the last iteration that finished in time.
    """

    # Positional bonus per flat board index (9x9): the border scores 0, the
    # centre cross 1 and the remaining inner points 2.
    _POSITION_SCORE = (
        0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 2, 2, 2, 1, 2, 2, 2, 0,
        0, 2, 2, 2, 1, 2, 2, 2, 0,
        0, 2, 2, 1, 1, 1, 2, 2, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 0,
        0, 2, 2, 1, 1, 1, 2, 2, 0,
        0, 2, 2, 2, 1, 2, 2, 2, 0,
        0, 2, 2, 2, 1, 2, 2, 2, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0,
    )

    def __init__(self):
        self.board = Board()
        self.mycolor = None
        self.transposition_table = {}
        self.max_time = 7.4
        self.start_time = 0

    def getPlayerName(self):
        """Return the display name of this player."""
        return "Team 38"

    def getPlayerMove(self):
        """Search for a move, play it on the local board and return its name."""
        if self.board.is_game_over():
            return "PASS"
        move = self.choose_action()
        self.board.push(move)
        return Board.flat_to_name(move)

    def playOpponentMove(self, move):
        """Apply the opponent's move (given by name) to the local board."""
        self.board.push(Board.name_to_flat(move))

    def newGame(self, color):
        """Record our colour and the opponent's colour for a new game."""
        self.mycolor = color
        self.opponent = Board.flip(color)

    def endGame(self, winner):
        """Print whether we won or lost."""
        if self.mycolor == winner:
            print("I won :D")
        else:
            print("I lost :(")

    def choose_action(self):
        """Run iterative deepening until the time budget is spent.

        Returns:
            The action of the deepest iteration that completed within
            ``max_time``; -1 if not even the first iteration completed.
        """
        depth = 1
        self.start_time = time()
        (eval_score, selected_action) = (-1, -1)
        while True:
            tmp_time = time()
            # Fresh cache per iteration.
            self.transposition_table = {}
            new_score, new_action = self.minimax(depth, True, float('-inf'),
                                                 float('+inf'))
            # Only trust an iteration that was not cut short by the clock.
            if time() - self.start_time < self.max_time:
                (eval_score, selected_action) = (new_score, new_action)
            print("MINIMAX AB ID(%d) : eval=%f, action=%d, time=%s" %
                  (depth, eval_score, selected_action, time() - tmp_time))
            if time() - self.start_time >= self.max_time:
                break
            depth += 1
        return selected_action

    def _transposition_key(self, depth, is_max_turn):
        """Build the cache key for the current position.

        The remaining depth and the side to move are part of the key so that
        a value computed with a different search horizon, or from the other
        side's point of view, is never reused for this node.
        """
        return (str(self.board._currentHash), depth, is_max_turn)

    def minimax(self, depth, is_max_turn, alpha, beta):
        """Alpha-beta minimax on ``self.board``.

        Args:
            depth: Remaining plies to search.
            is_max_turn: True when the node maximises the evaluation.
            alpha: Best value the maximiser can already guarantee.
            beta: Best value the minimiser can already guarantee.

        Returns:
            ``(value, action)``; ``action`` is ``None`` at leaves and -1 when
            no action was selected.
        """
        key = self._transposition_key(depth, is_max_turn)
        transposition = self.transposition_table.get(key)
        if transposition is not None:
            return transposition

        out_of_time = time() - self.start_time >= self.max_time
        if depth == 0 or out_of_time or self.board.is_game_over():
            result = (self.evaluate(), None)
            self.transposition_table[key] = result
            return result

        key_of_actions = list(self.board.generate_legal_moves())
        shuffle(key_of_actions)  # randomness
        best_value = float('-inf') if is_max_turn else float('+inf')
        best_action = -1
        action_targets = []  # every action currently tied for the best value
        for action_key in key_of_actions:
            self.board.push(action_key)
            eval_child, _ = self.minimax(depth - 1, not is_max_turn, alpha,
                                         beta)
            self.board.pop()
            if is_max_turn and best_value < eval_child:
                best_value = eval_child
                action_targets.clear()
                action_targets.append(action_key)
                alpha = max(alpha, best_value)
                if beta <= alpha:
                    break
            elif (not is_max_turn) and best_value > eval_child:
                best_value = eval_child
                action_targets.clear()
                action_targets.append(action_key)
                beta = min(beta, best_value)
                if beta <= alpha:
                    break
            elif best_value == eval_child:
                action_targets.append(action_key)

        if action_targets:
            best_action = choice(action_targets)  # randomness
        self.transposition_table[key] = (best_value, best_action)
        return (best_value, best_action)

    def evaluate(self):
        """Heuristically score the current position.

        Combines three terms: stone-count difference (weight 3), string
        liberties (weight 1, counted once per stone) and the positional table
        (weight 10). Stones of the side to move count negatively, the other
        side's positively, and all three terms are negated when the side to
        move is ``self.mycolor``. Each term is multiplied by an independent
        ``normal(1, 0.1)`` factor, so repeated calls differ slightly.
        """
        to_move = self.board.next_player()

        if to_move == Board._BLACK:
            # score for white
            score_pieces = (self.board._nbWHITE - self.board._nbBLACK) * 3
        else:
            # score for black
            score_pieces = (self.board._nbBLACK - self.board._nbWHITE) * 3

        score_liberties = 0
        score_positions = 0
        for fcoord in range(len(self.board)):
            stone = self.board[fcoord]
            if stone == Board._EMPTY:
                continue
            string = self.board._getStringOfStone(fcoord)
            liberties = self.board._stringLiberties[string]
            position = self._POSITION_SCORE[fcoord] * 10
            if stone == to_move:
                score_liberties -= liberties
                score_positions -= position
            else:
                score_liberties += liberties
                score_positions += position

        if to_move == self.mycolor:
            score_pieces *= -1
            score_liberties *= -1
            score_positions *= -1

        return (score_pieces * normal(1, 0.1) +
                score_positions * normal(1, 0.1) +
                score_liberties * normal(1, 0.1))
def playOpponentMove(self, move):
    """Translate the opponent's move name to a flat index and push it."""
    flat_move = Board.name_to_flat(move)
    self.board.push(flat_move)
def getPlayerMove(self):
    """Pick a move with choose_action(), push it and return its name.

    Returns "PASS" without searching when the game is already over.
    """
    if not self.board.is_game_over():
        chosen = self.choose_action()
        self.board.push(chosen)
        return Board.flat_to_name(chosen)
    return "PASS"
class myPlayer(PlayerInterface):
    """MCTS player whose selection and playouts are guided by two models.

    ``_strong_model`` contributes a policy term to the UCT score in
    ``select_child``; ``_fast_model`` picks the moves of the playouts in
    ``simulate_random_game``.
    """

    def __init__(self, s_model=None, f_model=None):
        """Create the player.

        Args:
            s_model: Model used during selection; defaults to the
                module-level ``s_base_model`` when ``None``.
            f_model: Model used during playouts; defaults to the
                module-level ``f_base_model`` when ``None``.
        """
        self._board = Board()
        self._mycolor = None
        # Identity test: `== None` would call the model's own __eq__.
        self._strong_model = s_base_model if s_model is None else s_model
        self._fast_model = f_base_model if f_model is None else f_model

    def getPlayerName(self):
        """Return the display name of this player."""
        return "RL player"

    def encode(self, board):
        """Encode the move history of ``board`` as a (9, 9, 11) stack.

        Planes, in order: black stones, white stones, player-to-move flag
        (all ones when an even number of moves has been played, else zeros),
        then eight history planes. History plane ``8 - i`` marks the i-th most
        recent move (i = 1..8), so the last plane holds the latest move.

        Stones are placed purely from the list of move names: captures are
        not replayed, and entries equal to "PASS" are skipped (assumes passes
        are recorded under that name — TODO confirm against Board).
        """
        history = board._historyMoveNames
        nb_moves = len(history)
        black_stones = np.zeros((9, 9), dtype=np.float32)
        white_stones = np.zeros((9, 9), dtype=np.float32)
        memo = [np.zeros((9, 9), dtype=int) for _ in range(8)]

        # Same convention as the training data: even move count -> ones.
        if nb_moves % 2 == 0:
            player_turn = np.ones((9, 9), dtype=np.float32)
        else:
            player_turn = np.zeros((9, 9), dtype=np.float32)

        # Black plays the even-indexed moves, white the odd-indexed ones.
        for index, name in enumerate(history):
            if name == "PASS":
                continue
            coord = name_to_coord(name)
            plane = white_stones if index % 2 else black_stones
            plane[coord[0], coord[1]] = 1

        # i counts back from the latest move; stop when the history runs out.
        for i in range(1, 9):
            if nb_moves < i:
                break
            name = history[-i]
            if name == "PASS":
                continue
            coord = name_to_coord(name)
            memo[8 - i][coord[0], coord[1]] = 1

        return np.dstack((black_stones, white_stones, player_turn, memo[0],
                          memo[1], memo[2], memo[3], memo[4], memo[5],
                          memo[6], memo[7]))

    def getPlayerMove(self):
        """Select a move by MCTS, play it locally and return its name."""
        if self._board.is_game_over():
            return "PASS"
        move = self.select_move(self._board)
        self._board.play_move(move)
        return Board.flat_to_name(move)

    def playOpponentMove(self, move):
        """Apply the opponent's move (given by name) to the local board."""
        self._board.play_move(Board.name_to_flat(move))

    def newGame(self, color):
        """Record our colour and the opponent's colour for a new game."""
        self._mycolor = color
        self._opponent = Board.flip(color)

    def endGame(self, winner):
        """Print whether we won or lost."""
        if self._mycolor == winner:
            print("I won :D")
        else:
            print("I lost :(")

    def select_move(self, board_org, max_time=7.4, temperature=1.2):
        """Run MCTS from ``board_org`` for about ``max_time`` seconds.

        Args:
            board_org: Position to search from; it is deep-copied for every
                round and never modified.
            max_time: Time budget in seconds, checked once per round.
            temperature: Weight of the exploration term in the UCT score.

        Returns:
            The move of the root child with the highest winning fraction for
            the player to move, or -1 when the root has no children.
        """
        start_time = time.time()
        root = MCTSNode(board_org.weak_legal_moves())

        rounds = 0
        while True:
            board = copy.deepcopy(board_org)
            node = root

            # Selection: descend while the node is fully expanded.
            # select_child also plays the chosen move on `board`.
            while (not node.can_add_child()) and (not board.is_game_over()):
                node = self.select_child(node, board, temperature)

            # Expansion.
            if node.can_add_child() and not board.is_game_over():
                node = node.add_random_child(board)

            # Simulation.
            winner = self.simulate_random_game(board)

            # Backpropagation up to the root.
            while node is not None:
                node.record_win(winner)
                node = node.parent

            if time.time() - start_time >= max_time:
                print()
                break
            rounds += 1
            print("Rounds %d (%f)" % (rounds, time.time() - start_time),
                  end='\r')

        # debug: show the five best root children
        to_move = board_org.next_player()
        scored_moves = [(child.winning_frac(to_move), child.move,
                         child.num_rollouts) for child in root.children]
        scored_moves.sort(key=lambda x: x[0], reverse=True)
        for s, m, n in scored_moves[:5]:
            print('%s - %.3f (%d)' % (m, s, n))

        # pick best node
        best_move = -1
        best_pct = -1.0
        for child in root.children:
            child_pct = child.winning_frac(board_org.next_player())
            if child_pct > best_pct:
                best_pct = child_pct
                best_move = child.move
        print('Select move %s with win pct %.3f' % (best_move, best_pct))
        return best_move

    def select_child(self, node, board, temperature):
        """Pick the child with the best UCT score and play its move.

        The exploitation term is the mean of the strong model's output for
        the child's move and the child's observed winning fraction.

        Returns:
            The selected child; its move has been played on ``board``.
        """
        # upper confidence bound for trees (UCT) metric
        total_rollouts = sum(child.num_rollouts for child in node.children)
        log_rollouts = math.log(total_rollouts)

        data_prepared = np.array(
            [prepare_datas(board, all_rotations=False)[0][0]], dtype=int)
        policy_prediction = self._strong_model.predict(data_prepared)
        # NOTE(review): the prediction is indexed with child.move directly; a
        # pass (-1) therefore reads the last output entry — confirm the
        # model's output layout matches the flat move numbering.

        best_score = -1
        best_child = None
        for child in node.children:
            win_percentage = (
                policy_prediction[0][child.move] +
                child.winning_frac(board.next_player())) / 2.0
            exploration_factor = math.sqrt(log_rollouts / child.num_rollouts)
            uct_score = win_percentage + temperature * exploration_factor
            if uct_score > best_score:
                best_score = uct_score
                best_child = child

        board.play_move(best_child.move)
        return best_child

    def simulate_random_game(self, board):
        """Play ``board`` out with the fast model and report the leader.

        At each ply the model's highest-scoring move is played when it is in
        ``weak_legal_moves()``, otherwise a random one of those moves. The
        playout is capped at 1500 plies so it can never loop forever.

        Returns:
            "1-0" when white has more stones on the board than black, "0-1"
            when it has fewer, "1/2-1/2" on equality.
        """
        nb_iter = 0
        while not board.is_game_over() and nb_iter < 1500:
            moves = board.weak_legal_moves()
            move_probabilities = self._fast_model.predict(
                np.array([prepare_datas(board, all_rotations=False)[0][0]],
                         dtype=int))
            max_move = np.argmax(move_probabilities)
            if max_move in moves:
                board.play_move(max_move)
            else:
                board.play_move(random.choice(moves))
            nb_iter += 1

        if board._nbWHITE > board._nbBLACK:
            return "1-0"
        elif board._nbWHITE < board._nbBLACK:
            return "0-1"
        else:
            return "1/2-1/2"
def newGame(self, color):
    """Store the colour we play and derive the opponent's with Board.flip."""
    self._mycolor = color
    opponent_color = Board.flip(color)
    self._opponent = opponent_color
def __init__(self):
    """Create the board and load the Keras model saved as 'model_priors'."""
    self._board = Board()
    self._mycolor = None
    load_model = tensorflow.keras.models.load_model
    self._model_priors = load_model('model_priors')
class myPlayer(PlayerInterface):
    """MCTS player that scores expanded nodes with a Keras model.

    No playout is run: the value backed up from a new node is the output of
    ``_model_priors`` on feature maps built from the node's move list.
    """

    def __init__(self):
        self._board = Board()
        self._mycolor = None
        self._model_priors = tensorflow.keras.models.load_model(
            'model_priors')

    def getPlayerName(self):
        """Return the display name of this player."""
        return "Paul & Hugo"

    def getPlayerMove(self):
        """Select a move by MCTS, play it locally and return its name."""
        if not self._board.is_game_over():
            chosen = self.select_move(self._board)
            self._board.play_move(chosen)
            return Board.flat_to_name(chosen)
        return "PASS"

    def playOpponentMove(self, move):
        """Apply the opponent's move (given by name) to the local board."""
        flat_move = Board.name_to_flat(move)
        self._board.play_move(flat_move)

    def newGame(self, color):
        """Record our colour and the opponent's colour for a new game."""
        self._mycolor = color
        self._opponent = Board.flip(color)

    def endGame(self, winner):
        """Print whether we won or lost."""
        if self._mycolor != winner:
            print("I lost :(")
        else:
            print("I won :D")

    def select_move(self, board_org, max_time=7.4, temperature=1.2):
        """Grow a search tree for ``max_time`` seconds and pick a root move.

        Returns:
            The move of the root child with the highest ``winrate`` for the
            player to move on ``board_org``, or -1 if the root has no child.
        """
        started = time.time()
        # The root knows the legal moves and whose turn it is.
        root = MCTSNode(board_org.weak_legal_moves(), board_org._nextPlayer)

        while True:
            board = copy.deepcopy(board_org)
            node = root

            # Exploration: walk down fully expanded nodes, playing each
            # selected move on the scratch board.
            while not (node.can_add_child() or board.is_game_over()):
                node = self.select_child(node, board, temperature)

            # Expansion: attach one random new child when possible.
            if node.can_add_child() and not board.is_game_over():
                node = node.add_random_child(board)

            # Build the sample the prior network will score.
            size = board._BOARDSIZE
            valid, feature_maps = db.build_history_from_moves(
                node.list_of_moves, size)

            if valid:
                batch = np.empty((0, 15, size, size), dtype='int8')
                batch = np.append(batch, feature_maps, axis=0)
                # Known problem when the model is loaded from disk: predict
                # fails with
                #   tensorflow.python.framework.errors_impl.UnimplementedError:
                #   The Conv2D op currently only supports the NHWC tensor
                #   format on the CPU. The op was given the format: NCHW
                # i.e. it wants (batch, height, width, channels). Transposing
                # with np.transpose(batch, (0, 2, 3, 1)) is rejected too:
                #   ValueError: Input 0 of layer sequential is incompatible
                #   with the layer: expected axis -3 of input shape to have
                #   value 15 but received input with shape (None, 9, 9, 15)
                # => looks like a save/load issue, since the same call works
                # in CNN_priors.ipynb right after training.
                score = self._model_priors.predict(batch)
            else:
                # An invalid board counts as a loss.
                score = 0

            # Backpropagation: update every ancestor up to the root.
            while node is not None:
                node.update_winrate(self._mycolor, score)
                node = node.parent

            # time over
            if time.time() - started >= max_time:
                break

        # Exploitation: keep the root child with the best win ratio.
        best_move, best_ratio = -1, -1.0
        for candidate in root.children:
            ratio = candidate.winrate(board_org.next_player())
            if ratio > best_ratio:
                best_move, best_ratio = candidate.move, ratio
        print('Select move %s with win pct %.3f' % (best_move, best_ratio))
        return best_move

    def select_child(self, node, board, temperature):
        """Return the child with the highest UCT score and play its move.

        The score is ``winrate + temperature * sqrt(log(N) / n)`` where ``N``
        is the sum of the children's rollouts and ``n`` the child's own.
        """
        # (open question left by the authors: use node.num_rollouts instead?)
        visits = sum(c.num_rollouts for c in node.children)
        log_visits = math.log(visits)

        top_score = -1
        top_child = None
        for candidate in node.children:
            exploitation = candidate.winrate(board.next_player())
            exploration = math.sqrt(log_visits / candidate.num_rollouts)
            score = exploitation + temperature * exploration
            if score > top_score:
                top_score, top_child = score, candidate

        board.play_move(top_child.move)
        return top_child
def prepare_datas(board,
                  care_about_win=False,
                  all_rotations=True,
                  all_moves=False):
    """Turn the move history of ``board`` into network samples.

    Args:
        board: Board whose ``_historyMoveNames`` is replayed.
        care_about_win: When building targets, mark the target move with -1
            instead of 1 according to the winner test in the loop below.
            Draws are treated like wins.
        all_rotations: True builds training samples: ``[input, target]``
            pairs with the eight rotations/reflections of each position.
            False builds a single ``[input]`` for the current position (no
            target), which is what prediction needs.
        all_moves: With ``all_rotations``, use every position that has a
            following move instead of a random sample of them.

    Returns:
        A list of samples. Inputs are (9, 9, 11) arrays: black stones, white
        stones, a constant player plane and eight history planes. The history
        planes are all zeros (the code that filled them is disabled). Targets
        are flat arrays of length 81. The list is empty when training samples
        are requested from a history with fewer than two moves.
    """
    datas = []
    moves = board._historyMoveNames
    length = len(moves)

    if all_rotations:
        nb_candidates = length - 1  # the last move has no successor
        if all_moves:
            # Learning on a whole game: retrace every position.
            chosen_moves = list(range(nb_candidates))
        else:
            if nb_candidates < 1:
                return datas
            nb_uses = random.randint(MIN_INFO_FROM_ONE_GAME,
                                     MAX_INFO_FROM_ONE_GAME)
            # Gaussian weights centred on the middle of the game so that
            # mid-game positions are sampled more often.
            mean = length / 2.
            std = length / 4.
            positions = np.arange(1, nb_candidates + 1)
            weights = np.exp(-((positions - mean)**2) / (2. * std**2))
            # Probabilities must sum to 1 and be passed as `p=`: the third
            # positional argument of np.random.choice is `replace`.
            proba = weights / weights.sum()
            chosen_moves = np.random.choice(nb_candidates, nb_uses, p=proba)
    else:
        # Only the current position: we want a prediction for it.
        chosen_moves = [length - 1]

    # The game result is only needed to sign the targets.
    winner = 2
    if all_rotations and care_about_win:
        result = board.result()
        if result == "1-0":
            winner = 1
        elif result == "0-1":
            winner = 0

    for i in chosen_moves:
        # Stop after move i and predict the next one.
        black = np.zeros((9, 9), dtype=int)
        white = np.zeros((9, 9), dtype=int)
        memo = [np.zeros((9, 9), dtype=int) for _ in range(8)]

        if (i + 1) % 2 == 0:
            current = np.ones((9, 9), dtype=int)
        else:
            # e.g. after the first move: (0 + 1) % 2 == 1
            current = np.zeros((9, 9), dtype=int)

        if all_rotations:
            goal_move = board.name_to_coord(moves[i + 1])
            goal = np.zeros((9, 9), dtype=int)
            if care_about_win and ((winner == 1 and current[0][0] != 1) or
                                   (winner == 0 and current[0][0] != 0)):
                goal[goal_move[0]][goal_move[1]] = -1
            else:
                goal[goal_move[0]][goal_move[1]] = 1

        # Replay the game up to move i on a fresh board so that captures are
        # reflected in the stone planes.
        b = Board()
        for j in range(i + 1):
            b.push(Board.name_to_flat(moves[j]))

        for x in range(9):
            for y in range(9):
                p = b._board[Board.flatten((x, Board._BOARDSIZE - y - 1))]
                if p == Board._WHITE:
                    white[x][y] = 1
                elif p == Board._BLACK:
                    black[x][y] = 1

        curr_data = np.dstack(
            (black, white, current, memo[0], memo[1], memo[2], memo[3],
             memo[4], memo[5], memo[6], memo[7]))

        if all_rotations:
            # Four rotations, then the four rotations of the flipped board.
            for flipped in (False, True):
                if flipped:
                    curr_data = np.flipud(curr_data)
                    goal = np.flipud(goal)
                for k in range(4):
                    datas.append([
                        np.rot90(curr_data, k=k, axes=(0, 1)),
                        np.reshape(np.rot90(goal, k=k, axes=(0, 1)), 81)
                    ])
        else:
            datas.append([curr_data])

    return datas
def getPlayerMove(self):
    """Choose a move with select_move(), play it and return its name.

    Returns "PASS" without searching when the game is already over.
    """
    if not self._board.is_game_over():
        chosen = self.select_move(self._board)
        self._board.play_move(chosen)
        return Board.flat_to_name(chosen)
    return "PASS"
def update_winrate(self, player, score):
    """Record one evaluation: `score` for `player`, `1 - score` for the other.

    Also counts the evaluation in ``num_rollouts``.
    """
    self.total_scores[player] += score
    other = Board.flip(player)
    complement = 1 - score
    self.total_scores[other] += complement
    self.num_rollouts += 1
def playOpponentMove(self, move):
    """Translate the opponent's move name to a flat index and play it."""
    flat_move = Board.name_to_flat(move)
    self._board.play_move(flat_move)
def backpropagate(self, node, value):
    """Add `value` to `node` and, recursively, to each of its ancestors.

    Increments the visit count, accumulates the value, refreshes the mean
    (`total_value / visits`) and prints it, then recurses into `node.parent`
    while that attribute is truthy.
    """
    node.visits += 1
    node.total_value += value
    node.value = node.total_value/node.visits
    print(f"After backprop, node value {node.value}")
    if node.parent:
        self.backpropagate(node.parent,value)

def play(self):
    """Call select() on the best child of the tree's current node."""
    return self.select(self.node.best_child())

# --- Demo script: build a board, a root node and a tree, run one selection ---
print(f"\nCréation de la board :\n")
from rollout import Rollout, copy_board
from Goban import Board
board = Board()
print(np.array(board.generate_legal_moves()).shape) # 81 possible moves + pass
print(f"Création du noeud :\n")
base_node = MCTSNode(board, 0) # the prior should come from CNN.predict(board) instead of 0
print(f"Création du MCTS Tree :\n")
mcts_tree= MCTSTree(base_node)
# First step: expand the base node
#mcts_tree.expand()
mcts_tree.select() # expansion is done inside select
print("\n\n!!!!!!!!!!!!!!!!!!!\n\n")
# NOTE(review): the body of this loop lies outside the visible source.
for node in mcts_tree.nodeList:
class myPlayer(PlayerInterface):
    """MCTS player with random playouts and stone-count-aware passing."""

    def __init__(self):
        self._board = Board()
        self._mycolor = None

    def getPlayerName(self):
        """Return the display name of this player."""
        return "Team 38"

    def getPlayerMove(self):
        """Select a move by MCTS, play it locally and return its name."""
        if self._board.is_game_over():
            return "PASS"
        move = self.select_move(self._board)
        self._board.play_move(move)
        return Board.flat_to_name(move)

    def playOpponentMove(self, move):
        """Apply the opponent's move (given by name) to the local board."""
        self._board.play_move(Board.name_to_flat(move))

    def newGame(self, color):
        """Record our colour and the opponent's colour for a new game."""
        self._mycolor = color
        self._opponent = Board.flip(color)

    def endGame(self, winner):
        """Print whether we won or lost."""
        if self._mycolor == winner:
            print("I won :D")
        else:
            print("I lost :(")

    @staticmethod
    def _next_player_is_ahead(board):
        """Return True when the player to move has strictly more stones."""
        to_move = board.next_player()
        if to_move == board._WHITE:
            return board._nbWHITE > board._nbBLACK
        if to_move == board._BLACK:
            return board._nbWHITE < board._nbBLACK
        return False

    @staticmethod
    def select_move(board_org, max_time=7, temperature=1.33):
        """Run MCTS from ``board_org`` for about ``max_time`` seconds.

        Args:
            board_org: Position to search from; deep-copied every round.
            max_time: Time budget in seconds, checked once per round.
            temperature: Weight of the exploration term in the UCT score.

        Returns:
            -1 (pass) immediately when the opponent has just passed and we
            have more stones; otherwise the move of the root child with the
            highest winning fraction (-1 if the root has no children).
        """
        start_time = time.time()

        # exploit the chance when the other player passes and he's losing
        if board_org._lastPlayerHasPassed and \
                myPlayer._next_player_is_ahead(board_org):
            return -1

        root = MCTSNode(board_org.weak_legal_moves())

        rounds = 0
        while True:
            board = copy.deepcopy(board_org)
            node = root

            # Selection (select_child also plays the move on `board`).
            while (not node.can_add_child()) and (not board.is_game_over()):
                node = myPlayer.select_child(node, board, temperature)

            # Expansion.
            if node.can_add_child() and not board.is_game_over():
                node = node.add_random_child(board)

            # Simulation and backpropagation.
            winner = myPlayer.simulate_random_game(board)
            while node is not None:
                node.record_win(winner)
                node = node.parent

            rounds += 1
            print("Rounds %d (%f)" % (rounds, time.time() - start_time),
                  end='\r')
            if time.time() - start_time >= max_time:
                print()
                break

        # debug: show the five best root children
        scored_moves = [(child.winning_frac(board_org), child.move,
                         child.num_rollouts) for child in root.children]
        scored_moves.sort(key=lambda x: x[0], reverse=True)
        for s, m, n in scored_moves[:5]:
            print('%s - %.2f (%d)' % (m, s, n))

        # pick best node
        best_move = -1
        best_pct = -1.0
        for child in root.children:
            child_pct = child.winning_frac(board_org)
            if child_pct > best_pct:
                best_pct = child_pct
                best_move = child.move
        print('Select move %s with win pct %.3f' % (best_move, best_pct))
        return best_move

    @staticmethod
    def select_child(node, board, temperature):
        """Pick the child with the best UCT score and play its move.

        Returns:
            The selected child. When no child scores above -1 (for instance
            when there are no children) ``node`` itself is returned and a
            pass is played on ``board``.
        """
        # upper confidence bound for trees (UCT) metric; the small offset
        # keeps the logarithm defined when no rollout has been recorded.
        total_rollouts = sum(child.num_rollouts
                             for child in node.children) + 0.001
        log_rollouts = math.log(total_rollouts)

        best_score = -1
        best_child = None
        best_move = -1
        for child in node.children:
            win_percentage = child.winning_frac(board)
            exploration_factor = math.sqrt(log_rollouts / child.num_rollouts)
            uct_score = win_percentage + temperature * exploration_factor
            if uct_score > best_score:
                best_score = uct_score
                best_child = child
                best_move = child.move

        if best_child is None:
            best_child = node
            best_move = -1
        board.play_move(best_move)
        return best_child

    @staticmethod
    def simulate_random_game(board):
        """Play ``board`` out with random moves and report the leader.

        Playout policy: never fill an own eye, pass only when ahead (or when
        nothing else is playable), and answer an opponent's pass with a pass
        when ahead. The playout stops after 100 turns at the latest.

        Returns:
            "1-0" when white has more stones on the board than black, "0-1"
            when it has fewer, "1/2-1/2" on equality.
        """

        def is_point_an_eye(board, coord):
            # A point counts as an eye of the player to move when every
            # on-board neighbour is one of that player's stones. A pass (-1)
            # is not a board point and must not be looked up.
            if coord == -1:
                return False
            to_move = board.next_player()
            i = board._neighborsEntries[coord]
            # The neighbour list of a point is terminated by -1.
            while board._neighbors[i] != -1:
                if board._board[board._neighbors[i]] != to_move:
                    return False
                i += 1
            return True

        def is_pass_valid(board, coord):
            # We can only pass if we are winning
            if coord != -1:
                return True
            if board.next_player() == board._BLACK:
                return board._nbWHITE < board._nbBLACK
            else:
                return board._nbWHITE > board._nbBLACK

        turns = 0
        while not board.is_game_over():
            turns += 1

            # exploit the chance when the other player passes and he's losing
            if board._lastPlayerHasPassed and is_pass_valid(board, -1):
                board.play_move(-1)
                continue

            moves = board.weak_legal_moves()
            random.shuffle(moves)

            # Track "a move was played" separately from the move value: the
            # pass (-1) may itself be the move accepted in the loop, and must
            # then not be followed by a second pass.
            played = False
            for move in moves:
                if (is_pass_valid(board, move)
                        and not is_point_an_eye(board, move)
                        and board.play_move(move)):
                    played = True
                    break
            if not played:
                board.play_move(-1)

            if turns > 100:
                break

        if board._nbWHITE > board._nbBLACK:
            return "1-0"
        elif board._nbWHITE < board._nbBLACK:
            return "0-1"
        else:
            return "1/2-1/2"
class myPlayer(PlayerInterface):
    """MCTS player with uniformly random, eye-preserving playouts."""

    def __init__(self):
        self._board = Board()
        self._mycolor = None

    def getPlayerName(self):
        """Return the display name of this player."""
        return "Team 38"

    def getPlayerMove(self):
        """Select a move by MCTS, play it locally and return its name."""
        if self._board.is_game_over():
            return "PASS"
        move = self.select_move(self._board)
        self._board.play_move(move)
        return Board.flat_to_name(move)

    def playOpponentMove(self, move):
        """Apply the opponent's move (given by name) to the local board."""
        self._board.play_move(Board.name_to_flat(move))

    def newGame(self, color):
        """Record our colour and the opponent's colour for a new game."""
        self._mycolor = color
        self._opponent = Board.flip(color)

    def endGame(self, winner):
        """Print whether we won or lost."""
        if self._mycolor == winner:
            print("I won :D")
        else:
            print("I lost :(")

    @staticmethod
    def select_move(board_org, max_time=7.4, temperature=1.2):
        """Run MCTS from ``board_org`` for about ``max_time`` seconds.

        Args:
            board_org: Position to search from; deep-copied every round.
            max_time: Time budget in seconds, checked once per round.
            temperature: Weight of the exploration term in the UCT score.

        Returns:
            The move of the root child with the highest winning fraction for
            the player to move, or -1 when the root has no children.
        """
        start_time = time.time()
        root = MCTSNode(board_org.weak_legal_moves())

        rounds = 0
        while True:
            board = copy.deepcopy(board_org)
            node = root

            # Selection (select_child also plays the move on `board`).
            while (not node.can_add_child()) and (not board.is_game_over()):
                node = myPlayer.select_child(node, board, temperature)

            # Expansion.
            if node.can_add_child() and not board.is_game_over():
                node = node.add_random_child(board)

            # Simulation and backpropagation.
            winner = myPlayer.simulate_random_game(board)
            while node is not None:
                node.record_win(winner)
                node = node.parent

            if time.time() - start_time >= max_time:
                print()
                break
            rounds += 1
            print("Rounds %d (%f)" % (rounds, time.time() - start_time),
                  end='\r')

        # debug: show the five best root children
        to_move = board_org.next_player()
        scored_moves = [(child.winning_frac(to_move), child.move,
                         child.num_rollouts) for child in root.children]
        scored_moves.sort(key=lambda x: x[0], reverse=True)
        for s, m, n in scored_moves[:5]:
            print('%s - %.3f (%d)' % (m, s, n))

        # pick best node
        best_move = -1
        best_pct = -1.0
        for child in root.children:
            child_pct = child.winning_frac(board_org.next_player())
            if child_pct > best_pct:
                best_pct = child_pct
                best_move = child.move
        print('Select move %s with win pct %.3f' % (best_move, best_pct))
        return best_move

    @staticmethod
    def select_child(node, board, temperature):
        """Pick the child with the best UCT score and play its move.

        Returns:
            The selected child; its move has been played on ``board``.
        """
        # upper confidence bound for trees (UCT) metric
        total_rollouts = sum(child.num_rollouts for child in node.children)
        log_rollouts = math.log(total_rollouts)

        best_score = -1
        best_child = None
        for child in node.children:
            win_percentage = child.winning_frac(board.next_player())
            exploration_factor = math.sqrt(log_rollouts / child.num_rollouts)
            uct_score = win_percentage + temperature * exploration_factor
            if uct_score > best_score:
                best_score = uct_score
                best_child = child

        board.play_move(best_child.move)
        return best_child

    @staticmethod
    def simulate_random_game(board):
        """Play ``board`` out with random moves and report the leader.

        At each turn the moves from ``weak_legal_moves()`` are tried in
        random order; the first one that is not an own eye and that
        ``play_move`` accepts is played. If none is accepted, a pass is
        played.

        Returns:
            "1-0" when white has more stones on the board than black, "0-1"
            when it has fewer, "1/2-1/2" on equality.
        """

        def is_point_an_eye(board, coord):
            # A point counts as an eye of the player to move when every
            # on-board neighbour is one of that player's stones. A pass (-1)
            # is not a board point and must not be looked up.
            if coord == -1:
                return False
            to_move = board.next_player()
            i = board._neighborsEntries[coord]
            # The neighbour list of a point is terminated by -1.
            while board._neighbors[i] != -1:
                if board._board[board._neighbors[i]] != to_move:
                    return False
                i += 1
            return True

        while not board.is_game_over():
            moves = board.weak_legal_moves()
            random.shuffle(moves)

            # Track "a move was played" separately from the move value: the
            # pass (-1) may itself be the move accepted in the loop, and must
            # then not be followed by a second pass.
            played = False
            for move in moves:
                if not is_point_an_eye(board, move) and board.play_move(move):
                    played = True
                    break
            if not played:
                board.play_move(-1)

        if board._nbWHITE > board._nbBLACK:
            return "1-0"
        elif board._nbWHITE < board._nbBLACK:
            return "0-1"
        else:
            return "1/2-1/2"