def evaluateLeaf(self, leaf, value, done, breadcrumbs):
    """Evaluate a leaf node and, when the game is not over, expand it.

    If ``done`` is true the incoming ``value`` is logged and returned
    untouched. Otherwise the state is rebuilt from ``leaf.state_id``,
    ``self.get_preds`` supplies a new value, per-action priors and a mask of
    allowed actions, and one edge per allowed action is appended to
    ``leaf.edges`` (creating the child node in ``self.mcts.tree`` when it is
    not already there).

    Args:
        leaf: node to evaluate; its ``state_id`` and ``edges`` are used.
        value: value to pass through when ``done`` is true.
        done: whether the game is finished at ``leaf``.
        breadcrumbs: path to the leaf; returned unchanged.

    Returns:
        The tuple ``(value, breadcrumbs)``.
    """
    log = lg.logger_mcts
    log.debug('------EVALUATING LEAF------')

    # Terminal position: nothing to expand, just report the game value.
    if done:
        log.debug('GAME VALUE FOR %d: %f',
                  GameState.current_player_from_id(leaf.state_id), value)
        return value, breadcrumbs

    leaf_state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
    value, priors, legal_mask = self.get_preds(leaf_state)
    log.debug('PREDICTED VALUE FOR %d: %f', leaf_state.currentPlayer, value)

    for action, is_legal in enumerate(legal_mask):
        if not is_legal:
            continue

        child_state, _, _ = leaf_state.takeAction(action)

        # Reuse the node when this state is already in the tree.
        if child_state.id in self.mcts.tree:
            child = self.mcts.tree[child_state.id]
            log.debug('existing node...%s...', child.state_id)
        else:
            child = mc.Node(child_state)
            self.mcts.addNode(child)
            log.debug('added node...%s...p = %f', child.state_id,
                      priors[action])

        leaf.edges.append(
            (action, mc.Edge(leaf, child, priors[action], action)))

    return value, breadcrumbs
def backFill(self, leaf, value, breadcrumbs):
    """Propagate ``value`` along every edge in ``breadcrumbs``.

    For each edge, ``N`` is incremented, ``W`` receives ``value`` with a
    positive sign when the player to move at ``edge.inNode`` is the player to
    move at ``leaf`` and a negative sign otherwise, and ``Q`` is recomputed as
    ``W / N``.

    Args:
        leaf: node whose ``state_id`` determines the reference player.
        value: value to back up.
        breadcrumbs: iterable of edges to update.
    """
    log = lg.logger_mcts
    log.debug('------DOING BACKFILL------')

    leaf_player = GameState.current_player_from_id(leaf.state_id)

    for edge in breadcrumbs:
        edge_player = GameState.current_player_from_id(edge.inNode.state_id)
        sign = 1 if edge_player == leaf_player else -1
        signed_value = value * sign

        stats = edge.stats
        stats['N'] = stats['N'] + 1
        stats['W'] = stats['W'] + signed_value
        stats['Q'] = stats['W'] / stats['N']

        log.debug(
            'updating edge with value %f for player %d... N = %d, W = %f, Q = %f',
            signed_value, edge_player, stats['N'], stats['W'], stats['Q'])

        # Rebuilding and rendering the state is only done when DEBUG output
        # will actually be emitted.
        if log.isEnabledFor(logging.DEBUG):
            out_state = GameState.from_id(edge.outNode.state_id,
                                          config.GRID_SHAPE)
            log.debug(out_state.render())
def test_id(self):
    """Check that a GameState survives a round trip through its id.

    Builds boards containing a diagonal of NB_TOKENS_VICTORY PLAYER_1 tokens
    (the original author describes them as the victories of
    test_victory_true_4), converts each GameState to its id, rebuilds a
    GameState from that id with ``GameState.from_id`` and checks that the
    board and the current player are equal to the originals.
    """
    # Number of rows the diagonal climbs above its starting row.
    diagonal_rise = NB_TOKENS_VICTORY - 1

    for ligne in range(GRID_SHAPE[0] + 1 - NB_TOKENS_VICTORY, GRID_SHAPE[0]):
        for column in range(0, GRID_SHAPE[1] + 1 - NB_TOKENS_VICTORY):
            # Align NB_TOKENS_VICTORY PLAYER_1 tokens on the diagonal whose
            # starting point (described by the author as bottom-left) is
            # (ligne, column).
            board = np.full(GRID_SHAPE, NONE, dtype=np.int8)
            for i, j in zip(range(ligne, ligne - NB_TOKENS_VICTORY, -1),
                            range(column, column + NB_TOKENS_VICTORY)):
                board[i, j] = PLAYER_1
                board[0:i, j] = PLAYER_2
                board[i, 0:j] = PLAYER_2

            # Fill every row whose index is smaller than all diagonal rows
            # with PLAYER_2. Derived from NB_TOKENS_VICTORY (previously a
            # hard-coded 3) so the diagonal is never overwritten.
            board[0:ligne - diagonal_rise, :] = PLAYER_2

            game_state = GameState(currentPlayer=PLAYER_1, board=board)
            state_id = game_state.id  # not named `id`: avoids shadowing the builtin
            new_game_state = GameState.from_id(state_id, board.shape)

            # assert_array_equal raises AssertionError on mismatch, so no
            # extra unittest assertion is needed around it.
            # See https://stackoverflow.com/questions/3302949/best-way-to-assert-for-numpy-array-equality
            np.testing.assert_array_equal(new_game_state.board,
                                          game_state.board)
            self.assertEqual(new_game_state.currentPlayer,
                             game_state.currentPlayer)
def simulate(self):
    """Run one simulation: move to a leaf, evaluate it, back up the value.

    The three stages are delegated to ``self.mcts.moveToLeaf``,
    ``self.evaluateLeaf`` and ``self.mcts.backFill``. When DEBUG logging is
    enabled, the root and the reached leaf are rendered to the log.
    """
    log = lg.logger_mcts

    if log.isEnabledFor(logging.DEBUG):
        root_state = GameState.from_id(self.mcts.root.state_id,
                                       config.GRID_SHAPE)
        log.debug('ROOT NODE...%s', self.mcts.root.state_id)
        log.debug(root_state.render())
        log.debug('CURRENT PLAYER...%d', root_state.currentPlayer)

    # Stage 1: move to a leaf node.
    leaf, value, done, breadcrumbs = self.mcts.moveToLeaf()
    if log.isEnabledFor(logging.DEBUG):
        leaf_state = GameState.from_id(leaf.state_id, config.GRID_SHAPE)
        log.debug(leaf_state.render())

    # Stage 2: evaluate the leaf node.
    value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs)

    # Stage 3: backfill the value through the tree.
    self.mcts.backFill(leaf, value, breadcrumbs)
def moveToLeaf(self):
    """Walk from the root to a leaf, always following the edge with the highest Q + U.

    At each non-leaf node the exploration term is
    ``U = cpuct * adjP * sqrt(Nb) / (1 + N)``, where ``Nb`` is the sum of the
    visit counts of the node's edges and
    ``adjP = (1 - epsilon) * P + epsilon * nu``. At the root, ``epsilon`` is
    ``config.EPSILON`` and ``nu`` is drawn from a Dirichlet distribution
    parameterised by ``config.ALPHA``; elsewhere both are zero.

    Returns:
        A tuple ``(currentNode, value, done, breadcrumbs)``: the leaf that was
        reached, the value and done flag returned by the last
        ``takeAction`` call (``0`` and ``False`` if the root is already a
        leaf), and the list of edges traversed in order.

    Raises:
        RuntimeError: if a non-leaf node offers no selectable edge (no edges,
            or every Q + U is NaN). Previously this either raised
            UnboundLocalError or silently reused the edge chosen at the
            previous node.
    """
    import logging  # local import: only needed for the DEBUG guard below

    lg.logger_mcts.debug('------MOVING TO LEAF------')

    # Per-edge debug output formats many rounded values; skip that work
    # entirely unless DEBUG records will actually be emitted.
    debug_enabled = lg.logger_mcts.isEnabledFor(logging.DEBUG)

    breadcrumbs = []
    currentNode = self.root

    done = False
    value = 0

    while not currentNode.isLeaf():
        state = GameState.from_id(currentNode.state_id, config.GRID_SHAPE)
        lg.logger_mcts.debug('PLAYER TURN...%d', state.currentPlayer)

        edges = currentNode.edges

        # Exploration noise is mixed into the priors at the root only.
        if currentNode == self.root:
            epsilon = config.EPSILON
            nu = np.random.dirichlet([config.ALPHA] * len(edges))
        else:
            epsilon = 0
            nu = [0] * len(edges)

        Nb = sum(edge.stats['N'] for _, edge in edges)
        sqrt_Nb = np.sqrt(Nb)  # loop-invariant for this node

        # Reset the selection for every node so a choice made at the previous
        # node can never leak into this one.
        maxQU = float('-inf')
        simulationAction = None
        simulationEdge = None

        for idx, (action, edge) in enumerate(edges):
            stats = edge.stats
            adjP = (1 - epsilon) * stats['P'] + epsilon * nu[idx]
            U = self.cpuct * adjP * sqrt_Nb / (1 + stats['N'])
            Q = stats['Q']

            if debug_enabled:
                # NOTE(review): the `% 7` presumably maps the action to a
                # board column — confirm against config.GRID_SHAPE.
                lg.logger_mcts.debug(
                    'action: %d (%d)... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f',
                    action, action % 7, stats['N'],
                    np.round(stats['P'], 6), np.round(nu[idx], 6), adjP,
                    np.round(stats['W'], 6), np.round(Q, 6),
                    np.round(U, 6), np.round(Q + U, 6))

            if Q + U > maxQU:
                maxQU = Q + U
                simulationAction = action
                simulationEdge = edge

        if simulationEdge is None:
            raise RuntimeError(
                'moveToLeaf: no selectable edge at node %s'
                % (currentNode.state_id,))

        lg.logger_mcts.debug('action with highest Q + U...%d',
                             simulationAction)

        # The value of the new state is from the point of view of the new
        # player to move.
        _, value, done = state.takeAction(simulationAction)
        currentNode = simulationEdge.outNode
        breadcrumbs.append(simulationEdge)

    lg.logger_mcts.debug('DONE...%d', done)

    return currentNode, value, done, breadcrumbs