Example no. 1
def depthFirstSearch(problem):
    """
    Search the deepest nodes in the search tree first.

    Your search algorithm needs to return a list of actions that reaches the
    goal. Make sure to implement a graph search algorithm.

    To get started, you might want to try some of these simple commands to
    understand the search problem that is being passed in:

    print "Start:", problem.getStartState()
    print "Is the start a goal?", problem.isGoalState(problem.getStartState())
    print "Start's successors:", problem.getSuccessors(problem.getStartState())
    """
    "*** YOUR CODE HERE ***"
    from util import Stack
    nodes = Stack()
    closed = dict()
    moves = list()
  
    for var in problem.getSuccessors(problem.getStartState()):
        state = FringeState(var[0])
        state.moves.append(Directions.convToDirection(var[1]))
        nodes.push(state)
    
    closed[problem.getStartState()] = True   

    while(False == nodes.isEmpty()):
        current_node = nodes.pop()
        #print "Current node:", current_node
        
        if(closed.has_key(current_node.pos)):
            continue

        
        if(problem.isGoalState(current_node.pos)):
            print "Goal reached!"
            moves = current_node.moves
            break
        
        for var in problem.getSuccessors(current_node.pos):
            state = FringeState(var[0])
            state.moves= copy.deepcopy(current_node.moves)
            state.moves.append(Directions.convToDirection(var[1]))
            nodes.push(state)
        
        closed[current_node.pos] = True
                
    return moves
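
The search examples in this file all rely on a FringeState helper and a Directions.convToDirection converter that are defined elsewhere in the scraped project. A minimal sketch of what FringeState must provide, inferred from how it is used above (a hypothetical reconstruction, not the original class):

class FringeState:
    # Hypothetical reconstruction: a fringe entry bundling a position, the
    # action list that reaches it, and the accumulated path cost.
    def __init__(self, pos):
        self.pos = pos     # the successor state, e.g. a board position
        self.moves = []    # actions taken from the start to reach pos
        self.cost = 0      # cumulative path cost (used by UCS and A* below)
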
Example no. 2
    def update(self, transitionsBatch):
        """
        Update the Q-values from the given batch of transitions.
        :param transitionsBatch: list of tuples (qState, action, reward,
            nextQState, isTerminal, legal actions in the next state)
        """

        trainingBatchQStates = []
        trainingBatchTargetQValues = []

        # For each transition in the batch, compute the target Q-values to train on
        for aQState, anAction, aReward, aNextQState, isTerminal, nextStateLegalActions in transitionsBatch:

            # aReward = util.rescale(aReward, -510, 1000, -1, 1)

            actionsQValues = self.model.model.predict(np.array([aQState]))[0]
            targetQValues = actionsQValues.copy()

            # Bellman update: a terminal transition is worth its reward only;
            # otherwise bootstrap from the best legal action in the next state.
            if isTerminal:
                updatedQValueForAction = aReward

            else:
                nextActionsQValues = self.model.model.predict(
                    np.array([aNextQState]))[0]
                nextStateLegalActionsIndices = [
                    Directions.getIndex(action)
                    for action in nextStateLegalActions
                ]

                # Index 4 corresponds to STOP in this encoding; ignore it when
                # picking the best next action (it may not be present).
                try:
                    nextStateLegalActionsIndices.remove(4)
                except ValueError:
                    pass

                nextStateLegalActionsQValues = np.array(
                    nextActionsQValues)[nextStateLegalActionsIndices]
                maxNextActionQValue = max(nextStateLegalActionsQValues)
                updatedQValueForAction = (
                    aReward + self.trainingRoom.discount * maxNextActionQValue)

            # Only the taken action's target changes; the rest keep the
            # network's own predictions.
            targetQValues[Directions.getIndex(
                anAction)] = updatedQValueForAction

            trainingBatchQStates.append(aQState)
            trainingBatchTargetQValues.append(targetQValues)

        return self.model.model.train_on_batch(
            x=np.array(trainingBatchQStates),
            y=np.array(trainingBatchTargetQValues))
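
A hedged usage sketch of update; the six-tuple layout follows the unpacking order in the loop above, and the agent instance, feature vectors, and reward values are illustrative placeholders:

import numpy as np

qState = np.zeros(10)      # illustrative feature vector for the current state
nextQState = np.zeros(10)  # illustrative feature vector for the next state
transition = (qState,
              'North',             # action taken
              10.0,                # reward received
              nextQState,
              False,               # True if the next state is terminal
              ['North', 'West'])   # legal actions in the next state
loss = agent.update([transition])  # "agent" is a placeholder for this class
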
Example no. 3
    def getFeatures(self, state, action):
        from pacmanAgents import CarefulGreedyAgent

        # Ask the heuristic agent which actions it considers dangerous, then
        # encode that as one boolean per non-STOP direction. (The loop variable
        # is named "a" so it does not shadow the "action" parameter.)
        dangerousActions = CarefulGreedyAgent()._getAction(state)[1]
        dangerousActionsBools = np.array([
            a in dangerousActions
            for a in Directions.asList() if a != Directions.STOP
        ]).astype(float)
        legalActions = getLegalActions(state)

        return np.concatenate((dangerousActionsBools, legalActions)).astype(dtype=float)
Example no. 4
    def getAction(self, rawState, epsilon):

        legalActions = rawState.getLegalActions()
        legalActions.remove(Directions.STOP)

        # Epsilon-greedy: explore with probability epsilon, else act greedily.
        if util.flipCoin(epsilon):
            return random.choice(legalActions)

        else:
            qValues = [(Directions.getIndex(action),
                        self.getQValue(rawState, action))
                       for action in legalActions]
            qValues = sorted(qValues, key=lambda x: x[1], reverse=True)

            # Return the highest-valued action; every candidate is legal here,
            # so the first iteration already returns.
            for index, qValue in qValues:
                action = Directions.fromIndex(index)
                if action in legalActions:
                    return action
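
Both getAction variants gate exploration on util.flipCoin(epsilon). In the Berkeley Pacman util module that helper is essentially the following, so epsilon is the probability of picking a random legal action:

import random

def flipCoin(p):
    # Returns True with probability p.
    return random.random() < p
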
Example no. 5
def aStarSearch(problem, heuristic=nullHeuristic):
    """Search the node that has the lowest combined cost and heuristic first."""
    import copy
    from util import PriorityQueue

    nodes = PriorityQueue()   # fringe ordered by f(n) = g(n) + h(n)
    closed = dict()
    moves = list()

    for successor, action, stepCost in problem.getSuccessors(problem.getStartState()):
        state = FringeState(successor)
        state.moves.append(Directions.convToDirection(action))
        state.cost = stepCost
        nodes.push(state, state.cost + heuristic(state.pos, problem))

    closed[problem.getStartState()] = True

    while not nodes.isEmpty():
        current_node = nodes.pop()

        if current_node.pos in closed:
            continue

        if problem.isGoalState(current_node.pos):
            print "Goal reached!"
            moves = current_node.moves
            break

        for successor, action, stepCost in problem.getSuccessors(current_node.pos):
            state = FringeState(successor)
            state.moves = copy.deepcopy(current_node.moves)
            state.moves.append(Directions.convToDirection(action))
            # g(n) is the accumulated path cost; the heuristic is added only
            # to the priority, never stored in the node.
            state.cost = stepCost + current_node.cost
            nodes.push(state, state.cost + heuristic(state.pos, problem))

        closed[current_node.pos] = True

    return moves
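
nullHeuristic, the default used here, comes from the Berkeley search framework and simply estimates every remaining cost as zero, which makes aStarSearch explore exactly like uniform-cost search:

def nullHeuristic(state, problem=None):
    # Trivially admissible: the estimated cost to the goal is always 0.
    return 0
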
Example no. 6
def uniformCostSearch(problem):
    """Search the node of least total cost first."""
    import copy
    from util import PriorityQueue

    nodes = PriorityQueue()   # fringe ordered by accumulated path cost g(n)
    closed = dict()
    moves = list()

    for successor, action, stepCost in problem.getSuccessors(problem.getStartState()):
        state = FringeState(successor)
        state.moves.append(Directions.convToDirection(action))
        state.cost = stepCost
        nodes.push(state, state.cost)

    closed[problem.getStartState()] = True

    while not nodes.isEmpty():
        current_node = nodes.pop()

        if current_node.pos in closed:
            continue

        if problem.isGoalState(current_node.pos):
            print "Goal reached!"
            moves = current_node.moves
            break

        for successor, action, stepCost in problem.getSuccessors(current_node.pos):
            state = FringeState(successor)
            state.moves = copy.deepcopy(current_node.moves)
            state.moves.append(Directions.convToDirection(action))
            state.cost = stepCost + current_node.cost
            nodes.push(state, state.cost)

        closed[current_node.pos] = True

    return moves
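
uniformCostSearch is the aStarSearch above with the heuristic term dropped; since nullHeuristic always returns 0, the two compute identical priorities. A quick equivalence sketch (problem stands for any SearchProblem instance):

movesUCS = uniformCostSearch(problem)
movesAStar = aStarSearch(problem, heuristic=nullHeuristic)  # same expansion order
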
Example no. 7
def breadthFirstSearch(problem):
    """Search the shallowest nodes in the search tree first."""
    "*** YOUR CODE HERE ***"
    from util import Queue
    nodes = Queue()
    closed = dict()
    moves = list()
  
    
    for var in problem.getSuccessors(problem.getStartState()):
        state = FringeState(var[0])
        state.moves.append(Directions.convToDirection(var[1]))
        nodes.push(state)

    closed[problem.getStartState()] = True   
    
    while(False == nodes.isEmpty()):
        current_node = nodes.pop()
        #print "Current node:", current_node
        
        if(closed.has_key(current_node.pos)):
            continue
                    
        if(problem.isGoalState(current_node.pos)):
            print "Goal reached!"
            moves = current_node.moves
            break
        
        for var in problem.getSuccessors(current_node.pos):
            state = FringeState(var[0])
            state.moves= copy.deepcopy(current_node.moves)
            state.moves.append(Directions.convToDirection(var[1]))
            nodes.push(state)
        
        closed[current_node.pos] = True
                
    return moves
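
The search functions above differ only in the fringe container imported from util. Roughly, minus tie-breaking details in the real Berkeley implementations, those containers behave like:

import heapq

class Stack:                       # LIFO fringe -> depth-first search
    def __init__(self): self.list = []
    def push(self, item): self.list.append(item)
    def pop(self): return self.list.pop()
    def isEmpty(self): return len(self.list) == 0

class Queue:                       # FIFO fringe -> breadth-first search
    def __init__(self): self.list = []
    def push(self, item): self.list.insert(0, item)
    def pop(self): return self.list.pop()
    def isEmpty(self): return len(self.list) == 0

class PriorityQueue:               # cost-ordered fringe -> UCS and A*
    def __init__(self): self.heap = []
    def push(self, item, priority): heapq.heappush(self.heap, (priority, item))
    def pop(self): return heapq.heappop(self.heap)[1]
    def isEmpty(self): return len(self.heap) == 0
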
Example no. 8
    def getAction(self, rawState, epsilon):
        legalActions = rawState.getLegalActions()
        legalActions.remove(Directions.STOP)

        qState = self.trainingRoom.featuresExtractor.getFeatures(
            rawState, None)

        # Epsilon-greedy over the network's Q-value predictions.
        if util.flipCoin(epsilon):
            return random.choice(legalActions)

        else:
            # The model predicts one Q-value per action index; walk them in
            # descending order and take the first legal action.
            qValues = list(
                enumerate(self.model.model.predict(np.array([qState]))[0]))
            qValues = sorted(qValues, key=lambda x: x[1], reverse=True)

            for index, qValue in qValues:
                action = Directions.fromIndex(index)
                if action in legalActions:
                    return action
Example no. 9
    def before_turn(self):
        board = self.board
        # 1. Pick one of the empty slots uniformly at random.
        # 2. Put a 2 there with probability 0.8, otherwise a 4.
        emptySlots = [(x, y) for x in range(4) for y in range(4) if board[x][y] is None]
        numberToPutInSlot = weighted_choice()
        x, y = choose_uni_from_seq(emptySlots)
        self.create_new_tile(x, y, numberToPutInSlot)

        # If no legal move is left, the game is over.
        noMoreMoves = True
        for move in Directions.generator():
            if self.is_legal_turn(move):
                noMoreMoves = False
        if noMoreMoves:
            self.display_score_and_exit()

        if self.agent:
            self.after(AFTER_FOR_NEW_TURN, self.update_turn, self.agent.getAction(self.board))

        self.ignoreKeys = False
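
weighted_choice and choose_uni_from_seq are helpers from the scraped 2048 project that are not shown here. Based on the comment in before_turn, they presumably behave like this hypothetical sketch:

import random

def weighted_choice():
    # Hypothetical: new tiles are a 2 with probability 0.8, otherwise a 4.
    return 2 if random.random() < 0.8 else 4

def choose_uni_from_seq(seq):
    # Hypothetical: pick one element of seq uniformly at random.
    return random.choice(seq)
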
Example no. 10
def getLegalActions(state):
    legalActions = state.getLegalActions()
    if Directions.STOP in legalActions:
        legalActions.remove(Directions.STOP)
    # Boolean mask over the four movement directions (STOP excluded).
    return np.array([Directions.fromIndex(i) in legalActions for i in range(4)])
Example no. 11
def getGhostDirections(state):
    # Each ghost's heading as an action index, scaled into [0, 1].
    return np.array([Directions.getIndex(s.getDirection())
                     for s in state.getGhostStates()]) / 4.0
Example no. 12
    def getNextStatesOfMyTurn(board):
        ''' Takes a state (a board) and yields (move, nextState) tuples. '''
        for direction in Directions.generator():
            if Miniboard.isLegalAction(board, direction):
                nextBoard = Miniboard.calculateNextBoardUsing(direction, board)
                yield (direction, nextBoard)
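
A short usage sketch (board is a placeholder for a 2048 board state):

for move, nextBoard in getNextStatesOfMyTurn(board):
    # Each iteration yields one legal move and the board it produces.
    print(move, nextBoard)
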
Example no. 13
    def test_legal_turn(self):
        self.create_from_list([0, 0, 0, 0, 2, 4, 2, 4])
        self.debug_board()
        for d in Directions.generator():
            print(d, self.is_legal_turn(d))
Example no. 14
    def willBeInCorridor(self, gameState, action):
        successor = self.getSuccessor(gameState, action)
        myPos = successor.getAgentPosition(self.index)
        legalActionsAtSuccessor = successor.getLegalActions(self.index)
        # A corridor square offers exactly three legal actions: continuing,
        # reversing, and STOP. Directions.REVERSE is a dict in the Pacman
        # framework, so it is indexed rather than called.
        willBeInCorridor = (len(legalActionsAtSuccessor) == 3
                            and action in legalActionsAtSuccessor
                            and Directions.REVERSE[action] in legalActionsAtSuccessor)
        return float(willBeInCorridor)
Example no. 15
    def remember(self, state, action, reward, nextState):
        from game import Directions
        # Key the replay memory by (state hash, action index); a repeated
        # state-action pair overwrites the earlier transition.
        self.replayMemory[str(hash(state)) +
                          str(Directions.getIndex(action))] = (state, action,
                                                               reward,
                                                               nextState)