Example No. 1
  def partiallyObservableAction(self,gameState):
    global possibleGhostStates
    global frontierStates
    pacmanLoc = gameState.getPacmanPosition()
    ghostLoc = gameState.getGhostPosition(1)
    room = gameState.getLoctoRoom()
    if room[int(ghostLoc[0])][int(ghostLoc[1])] in gameState.data.roomsOn:
      possibleGhostStates.clear()
      possibleGhostStates[ghostLoc] = 1.0
      frontierStates = [ghostLoc]
      if (pacmanLoc, ghostLoc) in MDPUtil.AstarPolicy.keys():
        action = self.convertTuple(MDPUtil.AstarPolicy[(pacmanLoc,ghostLoc)])
      else:
        action = self.A_star(pacmanLoc,ghostLoc,self.heuristic,gameState)
        MDPUtil.AstarPolicy[(pacmanLoc,ghostLoc)] = self.convertAction(action)
    else:
      predictLoc = (-1,-1)
      while True:
        predictLoc = util.chooseFromDistribution(possibleGhostStates)
        if len(possibleGhostStates.keys())==1:
          predictLoc = possibleGhostStates.keys()[0]
          break
        while (room[int(predictLoc[0])][int(predictLoc[1])] in gameState.data.roomsOn) and len(possibleGhostStates.keys())!=1:
          del possibleGhostStates[predictLoc]
          predictLoc = util.chooseFromDistribution(possibleGhostStates)
          if len(possibleGhostStates.keys())==1:
            predictLoc = possibleGhostStates.keys()[0]
            break
        if predictLoc != pacmanLoc: break
      tempFrontierStates = set()  # built-in set instead of the deprecated sets.Set
      for frontierLoc in frontierStates:
        tempNode = self.node(int(frontierLoc[0]),int(frontierLoc[1]))
        possibleActions = self.getActions(gameState,tempNode)

        for frontierAction in possibleActions:
          actionTuple = self.convertAction(frontierAction)
          newFrontierState = (frontierLoc[0]+actionTuple[0],frontierLoc[1]+actionTuple[1])
          
          if newFrontierState in possibleGhostStates.keys(): continue
          if room[int(newFrontierState[0])][int(newFrontierState[1])] in gameState.data.roomsOn: continue
          tempFrontierStates.add(newFrontierState)
          oldProb = possibleGhostStates[frontierLoc]
          possibleGhostStates[newFrontierState] += oldProb/float(len(possibleActions))

      possibleGhostStates.normalize()
      frontierStates = tempFrontierStates

      if (pacmanLoc, predictLoc) in MDPUtil.AstarPolicy.keys():
        action = self.convertTuple(MDPUtil.AstarPolicy[(pacmanLoc,predictLoc)])
      else:
        action = self.A_star(pacmanLoc,predictLoc,self.heuristic,gameState)
        MDPUtil.AstarPolicy[(pacmanLoc,predictLoc)] = self.convertAction(action)
    return action
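
All of the examples on this page hand chooseFromDistribution either a util.Counter (keyed by actions, positions, or values) or a list of (probability, value) pairs. For reference, a minimal re-implementation consistent with how the examples call it might look as follows; this is a sketch, not the exact code shipped in any particular util.py:

import random
import util  # Berkeley-Pacman-style utilities providing Counter and sample()

def chooseFromDistribution(distribution):
    "Draw one sample from a Counter/dict, or from a list of (probability, value) pairs."
    if isinstance(distribution, dict):            # util.Counter is a dict subclass
        return util.sample(distribution)          # sample() normalizes the weights before drawing
    r = random.random()
    total = 0.0
    for prob, value in distribution:              # list of (probability, value) pairs
        total += prob
        if r <= total:
            return value
    return distribution[-1][1]                    # fall back to the last value on float round-off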
Example No. 2
 def get_generation(self):
     #if self.id == 'i0':
     timePeriod = self.environment.get_time()
     if timePeriod != self.last_time:
         tran = self.transitions[self.last_time]
         disto = {}
         for s in tran:
             if s['from'] == self.last_generation:
                 for ns in s['to']:
                     disto[ns['to']] = ns['prob']
                 break
         try:
             ss = self.last_generation
             self.last_generation = util.chooseFromDistribution(disto)
         except Exception as e:
             #if self.id == 'i0':
             print 'vvvvvvvvvvvvvvvvvvvvvvvvvvvv'
             print self.last_time, ss, self.last_generation, timePeriod, disto
             print '****************************'
             raise e
         self.last_time = timePeriod
     res = self.last_generation - 18
     if res < 0:
         res = 0
     return res
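
The loop above walks a nested transition table keyed by time period. Judging only from the field names used in the code (s['from'], ns['to'], ns['prob']), and not from any documented schema, the structure it expects is roughly:

# Hypothetical shape of self.transitions, inferred from the field names above.
transitions = {
    'morning': [                                   # keyed by time period (self.last_time)
        {'from': 20,                               # previous generation level
         'to': [{'to': 18, 'prob': 0.7},           # candidate next levels and their probabilities
                {'to': 22, 'prob': 0.3}]},
    ],
}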
Example No. 3
    def getDirectionalExpectimaxValue(self, gameState, agentIndex, depth):
        if (agentIndex == 0
                and depth == 1) or gameState.isWin() or gameState.isLose():
            return self.evaluationFunction(gameState)

        legalMoves = gameState.getLegalActions(agentIndex)

        if agentIndex == 0:
            return max(
                self.getDirectionalExpectimaxValue(
                    state, getNextIndexAgent(agentIndex, gameState), depth - 1)
                for state in [
                    gameState.generatePacmanSuccessor(action)
                    for action in legalMoves
                ])
        else:
            ghost = DirectionalGhost(index=agentIndex)
            act_prob_dict = ghost.getDistribution(gameState)
            val_prob_dict = util.Counter()
            for action in legalMoves:
                state = gameState.generateSuccessor(agentIndex, action)
                val = self.getDirectionalExpectimaxValue(
                    state, getNextIndexAgent(agentIndex, gameState), depth)
                # Accumulate: two different actions can lead to the same value.
                val_prob_dict[val] += act_prob_dict[action]
            val_prob_dict.normalize()
            return util.chooseFromDistribution(val_prob_dict)
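
Note that this helper samples a single successor value according to the ghost's action distribution, so the returned "expectimax" value is itself random. If a deterministic expected value were wanted instead, the same quantities could be reduced to a probability-weighted sum, along these lines (sketch using the names defined above):

# Deterministic alternative: expected value under the DirectionalGhost policy.
expected_value = sum(
    act_prob_dict[action] *
    self.getDirectionalExpectimaxValue(
        gameState.generateSuccessor(agentIndex, action),
        getNextIndexAgent(agentIndex, gameState), depth)
    for action in legalMoves)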
Example No. 4
 def getAction(self, state):
     """
     Get the action the ghost will do based on a distribution
     of possible actions.
     """
     dist = self.getDistribution(state)
     return chooseFromDistribution(dist)
Example No. 5
 def getAction( self, state ):
   dist = self.getDistribution(state)
   # return state.getLegalActions(self.index)[0]
   if len(dist) == 0: 
     return Directions.STOP
   else:
     return util.chooseFromDistribution( dist )
Example No. 6
 def getAction( self, state ):
     dist = self.getDistribution(state)
     if 'Stop' in dist.keys() and len(dist) > 1:
         del dist['Stop']
     if len(dist) == 0:
         return Directions.STOP
     else:
         return util.chooseFromDistribution( dist )
 def getThreeAction( self, state, list_of_ghost_actions):
     print evaluateHeuristic(state)
     dist = self.getDistribution(state, list_of_ghost_actions)
     if len(dist) == 0:
         return Directions.STOP
     else:
         act = util.chooseFromDistribution( dist )
         #print self.index, act
         return act
Example No. 8
    def getAction(self, state):
        if (state.isWin() or state.isLose()):  #whoami
            return Directions.STOP

        dist = self.getDistribution(state)
        if len(dist) == 0:
            return Directions.STOP
        else:
            return util.chooseFromDistribution(dist)
Example No. 9
 def sample_data(self, trainingData, trainingLabels, sample_weights):
     td = util.Counter()
     tl = util.Counter()
     for i in range(len(trainingLabels)):
         k = util.chooseFromDistribution(sample_weights)
         td[i] = trainingData[k]
         tl[i] = trainingLabels[k]
     return td, tl
     "*** YOUR CODE HERE ***"
Example No. 10
  def getPartiallyObservableLoc(self,state):
    global possiblePacmanStates
    global frontierStates
    pacmanLoc = state.getPacmanPosition()
    ghostLoc = state.getGhostPosition(self.index)
    room = state.getLoctoRoom()
    if room[int(pacmanLoc[0])][int(pacmanLoc[1])] in state.data.roomsOn:
      possiblePacmanStates.clear()
      possiblePacmanStates[pacmanLoc] = 1.0
      frontierStates = [pacmanLoc]
      return pacmanLoc

    else:
 
      predictLoc = util.chooseFromDistribution(possiblePacmanStates)
      while (room[int(predictLoc[0])][int(predictLoc[1])] in state.data.roomsOn) and len(possiblePacmanStates.keys())!=1:
        del possiblePacmanStates[predictLoc]
        predictLoc = util.chooseFromDistribution(possiblePacmanStates)
        if len(possiblePacmanStates.keys())==1:
          predictLoc = possiblePacmanStates.keys()[0]
          break

      tempFrontierStates = set()  # built-in set instead of the deprecated sets.Set
      for frontierLoc in frontierStates:
        tempNode = self.node(int(frontierLoc[0]),int(frontierLoc[1]))
        possibleActions = self.getActions(state,tempNode)

        for frontierAction in possibleActions:
          actionTuple = self.convertAction(frontierAction)
          newFrontierState = (frontierLoc[0]+actionTuple[0],frontierLoc[1]+actionTuple[1])
          
          if room[int(newFrontierState[0])][int(newFrontierState[1])] in state.data.roomsOn: continue
          tempFrontierStates.add(newFrontierState)
          oldProb = possiblePacmanStates[frontierLoc]
          possiblePacmanStates[newFrontierState] += oldProb/float(len(possibleActions))

      possiblePacmanStates.normalize()
      frontierStates = tempFrontierStates
      pacmanLoc = predictLoc
      return pacmanLoc
Example No. 11
    def getAction(self, gameState):
        if not self.xl:
            return max(gameState.getLegalActions(self.index), key = lambda action: (1 - 2 * self.index) * self.rawStateLib[gameState.generateSuccessor(self.index, action)])
        # Add randomness
        sumprob = 0
        dist = util.Counter()
        for action in gameState.getLegalActions(self.index):
            newState = gameState.generateSuccessor(self.index, action)
            #sumprob += self.k ** self.rawStateLib[newState][self.index]
            dist[action] = (self.k ** ((1 - 2 * self.index) * self.rawStateLib[newState]))
        dist.normalize()

        return util.chooseFromDistribution( dist )
Example No. 12
    def make_decision(self):
        if self.dcopPhase == 0:
            if not self.decision_made:
                if self.last_state is not None:
                    probs = {}
                    for ns in self.next_states(self.last_state):
                        probs[ns] = self.probabilities[(self.last_state, ns)]

                    if len(probs) > 0:
                        self.last_state = chooseFromDistribution(probs)

                        self.commit_generators(
                            self.generatorsValues[self.last_state])
                        self.commit_children_powerLines(
                            self.powerLineValues[self.last_state])

                        self.decision_made = True
                        for n in self.relayNode.neighbours:
                            self.send(n, {'type': 'action-taken'})
                    else:
                        # Asking children for solving DCOP
                        self.printer('%d %s miss Unknown' %
                                     (self.environment.get_time(), self.name))
                        self.test_log.append(2)
                        for c in self.relayNode.neighbours:
                            self.send(c, {'type': 'request-for-dcop'})
                        self.dcopPhase = 1
                else:
                    raise Exception('last state is None')  # raising a bare string is not valid Python

            elif self.all_neighbours_took:
                # Checking for good decision
                powerLines = self.relayNode.get_powerLine_values()
                powerLineValues = tuple([powerLines[pl] for pl in powerLines])
                # check for goodness
                if self.last_state[1] == powerLineValues:
                    self.done = True
                else:
                    # Asking children for solving DCOP
                    self.printer('%d %s miss Prediction' %
                                 (self.environment.get_time(), self.name))
                    self.test_log.append(1)
                    for c in self.relayNode.neighbours:
                        self.send(c, {'type': 'request-for-dcop'})
                    self.dcopPhase = 1

        elif self.dcopPhase == 3:
            self.dcopPhase = 0
            self.decision_made = True
            self.done = True
Example No. 13
def predictGhostMove(myPos, ghostPos):  #TODO: NOTE: This actually doesn't work since it doesn't check validity of a move for the ghost
    '''
    Returns the expected position of the ghost.
    :param myPos: Pacman's (x, y) position
    :param ghostPos: the ghost's current (x, y) position
    :return: the ghost's predicted next (x, y) position
    '''
    moves = util.Counter()
    #r = random.random()  #where r is the degree of noise (randomness)
    #if optimal:
    if myPos[1] < ghostPos[1]:
        moves[Directions.SOUTH] = 1
        if myPos[0] < ghostPos[0]:
          moves[Directions.SOUTH] = .8
          moves[Directions.WEST] = .2
        elif myPos[0] > ghostPos[0]:
          moves[Directions.SOUTH] = .8
          moves[Directions.EAST] = .2

    elif myPos[1] > ghostPos[1]:
        moves[Directions.NORTH] = 1
        if myPos[0] < ghostPos[0]:
          moves[Directions.NORTH] = .8
          moves[Directions.WEST] = .2
        elif myPos[0] > ghostPos[0]:
          moves[Directions.NORTH] = .8
          moves[Directions.EAST] = .2

    elif myPos[1] == ghostPos[1]:
        if myPos[0] < ghostPos[0]:
          moves[Directions.WEST] = .9
          moves[Directions.STOP] = .1
        elif myPos[0] > ghostPos[0]:
          moves[Directions.EAST] = .9    # .9/.1, matching the WEST branch above (was 1/.1)
          moves[Directions.STOP] = .1
        else:
          moves[Directions.STOP] = 1

    # Currently there is no check whether the ghost would move off the map or into a wall.
    ## Ideally we would just pick the move that gets the ghost closest to Pacman.
    ## The ghost might be using Euclidean or Manhattan distance instead of maze distance, though.
    move = util.chooseFromDistribution(moves)
    vector =  Actions.directionToVector(move)
    newPos = (ghostPos[0] + vector[0], ghostPos[1] + vector[1])
    return newPos
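
The TODO at the top of predictGhostMove notes that the predicted move is never checked against the maze. One way to patch that, sketched here under the assumption that the caller can pass in the walls grid from gameState.getWalls() and that the move distribution above is factored into a helper (buildMoveDistribution is hypothetical), is to drop illegal moves before sampling:

def predictGhostMoveChecked(myPos, ghostPos, walls):
    # Same heuristic distribution as above, but moves into walls are discarded first.
    moves = buildMoveDistribution(myPos, ghostPos)   # hypothetical helper returning the Counter built above
    for move in list(moves):
        vector = Actions.directionToVector(move)
        nx, ny = int(ghostPos[0] + vector[0]), int(ghostPos[1] + vector[1])
        if move != Directions.STOP and walls[nx][ny]:
            del moves[move]                          # never predict a step into a wall
    if len(moves) == 0:
        return ghostPos                              # nothing legal left; predict the ghost stays put
    moves.normalize()                                # the Counter may no longer sum to 1
    move = util.chooseFromDistribution(moves)
    vector = Actions.directionToVector(move)
    return (ghostPos[0] + vector[0], ghostPos[1] + vector[1])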
Example No. 14
    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        # Pick Action
        legalActions = self.getLegalActions(state)
        action = None
        totalDeno = 0
        probability_list = []
        "*** YOUR CODE HERE ***"

        if (len(legalActions) == 0):
            return None

        if (util.flipCoin(self.epsilon)):

            for action in legalActions:
                currQValue = self.getQValue(state, action)
                currProb = math.exp(self.epsilon * currQValue)
                totalDeno = totalDeno + currProb

            for action in legalActions:
                currQValue = self.getQValue(state, action)
                currProb = math.exp(self.epsilon * currQValue)
                prob = (currProb / totalDeno)
                probability_list.append((prob, action))

            action = util.chooseFromDistribution(probability_list)

        else:
            action = self.computeActionFromQValues(state)

        return action
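
The epsilon branch above is a softmax (Boltzmann) selection: each legal action is chosen with probability proportional to exp(epsilon * Q(s, a)), rather than uniformly at random. The two loops can be collapsed into a single pass; this compact equivalent is only a sketch reusing the same names as the method above:

# Compact softmax over Q-values; behaviourally equivalent to the two loops above.
expQ = [(math.exp(self.epsilon * self.getQValue(state, a)), a) for a in legalActions]
total = sum(weight for weight, _ in expQ)
probability_list = [(weight / total, a) for weight, a in expQ]
action = util.chooseFromDistribution(probability_list)
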
    def assignJointActions(self, state, depth=4):
        startTime = time.clock()
        pacmanPosition = state.getPacmanPosition()
        #pos = state.getGhostPosition( self.index )
        allGhostPositions = state.getGhostPositions()
        numGhosts = len(allGhostPositions)
        jointActions = self.get_all_joint_actions(numGhosts, state)

        #print jointActions

        bestJointActions = []          # initialised up front so the elif branch below cannot hit an undefined name
        bestJointActionValue = float("-inf")
        for jointAction in jointActions:
            value = evaluate_joint_action(jointAction, state, depth)
            if value > bestJointActionValue:
                bestJointActionValue = value
                bestJointActions = []
                bestJointActions.append(jointAction)
            elif value == bestJointActionValue:
                bestJointActions.append(jointAction)                

            # if value(jointAction) > bestJointActionValue:
            #     bestJointActionValue = value(jointAction)
            #     bestJointAction = jointAction
            # elif value(jointAction) == bestJointActionValue:
            #     compare

        bestProb = 0.95
        distribution = util.Counter()
        for a in bestJointActions: distribution[tuple(a)] = bestProb / len(bestJointActions)
        for a in jointActions: distribution[tuple(a)] += ( 1-bestProb ) / len(jointActions)

        # jointAction = {}
        
        # for i in xrange(1, numGhosts+1):
        #     jointAction[i] = 'Stop'
        #print "bestJointAction is : ", bestJointAction

        print evaluateHeuristic(state)
        return list(util.chooseFromDistribution( distribution ))
Example No. 16
    def getRandomSuccessor(self, gameState, agentIndex, currentDepth):
        # Sample an action according to the ghost's action distribution (not necessarily uniform)
        dist = self.getDistribution(agentIndex, gameState)
        selectedAction = util.chooseFromDistribution(dist)

        legal = gameState.getLegalActions(agentIndex)
        if Directions.STOP in legal: legal.remove(Directions.STOP)

        # NB: this loop expands every legal successor, which is not optimal.
        # In practice we could call expectiMax only on the action selected above.
        for action in legal:
            successor = gameState.generateSuccessor(agentIndex, action)
            nextDepth = (currentDepth +
                         1) if (agentIndex == self.numGhosts) else currentDepth
            nextAgent = (agentIndex + 1) % (self.numGhosts + 1)
            (score, oldAction) = self.expectiMax(successor, nextAgent,
                                                 nextDepth)

            # Check whether this action is the one that was randomly selected
            if action == selectedAction:
                selectedScore = score

        return (selectedScore, selectedAction)
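
The NB comment above points out that every legal successor is expanded even though only the sampled action's score is kept. A leaner variant (a sketch reusing the helper names from this class) would expand just the sampled successor:

# Leaner variant: expand only the successor of the action that was sampled.
successor = gameState.generateSuccessor(agentIndex, selectedAction)
nextDepth = (currentDepth + 1) if (agentIndex == self.numGhosts) else currentDepth
nextAgent = (agentIndex + 1) % (self.numGhosts + 1)
(selectedScore, _) = self.expectiMax(successor, nextAgent, nextDepth)
return (selectedScore, selectedAction)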
Example No. 17
    def getAction(self, gameState):
        leftnum = len(gameState.getLeftPiles(self.index))
        legalaction = gameState.getLegalActions(self.index)
        sumprob = 0
        max = -99999
        dist = util.Counter()
        bestaction = legalaction[0]
        for action in legalaction:
            newgameState = gameState.generateSuccessor(self.index, action)
            #print str(newgameState.getBoard()) in self.gameStateValue[leftnum - 1].keys()
            if str(newgameState.getBoard()) in self.gameStateValue[self.index][leftnum - 1].keys():
                '''
                if self.gameStateValue[self.index][leftnum - 1][str(newgameState.getBoard())] > max:
                    bestaction = action
                    max = self.gameStateValue[self.index][leftnum - 1][str(newgameState.getBoard())]
                '''
                sumprob += (self.k ** self.gameStateValue[self.index][leftnum - 1][str(newgameState.getBoard())])
            else:
                '''
                if self.evalFunc(newgameState, self.index) * 1.9 > max:
                    bestaction = action
                    max = self.evalFunc(newgameState, self.index) * 1.9
                '''
                sumprob += (self.k ** evalFunc(newgameState, self.index))

        for action in legalaction:
            newgameState = gameState.generateSuccessor(self.index, action)
            if str(newgameState.getBoard()) in self.gameStateValue[self.index][leftnum - 1].keys():
                dist[action] = float(self.k ** self.gameStateValue[self.index][leftnum - 1][str(newgameState.getBoard())]) / float(sumprob)
            else:
                dist[action] = float(self.k ** evalFunc(newgameState, self.index)) / float(sumprob)

        dist.normalize()

        return util.chooseFromDistribution( dist )
        return bestaction
Example No. 18
 def getAction(self, state):
     dist = self.getDistribution(state)
     if len(dist) == 0:
         return Directions.STOP
     else:
         return util.chooseFromDistribution(dist)
 def getAction( self, state ):
     dist = self.getDistribution(state)
     if len(dist) == 0:
         return Directions.STOP
     else:
         return util.chooseFromDistribution( dist )
Example No. 20
 def getAction( self, state ):
   dist = self.getDistribution( state )
   if len(dist) == 0: return Directions.STOP  # Directions.STOP, consistent with the other examples
   return chooseFromDistribution( dist )
Example No. 21
 def pickAction(self, state):
   "Returns the action according to a probability distribution."
   # Assumes the agent defines getDistribution(state), as in the other examples on this page.
   dist = self.getDistribution(state)
   return util.chooseFromDistribution(dist)
Example No. 22
 def getAction(self, state):
     dist = self.getDistribution(state)
     return chooseFromDistribution(dist)
Example No. 23
 def getAction(self, state):
     dist = self.getDistribution(state)
     return chooseFromDistribution(dist)
 def getAction(self, state, total_pacmen, agentIndex):
     dist = self.getDistribution(state, total_pacmen)
     if len(dist) == 0:
         return Directions.STOP
     else:
         return util.chooseFromDistribution(dist)
Example No. 25
                     for i in range(21, 52): smallDice.append(i)
                     mediumDice = range(51, 256)
                     largeDice = range(256, 10000)
                     roundTenDice = range(30, 10000, 10)
                     roundHundredDice = range(100, 10000, 100)
                     
                     score = 0
                     results = []
                     for attempt in range(numGames):
                         diceDist = util.Counter()
                         diceDist[1] = 5
                         for number in range(1, 11): diceDist[number] = 10*(1/float(len(range(2,10))))
                         for number in range(11, 101): diceDist[number] = 3*(1/float(len(range(11,100))))
                         for number in range(101, 1001): diceDist[number] = 1*(1/float(len(range(101,1000))))
                         diceDist.normalize()
                         numDice = util.chooseFromDistribution( diceDist )
 
                         sizeDist = util.Counter()
                         for number in smallDice: sizeDist[number] = 150*(1/float(len(smallDice)))
                         for number in mediumDice: sizeDist[number] = 40*(1/float(len(mediumDice)))
                         for number in largeDice: sizeDist[number] = 10*(1/float(len(largeDice)))
                         for number in dNDDice: sizeDist[number] += 350*(1/float(len(dNDDice)))
                         for number in roundTenDice: sizeDist[number] += 40*(1/float(len(roundTenDice)))
                         for number in roundHundredDice: sizeDist[number] += 60*(1/float(len(roundHundredDice)))
                         
                         sizeDist.normalize()
                         diceSize = util.chooseFromDistribution( sizeDist )
                         
                         
                         modDist = util.Counter()
                         modDist[0] = 150
Example No. 26
    def direcional_expectimax(self, gameState, agent, depth):
        if gameState.isLose() or gameState.isWin() or len(
                gameState.getLegalActions()) == 0:
            return (gameState.getScore(), 0)
        if depth == self.depth:
            return (self.evaluationFunction(gameState), 0)
        actions = gameState.getLegalActions(agent)
        nextAgent = agent + 1
        if agent == gameState.getNumAgents() - 1:
            nextAgent = 0
            depth = depth + 1
        if agent == 0:
            curMax = (-float('inf'), 0)
            currentActions = [curMax]
            for action in actions:
                temp = self.direcional_expectimax(
                    gameState.generateSuccessor(agent, action), nextAgent,
                    depth)
                if temp[0] == curMax[0]:
                    currentActions.append((temp[0], action))
                elif temp[0] > curMax[0]:
                    curMax = (temp[0], action)
                    currentActions = [curMax]
            return random.choice(currentActions)
        else:
            ghostState = gameState.getGhostState(agent)
            isScared = ghostState.scaredTimer > 0
            pacmanPosition = gameState.getPacmanPosition()
            pos = gameState.getGhostPosition(agent)
            speed = 1
            if isScared: speed = 0.5

            actionVectors = [
                Actions.directionToVector(a, speed) for a in actions
            ]
            newPositions = [(pos[0] + a[0], pos[1] + a[1])
                            for a in actionVectors]
            distancesToPacman = [
                util.manhattanDistance(pos, pacmanPosition)
                for pos in newPositions
            ]
            if isScared:
                bestScore = max(distancesToPacman)
                bestProb = 0.8
            else:
                bestScore = min(distancesToPacman)
                bestProb = 0.8
            succ_actions = []
            for action in actions:
                succ_actions.append(
                    self.direcional_expectimax(
                        gameState.generateSuccessor(agent, action), nextAgent,
                        depth))
            bestActions = [
                action
                for action, distance in zip(succ_actions, distancesToPacman)
                if distance == bestScore
            ]
            # Construct distribution
            dist = util.Counter()
            for a in bestActions:
                dist[a] = bestProb / len(bestActions)
            for a in succ_actions:
                dist[a] += (1 - bestProb) / len(succ_actions)

            dist.normalize()
            return util.chooseFromDistribution(dist)