Exemplo n.º 1
0
 def makeMove(self, state):
     """Choose a move via repeated MCTS sampling of the hidden hands.

     Runs several independent MCTS searches, each over a freshly
     sampled random deal of the unseen cards, and returns the action
     that was chosen most often across the samples.
     """
     actions = self.getAllActions(state)
     # Only one legal action (a forced PASS): skip the search entirely.
     if len(actions) == 1:
         return actions[0]
     sampled_moves = []
     num_samples = 10
     for _ in xrange(num_samples):
         start = time.time()
         # Unseen cards: the full deck minus played cards and our hand.
         unseen = cards.diff(cards.allCards(), [state.playedCards, self.hand])
         counts = list(state.numRemaining)
         del counts[self.idx]
         dealt = cards.dealHands(unseen, counts)
         # Put a copy of our own (known) hand back at our seat.
         dealt.insert(self.idx, dict(self.hand))
         root = mctsNode(state.playedCards, self.idx, dealt, self.idx,
                         None, 0, state.topCard, state.lastPlayed, state.finished)
         iterations = 0
         # NOTE(review): `budget` is a module-level constant defined
         # elsewhere — presumably a per-sample time budget in seconds.
         deadline = start + budget
         while time.time() < deadline:
             iterations += 1
             leaf = self.selection(root)
             outcome = self.simulation(leaf)
             self.backpropagation(leaf, outcome)
         # Best child by average score; take the last element of an
         # ascending sort to keep the original tie-breaking order.
         ranked = sorted(root.children, key=lambda c: c.score / c.visits)
         sampled_moves.append(ranked[-1].lastMove)
     # Majority vote over the per-sample best moves.
     return Counter(sampled_moves).most_common()[0][0]
Exemplo n.º 2
0
def simulate(args):
    """Simulate the other players' hidden hands at random and play out
    the paranoid game tree on that deal.

    :args: Tuple of (trial, state, index, hand).
    :returns: Tuple of (best action, number of nodes expanded).
    """
    global nodesExpanded
    nodesExpanded = 0
    trial, state, index, hand = args
    # Unseen cards: the full deck minus played cards and our own hand.
    remaining = cards.diff(cards.allCards(), [state.playedCards, hand])
    # Hand sizes for every other player, then deal the unseen cards.
    counts = list(state.numRemaining)
    del counts[index]
    hands = cards.dealHands(remaining, counts)
    # Re-insert our known hand at our seat.
    hands.insert(index, hand)
    agents = [ParanoidAgent(i, h)
              for i, h in zip(xrange(state.numPlayers), hands)]
    # Paranoid search with the widest possible alpha-beta window.
    res = paranoid(state, 1, agents, -(sys.maxint - 1), sys.maxint)
    return res[0], nodesExpanded
Exemplo n.º 3
0
 def addChild(self, action):
     """Expand this node with the successor reached by *action*."""
     # Rebuild the game state for this node and step it forward.
     parent_state = state.State(self.playedCards, self.whosTurn,
                         self.topCard, self.lastPlayed, self.finished)
     child_state = parent_state.getChild(action)
     if action == agent.PASS:
         # Passing plays no cards, so every hand is unchanged.
         child_hands = list(self.hands)
     else:
         # Remove the played cards from the acting player's hand.
         # Assumes action unpacks as (count, rank) — TODO confirm
         # against the agent module's action encoding.
         acting_hand = dict(self.hands[self.whosTurn])
         child_hands = list(self.hands)
         child_hands[self.whosTurn] = cards.diff(acting_hand,
                                                 {action[1]: action[0]})
     # If our agent just went out, seed the score with the reciprocal
     # of its finishing position (1st -> 1.0, 2nd -> 0.5, ...).
     score = 0.
     if self.idx in child_state.finished:
         score = (child_state.finished.index(self.idx) + 1) ** -1
     child = mctsNode(child_state.playedCards, child_state.whosTurn, child_hands,
                      self.idx, action, self.depth + 1, child_state.topCard,
                      child_state.lastPlayed, child_state.finished, self, score)
     self.children.append(child)
Exemplo n.º 4
0
def simulate(args):
    """Simulate the other players' hidden hands at random and play out
    the max^n tree on that deal.

    :args: Tuple of (trialNum, node, idx, hand) — trial number (for
        debugging / unique identification), the current State object,
        this player's index, and this player's known hand.
    :returns: The action tuple judged best by the max^n search.
    """
    trialNum, node, idx, hand = args
    # Unseen cards: the full deck minus played cards and our own hand.
    remaining = cards.diff(cards.allCards(), [node.playedCards, hand])
    # Hand sizes for every other player, then deal the unseen cards.
    counts = list(node.numRemaining)
    del counts[idx]
    hands = cards.dealHands(remaining, counts)
    # Restore our known hand at our seat.
    hands.insert(idx, hand)
    agents = [MaxNAgent(i, h)
              for i, h in zip(xrange(node.numPlayers), hands)]
    # Search depth of two full rounds of play; the value is unused.
    bestAct, _ = maxN(node, agents, 0, 2 * node.numPlayers)
    return bestAct