maxN.py
from collections import Counter
import multiprocessing as mp
import operator as op
import sys
import time
import agent
import cards
import game


class MaxNAgent(agent.Agent):
"""An agent that plays according to the Max-N algorithm for multiplayer
games. (TODO: explain max-n more, as well as how we sampling cards)"""
def makeMove(self, node):
"""Chooses a move by (TODO: explain max-n)
:node: The current state from which we make a move.
:returns: The (numCards, whichCard) action pair and the values of the
node for each player.
"""
allActions = self.getAllActions(node)
# if there's only one option, just play that action
if len(allActions) == 1:
return allActions[0]
# sample opponent hands on each trial and keep track of best actions in
# each trial
numTrials = 3
# sample hands several times in parallel
pool = mp.Pool(mp.cpu_count())
start = time.time()
inputs = [
(trial, node, self.idx, self.hand) for trial in xrange(numTrials)
]
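        # map_async + get() with a very long timeout (rather than a plain
        # map) is a common workaround so a KeyboardInterrupt can still reach
        # the main process; the Counter tallies the winning action from each
        # trial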
bestActions = Counter(pool.map_async(simulate, inputs).get(sys.maxint))
pool.close()
pool.join()
allBest = max(bestActions, key=bestActions.get)
return allBest


def simulate(args):
"""Function to simulate the other players' cards randomly and play out the
max^n tree based on those hands. Returns the best action.
:trialNum: Trial number (for debugging and unique identification).
:node: The current State object.
:idx: The index of the current player.
:hand: The current player's hand, which is known.
:returns: The action tuple corresponding to the best action to take.
"""
trialNum, node, idx, hand = args
# subtract played cards and your own hand from cards remaining
cardsLeft = cards.diff(cards.allCards(), [node.playedCards, hand])
# get number of remaining cards for everyone else and deal hands
withoutMe = list(node.numRemaining)
del withoutMe[idx]
hands = cards.dealHands(cardsLeft, withoutMe)
# put my hand back in
hands.insert(idx, hand)
agents = map(lambda (i,h): MaxNAgent(i, h),
zip(xrange(node.numPlayers), hands))
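    # search to a depth of two moves per player (assuming turns rotate)
    # before falling back to the heuristic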
bestAct, bestVal = maxN(node, agents, 0, 2*node.numPlayers)
return bestAct


def maxN(node, agents, d, maxDepth):
"""Returns best action and corresponding tuple as given by the max-n
algorithm for the current node.
:node: the current node.
:returns: returns a tuple (bestAction, bestValue) where bestValue is a
tuple of values (one for each player).
"""
player = agents[node.whosTurn]
if node.isFinalState():
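        # players who finished earlier appear earlier in node.finished and
        # therefore receive a higher place score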
places = [5*node.numPlayers - node.finished.index(i)
for i in xrange(node.numPlayers)]
return ((0, -1), places)
    # at max depth, pick the move whose resulting state scores best under
    # the heuristic
if d >= maxDepth:
bestAct = (0, -1)
bestVal = [heuristic(node, p) for p in agents]
for act in player.getAllActions(node):
child = node.getChild(act)
            childVal = [heuristic(child, p) for p in agents]
if childVal[player.idx] > bestVal[player.idx]:
bestAct = act
bestVal = childVal
return bestAct, bestVal
# otherwise, continue to recurse down the tree
bestAct = (0, -1)
bestVal = tuple(-float('inf') for i in xrange(node.numPlayers))
    for act in player.getAllActions(node):
child = node.getChild(act)
childAct, childVal = maxN(child, agents, d+1, maxDepth)
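        # max^n step: the player to move keeps the child that maximizes
        # their own component of the value tuple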
if childVal[player.idx] > bestVal[player.idx]:
bestAct = act
bestVal = childVal
return bestAct, bestVal


def heuristic(node, player):
"""A heuristic for when we reach maxDepth before reaching a final state.
:node: The current node at which to evaluate.
:player: The agent object for which we are evaluating the state.
:returns: A float, with higher values representing better positions.
"""
idx = player.idx
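    # fraction of the initial hand that has already been played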
numCardsPlayed = sum(node.playedCards[idx].itervalues())
propCardsPlayed = float(numCardsPlayed) / node.initHandSize
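    # total "strength": sum of card values (keys) weighted by their counts,
    # for cards already played vs. cards still in hand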
strengthPlayed = sum(k*v for k, v in node.playedCards[idx].iteritems())
strengthRemaining = sum(k*v for k, v in player.hand.iteritems())
initStrength = strengthPlayed + strengthRemaining
propStrengthRemaining = float(strengthRemaining) / initStrength
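    # the 1.1 weight counts the fraction of cards shed slightly more heavily
    # than the fraction of card strength still held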
return propStrengthRemaining + 1.1*propCardsPlayed