def max(self, state, a, b, table):
    """Maximizing half of an alpha-beta search rooted at ``state``.

    Args:
        state: position to search. This method uses its ``actions()``,
            ``is_terminal()``, ``utility()`` and ``result()`` methods and
            its ``board``, ``opponent_row``, ``player`` and ``ser``
            attributes.
        a: alpha, the best value already guaranteed to the maximizer.
        b: beta, the best value already guaranteed to the minimizer.
        table: mapping updated in place. Whenever a move improves the
            running best, ``table[state.ser]`` becomes ``(move, utility)``.

    Returns:
        The utility of ``state`` (scored for the module-level ``currPly``),
        or the first value that reaches ``b`` when the search is cut off.
    """
    moves = state.actions()

    # A finished game is scored directly.
    if state.is_terminal():
        return state.utility(currPly)

    # No legal move: keep the board, hand the turn to the opponent's row and
    # let the minimizer answer. Nothing else can change the result after
    # this, so its value is returned as-is.
    if not moves:
        passed = State(state.board, state.opponent_row, state.player)
        return self.min(passed, a, b, table)

    best_util = -999  # lower than any value the search is expected to return
    while moves:
        move = moves.pop(0)
        child_util = self.min(state.result(move), a, b, table)

        if child_util > best_util:
            best_util = child_util
            # NOTE(review): ``state.ser`` is used as the key without being
            # called -- confirm whether ``ser`` is an attribute or a method.
            table[state.ser] = (move, best_util)

        # Beta cutoff: the minimizer already has something at least as good.
        if best_util >= b:
            return best_util
        if best_util > a:
            a = best_util

    return best_util
def minmax(self, state, num):
    """Plain (unpruned) minimax that records the chosen move per state.

    Args:
        state: position to search.
        num: not read by this method; recursive calls pass ``False``.

    Returns:
        The minimax utility of ``state`` from the point of view of the
        module-level ``currPly``.

    Side effects:
        Writes to the module-level ``table``: ``table[state.ser]`` is set to
        ``None`` for terminal and no-move states, otherwise to the most
        recently accepted move.
    """
    moves = state.actions()

    if state.is_terminal():
        table[state.ser] = None
        return state.utility(currPly)

    # True when the side to move is the player we are searching for.
    maximizing = currPly.row == state.player_row

    if not moves:
        # Nothing to play: same board, turn goes to the opponent's row.
        passed = State(state.board, state.opponent_row, state.player)
        passed_util = self.minmax(passed, False)
        table[state.ser] = None
        return passed_util

    # Start from the worst outcome for whoever is moving.
    best_util = -1 if maximizing else 1

    while moves:
        move = moves.pop()  # taken from the END of the list
        child_util = self.minmax(state.result(move), False)

        # Non-strict comparisons: a later move that ties replaces the
        # earlier one.
        if maximizing:
            accept = best_util <= child_util
        else:
            accept = best_util >= child_util

        if accept:
            best_util = child_util
            table[state.ser] = move

    return best_util
def minValue(self, state, depth):
    """Minimizing half of a plain minimax search with a move table.

    Args:
        state: position to search.
        depth: current ply; only forwarded (as ``depth + 1``) to
            ``self.maxValue``.

    Returns:
        The smallest utility the minimizer can force from ``state``.

    Side effects:
        Stores ``(move_or_None, utility)`` in the module-level
        ``transpositionTable`` under ``state.ser()``.
    """
    key = state.ser()

    # 1. Terminal positions are scored for the module-level ``player``.
    if state.is_terminal():
        final = state.utility(player)
        transpositionTable[key] = (None, final)
        return final

    moves = state.actions()

    # 2. No legal move: same board, the other side moves.
    if not moves:
        passed = State(state.board, state.opponent_row, state.player)
        passed_util = self.maxValue(passed, depth + 1)
        transpositionTable[key] = (None, passed_util)
        return passed_util

    # 3. Otherwise keep the lowest child value, starting above any value the
    #    search is expected to return.
    lowest = 2
    while moves:
        move = moves.pop(0)
        child_util = self.maxValue(state.result(move), depth + 1)
        if lowest > child_util:
            lowest = child_util
            transpositionTable[key] = (move, lowest)

    return lowest
def min(self, state, a, b, table):
    """Minimizing half of an alpha-beta search rooted at ``state``.

    Args:
        state: position to search.
        a: alpha, the best value already guaranteed to the maximizer.
        b: beta, the best value already guaranteed to the minimizer.
        table: mapping updated in place. Whenever a move lowers the running
            best, ``table[state.ser]`` becomes ``(move, utility)``.

    Returns:
        The utility of ``state`` (scored for the module-level ``currPly``),
        or the first value that drops to ``a`` when the search is cut off.
    """
    moves = state.actions()

    if state.is_terminal():
        return state.utility(currPly)

    # No legal move: same board, the opponent's row moves next. The value
    # of that position is the value of this one.
    if not moves:
        passed = State(state.board, state.opponent_row, state.player)
        return self.max(passed, a, b, table)

    lowest = 999  # higher than any value the search is expected to return
    while moves:
        move = moves.pop(0)
        child_util = self.max(state.result(move), a, b, table)

        if lowest > child_util:
            lowest = child_util
            # NOTE(review): ``state.ser`` is used as the key without being
            # called -- confirm whether ``ser`` is an attribute or a method.
            table[state.ser] = (move, lowest)

        # Alpha cutoff: the maximizer already has something at least as good.
        if lowest <= a:
            return lowest
        if lowest < b:
            b = lowest

    return lowest
def maxValue(self, state, alpha, beta, depth):
    """Time- and depth-limited alpha-beta max node with move ordering.

    Args:
        state: position to search.
        alpha: best value already guaranteed to the maximizer.
        beta: best value already guaranteed to the minimizer.
        depth: current ply, compared against the module-level
            ``depth_limit``.

    Returns:
        The value of ``state``: an exact utility at terminal states, an
        ``self.evaluate`` estimate when the search is cut off, otherwise the
        best child value found (or the value that triggered a beta cutoff).

    Side effects:
        Raises the module-level ``max_depth_reached`` to ``depth`` when
        ``depth`` is at least as large, clears ``search_is_complete`` when a
        non-terminal state is cut off, and stores ``(move, value, depth)``
        in ``transpositionTableForMax`` under ``state.ser()``.
    """
    global max_depth_reached
    global search_is_complete

    if depth >= max_depth_reached:
        max_depth_reached = depth

    key = state.ser()

    # Out of time, or at the depth limit: stop here. The time check runs
    # first and the depth check only if time remains.
    if self.is_time_up() or depth >= depth_limit:
        if state.is_terminal():
            return state.utility(player)
        estimate = self.evaluate(state, player.row, depth)
        # NOTE(review): the flag is cleared only on the non-terminal path;
        # confirm this matches the intended layout of the original branch.
        search_is_complete = False
        return estimate

    if state.is_terminal():
        return state.utility(player)

    moves = state.actions()

    # No legal move: same board, the other side moves.
    if not moves:
        passed = State(state.board, state.opponent_row, state.player)
        return self.minValue(passed, alpha, beta, depth + 1)

    best = -2  # below any value the search is expected to return
    cached = transpositionTableForMax.get(key, None)

    if cached is None:
        # No ordering hint: try the moves in the order they were generated.
        while moves:
            move = moves.pop(0)
            score = self.minValue(state.result(move), alpha, beta, depth + 1)
            if best < score:
                best = score
                transpositionTableForMax[key] = (move, best, depth)
            if best >= beta:
                return best
            if best > alpha:
                alpha = best
        return best

    # Try the previously stored move first.
    hinted = cached[0]
    score = self.minValue(state.result(hinted), alpha, beta, depth + 1)
    if best < score:
        best = score
        transpositionTableForMax[key] = (hinted, best, depth)
    if best >= beta:
        return best
    if best > alpha:
        alpha = best

    # Then every other move.
    while moves:
        move = moves.pop(0)
        if move == hinted:
            continue

        score = self.minValue(state.result(move), alpha, beta, depth + 1)
        if best < score:
            best = score
            transpositionTableForMax[key] = (move, best, depth)

        # Tie-breaker: a move matching the current best is stored when its
        # index is lower than the index of the hinted move.
        if best == score:
            if move.index < hinted.index:
                transpositionTableForMax[key] = (move, best, depth)

        # This loop prunes only on a strict ``>``.
        if best > beta:
            return best
        if best > alpha:
            alpha = best

    return best
def __init__(self, M, N, K, player_classes, timeout=None):
    """Create the players and the initial state.

    Args:
        M, N, K: forwarded unchanged to ``State.initial``.
        player_classes: forwarded to ``Player.create_players``.
        timeout: stored as ``self.timeout``; ``None`` by default.
    """
    self.timeout = timeout

    players = Player.create_players(player_classes)
    self.players = players

    # The first created player is the one passed to the initial state.
    self.state = State.initial(M, N, K, players[0])
def maxValue(self, state, alpha, beta, depth):
    """Depth- and time-limited alpha-beta max node using a transposition table.

    The table key is the board cells concatenated as strings followed by
    ``state.player_row``. If the module-level ``transposition_table`` already
    holds a move for this key, that move is searched first and skipped in
    the main loop.

    Args:
        state: position to search.
        alpha: best value already guaranteed to the maximizer.
        beta: best value already guaranteed to the minimizer.
        depth: current ply, compared against the module-level
            ``depth_limit``.

    Returns:
        ``self.evaluate(state, state.player_row)`` when time is up, the
        depth limit is reached, or an uncached terminal state is hit; the
        cached value for a cached terminal state; otherwise the best child
        value found (or the value that triggered a beta cutoff).

    Side effects:
        Sets the module-level ``changed`` to ``True`` on a table miss,
        stores ``(move_or_None, value)`` entries in ``transposition_table``,
        and writes several debugging lines to stdout.
    """
    global changed

    stateString = "".join(str(cell) for cell in state.board)
    stateString += str(state.player_row)

    # The original compared the bound method itself with ``True``, which is
    # never equal, so the time limit was silently ignored. Call it.
    if self.is_time_up():
        print("TIME IS UP")
        return self.evaluate(state, state.player_row)

    if depth >= depth_limit:
        return self.evaluate(state, state.player_row)

    bestAction = transposition_table.get(stateString, None)

    # Lower than any value the search is expected to return.
    value = (state.M * state.N + 1) * -1
    if stateString == "020700180":  # leftover debugging output
        print(str(value) + " BWAEfORE")

    if bestAction is not None:
        if state.is_terminal():
            return bestAction[1]

        # Entries for pass-move states are stored with ``None`` as the move;
        # there is nothing to expand first in that case.
        if bestAction[0] is not None:
            nextState = state.result(bestAction[0])
            valueTmp = self.minValue(nextState, alpha, beta, depth + 1)
            # A max node keeps the LARGER value. The original used ``>``
            # here, which could never replace the initial minimum.
            if valueTmp > value:
                value = valueTmp
            if value >= beta:
                return value
            if value > alpha:
                alpha = value
    else:
        print("MAX" + stateString)
        changed = True
        if state.is_terminal():
            value = self.evaluate(state, state.player_row)
            transposition_table[stateString] = (None, value)
            return value

    actions = state.actions()

    # No legal move: same board, the other side moves. The loop below is
    # then empty, so this value is what gets returned.
    if not actions:
        nextState = State(state.board, state.opponent_row, state.player)
        value = self.minValue(nextState, alpha, beta, depth + 1)
        transposition_table[stateString] = (None, value)

    if stateString == "020700180":  # leftover debugging output
        print(str(value) + " BEfORE")

    hinted = bestAction is not None
    while actions:
        currentAction = actions.pop(0)

        # The cached move was already searched above.
        if hinted and currentAction == bestAction[0]:
            continue

        nextState = state.result(currentAction)
        valueTmp = self.minValue(nextState, alpha, beta, depth + 1)

        if not hinted:
            print(str(value))  # leftover debugging output

        if valueTmp > value:
            if not hinted and stateString == "020700180":
                print("ACTION available")
            value = valueTmp
            transposition_table[stateString] = (currentAction, value)

        # Beta cutoff, then tighten alpha.
        if value >= beta:
            return value
        if value > alpha:
            alpha = value

    return value
def minValue(self, state, alpha, beta, depth):
    """Depth- and time-limited alpha-beta min node using a transposition table.

    The table key is the board cells concatenated as strings followed by
    ``state.player_row``. If the module-level ``transposition_table`` already
    holds a move for this key, that move is searched first and skipped in
    the main loop.

    Args:
        state: position to search.
        alpha: best value already guaranteed to the maximizer.
        beta: best value already guaranteed to the minimizer.
        depth: current ply, compared against the module-level
            ``depth_limit``.

    Returns:
        ``self.evaluate(state, state.player_row)`` when time is up, the
        depth limit is reached, or an uncached terminal state is hit; the
        cached value for a cached terminal state; otherwise the lowest child
        value found (or the value that triggered an alpha cutoff).

    Side effects:
        Sets the module-level ``changed`` to ``True`` on a table miss,
        stores ``(move_or_None, value)`` entries in ``transposition_table``,
        and prints the state key on every table miss.
    """
    global changed

    stateString = "".join(str(cell) for cell in state.board)
    stateString += str(state.player_row)

    # The original compared the bound method itself with ``True``, which is
    # never equal, so the time limit was silently ignored. Call it.
    if self.is_time_up():
        print("TIME US UP")
        return self.evaluate(state, state.player_row)

    if depth >= depth_limit:
        return self.evaluate(state, state.player_row)

    bestAction = transposition_table.get(stateString, None)

    # Higher than any value the search is expected to return.
    value = state.M * state.N + 1

    if bestAction is not None:
        if state.is_terminal():
            return bestAction[1]

        # Entries for pass-move states are stored with ``None`` as the move;
        # there is nothing to expand first in that case.
        if bestAction[0] is not None:
            nextState = state.result(bestAction[0])
            valueTmp = self.maxValue(nextState, alpha, beta, depth + 1)
            if value > valueTmp:
                value = valueTmp
            if value <= alpha:
                return value
            if value < beta:
                beta = value
    else:
        print(stateString)
        changed = True
        if state.is_terminal():
            value = self.evaluate(state, state.player_row)
            transposition_table[stateString] = (None, value)
            return value

    actions = state.actions()

    # No legal move: same board, the other side moves. Nothing else is
    # searched afterwards, so that value is the result.
    if not actions:
        nextState = State(state.board, state.opponent_row, state.player)
        value = self.maxValue(nextState, alpha, beta, depth + 1)
        transposition_table[stateString] = (None, value)
        return value

    hinted = bestAction is not None
    while actions:
        currentAction = actions.pop(0)

        # The cached move was already searched above.
        if hinted and currentAction == bestAction[0]:
            continue

        nextState = state.result(currentAction)
        valueTmp = self.maxValue(nextState, alpha, beta, depth + 1)

        if value > valueTmp:
            value = valueTmp
            transposition_table[stateString] = (currentAction, value)

        # Alpha cutoff, then tighten beta.
        if value <= alpha:
            return value
        if value < beta:
            beta = value

    return value
def maxValue(self, state, alpha, beta, depth):
    """Alpha-beta max node that tries the previously stored move first.

    Args:
        state: position to search.
        alpha: best value already guaranteed to the maximizer.
        beta: best value already guaranteed to the minimizer.
        depth: current ply; stored with table entries and forwarded as
            ``depth + 1``.

    Returns:
        The utility of ``state`` for the module-level ``player``, or the
        value that triggered a beta cutoff.

    Side effects:
        Stores ``(move, utility, depth)`` in the module-level
        ``transpositionTableForMax`` under ``state.ser()`` whenever a move
        improves the running best.
    """
    key = state.ser()

    if state.is_terminal():
        return state.utility(player)

    moves = state.actions()

    # No legal move: same board, the other side moves.
    if not moves:
        passed = State(state.board, state.opponent_row, state.player)
        return self.minValue(passed, alpha, beta, depth + 1)

    best = -2  # below any value the search is expected to return
    cached = transpositionTableForMax.get(key, None)

    if cached is None:
        while moves:
            move = moves.pop(0)
            score = self.minValue(state.result(move), alpha, beta, depth + 1)
            if best < score:
                best = score
                transpositionTableForMax[key] = (move, best, depth)
            # Without a stored move this loop prunes only on a strict ``>``.
            if best > beta:
                return best
            if best > alpha:
                alpha = best
        return best

    # Stored move first ...
    hinted = cached[0]
    score = self.minValue(state.result(hinted), alpha, beta, depth + 1)
    if best < score:
        best = score
        transpositionTableForMax[key] = (hinted, best, depth)
    if best >= beta:
        return best
    if best > alpha:
        alpha = best

    # ... then every other move.
    while moves:
        move = moves.pop(0)
        if move == hinted:
            continue
        score = self.minValue(state.result(move), alpha, beta, depth + 1)
        if best < score:
            best = score
            transpositionTableForMax[key] = (move, best, depth)
        if best >= beta:
            return best
        if best > alpha:
            alpha = best

    return best
def minimax(self, state, first):
    """Plain minimax that records the root move in the global ``action``.

    Args:
        state: position to search.
        first: ply counter; the recursion passes ``first + 1``, and moves
            are recorded only while ``first == 1``.

    Returns:
        The minimax utility of ``state`` for the module-level ``player``.

    Side effects:
        While ``first == 1``, assigns the accepted move to the module-level
        ``action``.
    """
    global action

    moves = state.actions()

    if state.is_terminal():
        return state.utility(player)

    # The side to move maximizes when it is the searching player's row.
    maximizing = state.player_row == player.row

    # No legal move: same board, the other side moves.
    if not moves:
        passed = State(state.board, state.opponent_row, state.player)
        return self.minimax(passed, first + 1)

    best = -2 if maximizing else 2

    while moves:
        move = moves.pop(0)
        score = self.minimax(state.result(move), first + 1)

        # The maximizer accepts ties (so a later equal move wins); the
        # minimizer requires a strict improvement.
        if maximizing:
            accept = best <= score
        else:
            accept = best > score

        if accept:
            best = score
            if first == 1:
                action = move

    return best
def maxValue(self, state, alpha, beta, depth):
    """Alpha-beta max node that returns a cached value when one exists.

    The cache key is the board cells concatenated as strings followed by
    ``state.player_row``.

    Args:
        state: position to search.
        alpha: best value already guaranteed to the maximizer.
        beta: best value already guaranteed to the minimizer.
        depth: current ply; stored with table entries and forwarded as
            ``depth + 1``.

    Returns:
        The cached value if the key is already in the module-level
        ``alphabetatranspositionTable``; otherwise the utility found by
        searching (or the value that triggered a beta cutoff).

    Side effects:
        Stores ``(move_or_None, utility, depth)`` entries in the table and
        prints debugging lines for terminal states and for root children.
    """
    key = "".join(str(cell) for cell in state.board) + str(state.player_row)

    cached = alphabetatranspositionTable.get(key, None)
    if cached is not None:
        return cached[1]

    if state.is_terminal():
        final = state.utility(player)
        print("TERMINAL " + key + " " + str(final))
        alphabetatranspositionTable[key] = (None, final, depth)
        return final

    moves = state.actions()

    # No legal move: same board, the other side moves.
    if not moves:
        passed = State(state.board, state.opponent_row, state.player)
        passed_util = self.minValue(passed, alpha, beta, depth + 1)
        alphabetatranspositionTable[key] = (None, passed_util, depth)
        return passed_util

    best = -2  # below any value the search is expected to return
    while moves:
        move = moves.pop(0)
        child = state.result(move)
        score = self.minValue(child, alpha, beta, depth + 1)

        if depth == 0:
            print(str(child.board) + " " + str(score))

        if best < score:
            best = score
            alphabetatranspositionTable[key] = (move, best, depth)

        if best >= beta:
            return best
        if best > alpha:
            alpha = best

    return best
def maxValue(self, state, alpha, beta, depth):
    """Depth-limited alpha-beta max node with a depth-aware transposition table.

    The table key is the board cells concatenated as strings followed by
    ``state.player_row``. Entries are ``(move_or_None, value, depth)``.

    Args:
        state: position to search.
        alpha: best value already guaranteed to the maximizer.
        beta: best value already guaranteed to the minimizer.
        depth: current ply, compared against the module-level
            ``depth_limit``.

    Returns:
        * the exact utility (for the module-level ``player``) at a terminal
          state;
        * ``self.evaluate(state, player.row)`` at the depth limit;
        * the cached value when an entry exists whose stored depth is not
          greater than ``depth``;
        * otherwise the best child value found, or the value that triggered
          a beta cutoff.

    Side effects:
        Raises the module-level ``maxDepth`` to ``depth``, sets
        ``nonTerminal`` to ``True`` on a depth-limit cutoff, and writes
        entries to ``transposition_table``.
    """
    global maxDepth
    global nonTerminal

    if depth > maxDepth:
        maxDepth = depth

    stateString = "".join(str(cell) for cell in state.board)
    stateString += str(state.player_row)

    # Terminal states are handled the same way at or below the depth limit.
    if state.is_terminal():
        utility = state.utility(player)
        transposition_table[stateString] = (None, utility, depth)
        return utility

    # Depth limit on a non-terminal state: fall back to the heuristic.
    if depth >= depth_limit:
        value = self.evaluate(state, player.row)
        transposition_table[stateString] = (None, value, depth)
        nonTerminal = True
        return value

    actions = state.actions()

    # No legal move: same board, the other side moves. That position's
    # value is this position's value.
    if not actions:
        nextState = State(state.board, state.opponent_row, state.player)
        utility = self.minValue(nextState, alpha, beta, depth + 1)
        transposition_table[stateString] = (None, utility, depth)
        return utility

    entry = transposition_table.get(stateString, None)

    # Reuse the cached value unless this visit is at a smaller ply than the
    # one the entry was stored at.
    if entry is not None and depth >= entry[2]:
        return entry[1]

    # Tie-breaking is applied only when re-searching an existing entry.
    reexploring = entry is not None

    utility = -2  # below any value the search is expected to return
    while actions:
        currentAction = actions.pop(0)
        nextState = state.result(currentAction)
        utilityTmp = self.minValue(nextState, alpha, beta, depth + 1)

        if utilityTmp > utility:
            utility = utilityTmp
            transposition_table[stateString] = (currentAction, utility, depth)
        elif reexploring and utilityTmp == utility:
            # Tie: prefer the move with the lower index. The stored move can
            # be ``None`` (cutoff entries), so guard before reading
            # ``.index``.
            stored = transposition_table.get(stateString, None)
            if stored is not None and stored[0] is not None:
                if currentAction.index < stored[0].index:
                    print("tiebreaking")
                    transposition_table[stateString] = (
                        currentAction, utility, depth)

        # Beta cutoff, then tighten alpha.
        if utility >= beta:
            return utility
        if utility > alpha:
            alpha = utility

    return utility
def minValue(self, state, alpha, beta, depth):
    """Depth-limited alpha-beta min node with a depth-aware transposition table.

    The table key is the board cells concatenated as strings followed by
    ``state.player_row``. Entries are ``(move_or_None, value, depth)``.

    Args:
        state: position to search.
        alpha: best value already guaranteed to the maximizer.
        beta: best value already guaranteed to the minimizer.
        depth: current ply, compared against the module-level
            ``depth_limit``.

    Returns:
        * the exact utility (for the module-level ``player``) at a terminal
          state;
        * ``self.evaluate(state, player.row)`` at the depth limit;
        * the freshly searched value when there is no legal move;
        * the cached value when an entry exists whose stored depth is not
          greater than ``depth``;
        * otherwise the lowest child value found, or the value that
          triggered an alpha cutoff.

    Side effects:
        Raises the module-level ``maxDepth`` to ``depth``, sets
        ``nonTerminal`` to ``True`` on a depth-limit cutoff, writes entries
        to ``transposition_table`` and prints a line for every depth-limit
        cutoff.
    """
    global maxDepth
    global nonTerminal

    if depth > maxDepth:
        maxDepth = depth

    stateString = "".join(str(cell) for cell in state.board)
    stateString += str(state.player_row)

    if depth >= depth_limit:
        if state.is_terminal():
            utility = state.utility(player)
            print("Applying utiltiy for " + stateString + " " + str(utility))
            transposition_table[stateString] = (None, utility, depth)
            return utility
        nonTerminal = True
        value = self.evaluate(state, player.row)
        print("Applying evaluation for " + stateString + " " + str(value))
        transposition_table[stateString] = (None, value, depth)
        return value

    if state.is_terminal():
        utility = state.utility(player)
        transposition_table[stateString] = (None, utility, depth)
        return utility

    # Read the table before anything below writes to it.
    entry = transposition_table.get(stateString, None)

    actions = state.actions()

    # No legal move: same board, the other side moves. Return the value just
    # computed and stored; the original could return an older cached value
    # here even though it had already overwritten the table entry.
    if not actions:
        nextState = State(state.board, state.opponent_row, state.player)
        utility = self.maxValue(nextState, alpha, beta, depth + 1)
        transposition_table[stateString] = (None, utility, depth)
        return utility

    # Reuse the cached value unless this visit is at a smaller ply than the
    # one the entry was stored at.
    if entry is not None and depth >= entry[2]:
        return entry[1]

    # Tie-breaking is applied only when re-searching an existing entry.
    reexploring = entry is not None

    utility = 2  # above any value the search is expected to return
    while actions:
        currentAction = actions.pop(0)
        nextState = state.result(currentAction)
        utilityTmp = self.maxValue(nextState, alpha, beta, depth + 1)

        if utility > utilityTmp:
            utility = utilityTmp
            transposition_table[stateString] = (currentAction, utility, depth)
        elif reexploring and utility == utilityTmp:
            # Tie: prefer the move with the lower index. The stored move can
            # be ``None`` (cutoff entries), so guard before reading
            # ``.index``.
            stored = transposition_table.get(stateString, None)
            if stored is not None and stored[0] is not None:
                if currentAction.index < stored[0].index:
                    print("tiebreaking")
                    transposition_table[stateString] = (
                        currentAction, utility, depth)

        # Alpha cutoff, then tighten beta.
        if utility <= alpha:
            return utility
        if utility < beta:
            beta = utility

    return utility