def minimax1(self, state, ava_actions, depth):
    score = check_game_status(state[0])
    if score == 0:
        return 0
    elif score == 1:
        return 10 - depth   # 'O' wins: prefer faster wins
    elif score == 2:
        return -10 + depth  # 'X' wins: prefer slower losses

    if state[1] == 'O':  # maximizing player to move
        best = -1000
        # iterate over a snapshot so the remove/insert backtracking
        # below cannot disturb the loop
        for c, action in enumerate(list(ava_actions)):
            ava_actions.remove(action)
            best = max(
                best,
                self.minimax1(after_action_state(state, action),
                              ava_actions, depth + 1))
            ava_actions.insert(c, action)  # backtrack
        return best
    else:  # minimizing player to move
        best = 1000
        for c, action in enumerate(list(ava_actions)):
            ava_actions.remove(action)
            best = min(
                best,
                self.minimax1(after_action_state(state, action),
                              ava_actions, depth + 1))
            ava_actions.insert(c, action)  # backtrack
        return best
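# A root-level driver sketch for minimax1 (illustrative, not from the source):
# it assumes the same gym-tictactoe helpers used above. The method name `act`
# and the variable `best_action` are hypothetical.
def act(self, state, ava_actions):
    maximizing = state[1] == 'O'
    best_action, best_val = None, None
    for action in ava_actions:
        rest = [a for a in ava_actions if a != action]
        val = self.minimax1(after_action_state(state, action), rest, 1)
        if best_val is None or (val > best_val if maximizing else val < best_val):
            best_action, best_val = action, val
    return best_action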
def minimax(self, state, turn, ava_actions, depth):
    score = check_game_status(state[0])
    if score == 1:
        return 10
    if score == 2:
        return -10
    if score == 0:
        return 0

    if turn == 0:  # minimizing player
        best = 1000
        for i in list(ava_actions):
            c = ava_actions.index(i)
            ava_actions.remove(i)
            best = min(best,
                       self.minimax(after_action_state(state, i),
                                    1, ava_actions, depth + 1))
            ava_actions.insert(c, i)  # backtrack
        return best
    else:  # maximizing player
        best = -1000
        for i in list(ava_actions):
            c = ava_actions.index(i)
            ava_actions.remove(i)
            best = max(best,
                       self.minimax(after_action_state(state, i),
                                    0, ava_actions, depth + 1))
            ava_actions.insert(c, i)  # backtrack
        return best
def minimax(self, state, depth, isMax, ava_actions):
    board, _mark = state
    gameState = check_game_status(board)
    if gameState == 0:
        return 0
    elif gameState == 1:
        return 1
    elif gameState == 2:
        return -1

    if isMax:
        bestScore = -100000
        for i in range(9):
            if i in ava_actions:
                statet = after_action_state(state, i)
                ava_actions.remove(i)
                score = self.minimax(statet, depth + 1, False, ava_actions)
                ava_actions.append(i)
                bestScore = max(score, bestScore)
        return bestScore
    else:
        bestScore = 100000
        for i in range(9):
            if i in ava_actions:
                statet = after_action_state(state, i)
                ava_actions.remove(i)
                score = self.minimax(statet, depth + 1, True, ava_actions)
                ava_actions.append(i)
                bestScore = min(score, bestScore)
        return bestScore
def act(self, state, ava_actions):
    for action in ava_actions:
        nstate = after_action_state(state, action)
        gstatus = check_game_status(nstate[0])
        if gstatus > 0:
            if tomark(gstatus) == self.mark:
                return action
    return random.choice(ava_actions)
def act(self, state, my_env):
    available_actions = my_env.available_actions()
    for action in available_actions:
        nstate = after_action_state(my_env.state, action)
        gstatus = check_game_status(nstate[0])
        if gstatus > 0:
            if tomark(gstatus) == self.mark:
                return action
    return random.choice(available_actions)
def act(self, state, my_env: TicTacToeEnv):
    available_actions = my_env.available_actions()

    # --- Step 1: play winning move, if possible ---
    for action in available_actions:
        nstate = after_action_state(state, action)
        gstatus = check_game_status(nstate[0])
        if gstatus > 0:
            if tomark(gstatus) == self.mark:
                return action

    # --- Step 2: block opponent from winning ---
    # imagine the opponent was playing
    rev_state = (state[0], next_mark(state[1]))
    for action in available_actions:
        nstate = after_action_state(rev_state, action)
        gstatus = check_game_status(nstate[0])
        if gstatus > 0:
            # if they could make a winning move there, take that square
            if tomark(gstatus) == self.opponent_mark:
                return action

    return random.choice(available_actions)
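# A small, self-contained check of the Step-2 blocking idea (illustrative,
# assuming the helpers come from gym_tictactoe.env as in the code above).
# With O threatening the 0-1-2 row and X to move, pretending it is O's turn
# reveals that cell 2 is the square X must block; status 1 means O wins.
from gym_tictactoe.env import after_action_state, check_game_status

board = (1, 1, 0,
         2, 0, 0,
         0, 0, 0)                 # 0 = empty, 1 = 'O', 2 = 'X'
rev_state = (board, 'O')          # imagine the opponent moves next
for action in [2, 4, 5, 6, 7, 8]:  # the empty cells
    if check_game_status(after_action_state(rev_state, action)[0]) == 1:
        print("block at", action)  # prints: block at 2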
def compute_reward(self, state):
    """Given a terminal state, return the reward."""
    gstatus = check_game_status(state[0])
    if gstatus == -1:
        raise RuntimeError(
            "Error! compute_reward called when game was not finished.")
    # tie
    elif gstatus == 0:
        return 0.5
    # won
    elif gstatus == tocode(self.mark):
        return 1
    # lost
    else:
        return 0
def act(self, state, ava_actions):
    board, mark = state
    nboard = list(board[:])
    if check_game_status(nboard) < 0:  # game still in progress
        min_val = 100
        max_val = -100
        action_min = ava_actions[0]
        action_max = ava_actions[0]
        if mark == 'O':
            for action in ava_actions:
                nboard[action] = 1  # tocode('O')
                mark = next_mark(mark)
                value, _ = self.act(
                    (tuple(nboard), mark),
                    [p for p in ava_actions if p != action])
                if value < min_val:
                    min_val = value
                    action_min = action
                nboard[action] = 0  # backtrack
                mark = next_mark(mark)
            return min_val, action_min
        else:
            for action in ava_actions:
                nboard[action] = 2  # tocode('X')
                mark = next_mark(mark)
                value, _ = self.act(
                    (tuple(nboard), mark),
                    [p for p in ava_actions if p != action])
                if value > max_val:
                    max_val = value
                    action_max = action
                nboard[action] = 0  # backtrack
                mark = next_mark(mark)
            return max_val, action_max
    else:
        # terminal state: return its status together with a dummy action
        return check_game_status(nboard), 12
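# Adapter sketch (not in the source): the act() above returns a
# (value, action) pair rather than a bare action, so a caller that needs
# only the move can unwrap it like this. `choose_action` is hypothetical.
def choose_action(agent, state, ava_actions):
    value, action = agent.act(state, ava_actions)
    return action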
def ask_value(self, state):
    """Return the value of the given state.

    If the state does not exist yet, set it to a default value.

    Args:
        state (tuple): State.

    Returns:
        float: Value of the state.
    """
    if state not in st_values:
        gstatus = check_game_status(state[0])
        val = 0
        # win
        if gstatus > 0:
            val = O_REWARD if self.mark == 'O' else X_REWARD
        set_state_value(state, val)
    return st_values[state]
def minimax(board, is_max):
    curr = check_game_status(board)
    if curr == 1:   # 'O' has won
        return 10
    if curr == 2:   # 'X' has won
        return -10
    if curr == 0:   # draw
        return 0

    if is_max:  # maximizer plays 'O'
        mark = 'O'
        maxval = -100
        for i in range(0, 9):
            if board[i] == 0:
                old_state = [board, mark]
                new_board, new_mark = after_action_state(old_state, i)
                currval = minimax(new_board, False)
                if maxval < currval:
                    maxval = currval
        return maxval
    else:  # minimizer plays 'X'
        mark = 'X'
        minval = 100
        for i in range(0, 9):
            if board[i] == 0:
                old_state = [board, mark]
                new_board, new_mark = after_action_state(old_state, i)
                # after the minimizer moves, it is the maximizer's turn
                currval = minimax(new_board, True)
                if minval > currval:
                    minval = currval
        return minval
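# Root chooser sketch for the standalone minimax above (illustrative):
# 'O' is the maximizer, so pick the empty cell whose child value is highest.
# `best_move` is a hypothetical helper, not from the source.
def best_move(board):
    best_i, best_val = None, -100
    for i in range(0, 9):
        if board[i] == 0:
            new_board, _ = after_action_state([board, 'O'], i)
            val = minimax(new_board, False)
            if val > best_val:
                best_i, best_val = i, val
    return best_i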
def ask_value(self, state):
    """Return the value of the given state.

    If the state does not exist yet, set it to the default value.

    Args:
        state (tuple): State.

    Returns:
        float: Value of the state.
    """
    if state not in st_values:
        logging.debug("ask_value - new state {}".format(state))
        gstatus = check_game_status(state[0])
        val = DEFAULT_VALUE
        # win
        if gstatus > 0:
            val = O_REWARD if self.mark == 'O' else X_REWARD
        set_state_value(state, val)
    return st_values[state]
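# Sketch of the TD(0) backup that typically pairs with ask_value in this kind
# of value-table agent (an assumption about the surrounding class, not code
# from the source; `alpha` is a hypothetical learning-rate parameter):
def backup(self, state, nstate, alpha=0.4):
    val = self.ask_value(state)
    nval = self.ask_value(nstate)
    # move the stored value of `state` toward the value of its successor
    set_state_value(state, val + alpha * (nval - val))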
def find_loc_prob(state, aval_actions, action, win_count, loss_count, step):
    aval_actions.remove(action)
    state = after_action_state(state, action)
    game_status = check_game_status(state[0])
    if game_status == 0 or game_status == tocode(next_mark(state[1])):
        win_count = win_count + step
        if game_status == 0:
            # a draw is counted as a victory for both players
            loss_count = loss_count + step
        return win_count, loss_count
    elif game_status == tocode(state[1]):
        loss_count = loss_count + step
        return win_count, loss_count
    else:
        # game not over: recurse with the win/loss counters swapped,
        # since the player to move alternates at each level
        for action in aval_actions:
            temp = aval_actions.copy()
            loss_count, win_count = find_loc_prob(state, temp, action,
                                                  loss_count, win_count,
                                                  step / 5)
        return win_count, loss_count
def find_loc_prob(state, aval_actions, action, win_count, loss_count, step):
    aval_actions.remove(action)
    state = after_action_state(state, action)
    game_status = check_game_status(state[0])
    print("Action = {}".format(action))
    if game_status == 0 or game_status == tocode(next_mark(state[1])):
        win_count = win_count + step
        return win_count, loss_count
    elif game_status == tocode(state[1]):
        loss_count = loss_count + step
        return win_count, loss_count
    else:
        for action in aval_actions:
            print("Calling recursively for step {}".format(step))
            print("Win count and loss count up to this step = {} and {} for mark {}"
                  .format(win_count, loss_count, state[1]))
            # recurse on a copy so the callee's remove() cannot mutate
            # the list this loop is iterating over
            loss_count, win_count = find_loc_prob(state, aval_actions.copy(),
                                                  action, loss_count,
                                                  win_count, step - 1)
        return win_count, loss_count
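# Top-level chooser sketch (illustrative, not from the source): score every
# available action with find_loc_prob and pick the most promising one.
# `choose_by_prob` and the starting step value are hypothetical.
def choose_by_prob(state, aval_actions, start_step=25):
    best_action, best_score = None, None
    for action in aval_actions:
        wins, losses = find_loc_prob(state, aval_actions.copy(), action,
                                     0, 0, start_step)
        score = wins - losses
        if best_score is None or score > best_score:
            best_action, best_score = action, score
    return best_action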