import copy
import random

# GameState, Piece, game_state_to_q_state, and game_to_v_state are project
# helpers defined elsewhere in the repo; the methods below belong to the
# various agent classes.


def move(self, game: GameState):
    moves = game.get_possible_actions()
    # If there is a move
    if len(moves) > 0:
        ratings = [[move, 0] for move in moves]
        for rating in ratings:
            move = rating[0]
            if move[3] > move[1] and game.current_player == Piece.RED:  # moving forward is good
                rating[1] = rating[1] + 1
            if move[3] < move[1] and game.current_player == Piece.BLUE:  # moving forward is good
                rating[1] = rating[1] + 1
            if 0 <= game[(move[2], move[3])].value - Piece.BLUE.value <= 1 and game.current_player == Piece.RED:  # is red moving onto blue
                rating[1] = rating[1] + 2
            if 0 <= game[(move[2], move[3])].value - Piece.RED.value <= 1 and game.current_player == Piece.BLUE:  # is blue moving onto red
                rating[1] = rating[1] + 2
            if game.does_move_win_tuple(move):  # if win, do it
                rating[1] = rating[1] + 10
        # Shuffle before the stable sort so ties are broken randomly
        random.shuffle(ratings)
        ratings.sort(key=lambda x: x[1], reverse=True)
        game.make_move_tuple(ratings[0][0])  # Make it
    else:
        game.pass_move()

def move(self, game: GameState):
    # Small shaping reward if our previous move advanced a piece
    reward = 0
    if (self.last_action_red
            and self.last_action_red[3] > self.last_action_red[1]
            and game.current_player == Piece.RED):  # moving forward is good
        reward = 0.05
    if (self.last_action_blue
            and self.last_action_blue[3] < self.last_action_blue[1]
            and game.current_player == Piece.BLUE):  # moving forward is good
        reward = 0.05
    actions = game.get_possible_actions()
    action_key_value_pairs = []
    for action in actions:
        key = game_state_to_q_state(game, action)
        value = self.getQ(key)
        action_key_value_pairs.append((action, key, value))
    # Shuffle before the stable sort so ties are broken randomly
    random.shuffle(action_key_value_pairs)
    action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
    max_action = action_key_value_pairs[0][0]
    max_action_key = action_key_value_pairs[0][1]
    max_action_value = action_key_value_pairs[0][2]
    if random.random() < self.epsilon:  # epsilon-greedy: pick a random action
        max_action_tuple = random.choice(action_key_value_pairs)
        max_action = max_action_tuple[0]
        max_action_key = max_action_tuple[1]
    # print(action_key_value_pairs)
    # cool line to get percentage confidence of winning based on last move
    # uncomment when playing against agent
    # print(f'Confidence: {max_action_value}')
    if game.current_player == Piece.BLUE:
        if self.last_state_key_blue is not None:
            self.q_learn(self.last_state_key_blue, reward, max_action_value)
        self.last_state_key_blue = max_action_key
        self.last_action_blue = max_action
    else:
        if self.last_state_key_red is not None:
            self.q_learn(self.last_state_key_red, reward, max_action_value)
        self.last_state_key_red = max_action_key
        self.last_action_red = max_action
    game.make_move_tuple(max_action)
    # decay epsilon toward its floor
    self.epsilon = self.epsilon * self.epsilon_decay
    if self.epsilon < self.epsilon_floor:
        self.epsilon = self.epsilon_floor

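# The q_learn helper is not shown in this section. Below is a minimal sketch
# of the standard tabular Q-learning update it presumably performs; the names
# self.alpha, self.gamma, and self.q_table are assumptions, and getQ is
# presumed to return a default (e.g. 0.0) for unseen keys.
def q_learn(self, state_key, reward, next_max_q):
    # Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
    old_q = self.getQ(state_key)
    self.q_table[state_key] = old_q + self.alpha * (reward + self.gamma * next_max_q - old_q)
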
def miniMax(self, game: GameState, depth):
    # Terminal positions score +/-9999; RED is the minimizing player,
    # BLUE the maximizing player.
    if game.winner != Piece.NONE:
        if game.winner == Piece.RED:
            return -9999
        else:
            return 9999
    # At the depth cutoff, fall back to the learned state-value estimate
    if depth >= self.moveDepth:
        return self.getV(game_to_v_state(game))
    actions = game.get_possible_actions()
    random.shuffle(actions)
    if game.current_player == Piece.RED:  # minimizing player
        best_V = 10000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            actionV = self.miniMax(tempGame, depth + 1)
            if actionV < best_V:
                best_V = actionV
    else:  # maximizing player
        best_V = -10000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            actionV = self.miniMax(tempGame, depth + 1)
            if actionV > best_V:
                best_V = actionV
    # Only the value propagates; the root move is chosen by the caller
    return best_V

def move(self, game: GameState):
    # Advisory agent: prints ranked suggestions once per turn, never moves
    if game.turn_num == self.last_num:
        return
    actions = game.get_possible_actions()
    action_key_value_pairs = []
    for action in actions:
        key = game_state_to_q_state(game, action)
        value = self.getQ(key)
        action_key_value_pairs.append((action, key, value))
    random.shuffle(action_key_value_pairs)
    action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
    max_action_value = action_key_value_pairs[0][2]
    print("suggestion: ")
    print(action_key_value_pairs)
    # cool line to get percentage confidence of winning based on last move
    print(f'Confidence: {max_action_value}')
    self.last_num = game.turn_num

def move(self, game: GameState):
    # Learn from the transition since our previous move
    if self.last_state_key is not None:
        self.td_learn(self.lastGameState, 0, game)
    chosen_action = None
    actions = game.get_possible_actions()
    random.shuffle(actions)  # get a random move if all are equal
    max_V = -100000
    if random.random() < self.epsilon:
        chosen_action = random.choice(actions)
    else:
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            tempGameV = self.alphaBeta(tempGame, 1, -100000, 100000)
            if tempGameV == 0:  # 0 means "no estimate"; query the leaf value directly
                tempGameV = self.getV(game_to_v_state(tempGame))
            if tempGameV > max_V:
                chosen_action = action
                max_V = tempGameV
    self.last_state_key = game_to_v_state(game)
    self.lastGameState = copy.deepcopy(game)
    # print("Move's V estimate: " + str(max_V))
    # print("Current State's V estimate: " + str(self.getV(game_to_v_state(game))))
    game.make_move_tuple(chosen_action)

def move(self, game: GameState):
    # Double Q-learning: two tables, A and B, each evaluated against the
    # other's greedy action.
    actions = game.get_possible_actions()
    action_key_value_pairs = []
    for action in actions:
        key = game_state_to_q_state(game, action)
        value = self.getQA(key)
        action_key_value_pairs.append((action, key, value))
    random.shuffle(action_key_value_pairs)
    action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
    max_action_a = action_key_value_pairs[0][0]
    max_action_key_a = action_key_value_pairs[0][1]
    # B's estimate of A's greedy action
    max_action_value_b_from_a = self.getQB(max_action_key_a)
    action_key_value_pairs = []
    for action in actions:
        key = game_state_to_q_state(game, action)
        value = self.getQB(key)
        action_key_value_pairs.append((action, key, value))
    random.shuffle(action_key_value_pairs)
    action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
    max_action_key_b = action_key_value_pairs[0][1]
    # A's estimate of B's greedy action
    max_action_value_a_from_b = self.getQA(max_action_key_b)
    if random.random() < self.epsilon:  # epsilon-greedy: pick a random action
        max_action_tuple = random.choice(action_key_value_pairs)
        max_action_a = max_action_tuple[0]
        max_action_key_a = max_action_tuple[1]
    # cool line to get percentage confidence of winning based on last move
    # uncomment when playing against agent
    # print(f'Confidence: {max_action_value}')
    if game.current_player == Piece.BLUE:
        if self.last_state_key_blue is not None:
            self.q_learn(self.last_state_key_blue, 0, max_action_value_b_from_a, max_action_value_a_from_b)
        self.last_state_key_blue = max_action_key_a
    else:
        if self.last_state_key_red is not None:
            self.q_learn(self.last_state_key_red, 0, max_action_value_b_from_a, max_action_value_a_from_b)
        self.last_state_key_red = max_action_key_a
    game.make_move_tuple(max_action_a)

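# The four-argument q_learn above is not shown here. A sketch of the double
# Q-learning update it presumably performs, where a coin flip decides which
# table to update and the cross-table estimate supplies the target;
# self.alpha, self.gamma, self.q_table_a, and self.q_table_b are assumed names.
def q_learn(self, state_key, reward, q_b_of_a_argmax, q_a_of_b_argmax):
    if random.random() < 0.5:
        # Update A toward B's estimate of A's greedy action
        old = self.getQA(state_key)
        self.q_table_a[state_key] = old + self.alpha * (reward + self.gamma * q_b_of_a_argmax - old)
    else:
        # Update B toward A's estimate of B's greedy action
        old = self.getQB(state_key)
        self.q_table_b[state_key] = old + self.alpha * (reward + self.gamma * q_a_of_b_argmax - old)
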
def move(self, game: GameState):
    moves = game.get_possible_actions()
    # If there is a move
    if len(moves) > 0:
        actual_move = random.choice(moves)
        # print(f'[Random {self.player}] Making move: {actual_move}')
        game.make_move_tuple(actual_move)  # Make it
    else:
        # print(f'[Random {self.player}] No moves! Passing turn...')
        game.pass_move()

def alphaBeta(self, game: GameState, depth, alpha, beta):
    # Terminal scores are depth-adjusted so faster wins (and slower losses)
    # are preferred; BLUE maximizes, RED minimizes.
    if game.winner != Piece.NONE:
        if game.winner == Piece.BLUE:
            return 50000 - depth
        else:
            return -50000 + depth
    # At the depth cutoff, fall back to the learned state-value estimate
    if depth >= self.moveDepth:
        return self.getV(game_to_v_state(game))
    actions = game.get_possible_actions()
    random.shuffle(actions)
    if game.current_player == Piece.BLUE:  # maximizing player
        best_V = -100000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            actionV = self.alphaBeta(tempGame, depth + 1, alpha, beta)
            if actionV == 0:  # 0 means "no estimate"; query the leaf value directly
                actionV = self.getV(game_to_v_state(tempGame))
            if actionV > best_V:
                best_V = actionV
            if actionV > alpha:
                alpha = actionV
            if alpha >= beta:  # prune: the minimizer already has a better option above
                break
    else:  # minimizing player
        best_V = 100000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            actionV = self.alphaBeta(tempGame, depth + 1, alpha, beta)
            if actionV == 0:
                actionV = self.getV(game_to_v_state(tempGame))
            if actionV < best_V:
                best_V = actionV
            if actionV < beta:
                beta = actionV
            if alpha >= beta:  # prune: the maximizer already has a better option above
                break
    # Only the value propagates; the root move is chosen by the caller
    return best_V

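# getV is not shown in this section. The "actionV == 0" fallback above makes
# sense if getV returns 0.0 for states it has never stored, so an exact zero
# doubles as a "no estimate" sentinel. A sketch under that assumption
# (self.v_table is an assumed name).
def getV(self, v_state_key):
    return self.v_table.get(v_state_key, 0.0)
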
def move(self, game: GameState):
    # TD(0) agent: learn from the previous transition, then act
    # epsilon-greedily on one-step lookahead values.
    if self.last_state_key is not None:
        self.td_learn(self.last_state_key, 0, game_to_v_state(game))
    chosen_action = None
    actions = game.get_possible_actions()
    random.shuffle(actions)  # get a random move if all are equal
    if random.random() < self.epsilon:
        chosen_action = random.choice(actions)
    else:
        max_V = -10000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            VOfAction = self.getV(game_to_v_state(tempGame))
            if VOfAction > max_V:
                chosen_action = action
                max_V = VOfAction
    self.last_state_key = game_to_v_state(game)
    game.make_move_tuple(chosen_action)

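# td_learn is not shown in this section. A minimal sketch of the TD(0) update
# this key-based variant presumably performs; self.alpha, self.gamma, and
# self.v_table are assumed names.
def td_learn(self, state_key, reward, next_state_key):
    # V(s) <- V(s) + alpha * (reward + gamma * V(s') - V(s))
    old_v = self.getV(state_key)
    target = reward + self.gamma * self.getV(next_state_key)
    self.v_table[state_key] = old_v + self.alpha * (target - old_v)
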
def move(self, game: GameState):
    # Minimax-backed TD agent: picks the move with the lowest minimax value,
    # i.e. the RED-favoring (minimizing) choice.
    if self.last_state_key is not None:
        self.td_learn(self.lastGameState, 0, game)
    chosen_action = None
    actions = game.get_possible_actions()
    random.shuffle(actions)  # get a random move if all are equal
    if random.random() < self.epsilon:
        chosen_action = random.choice(actions)
    else:
        min_V = 10000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            tempGameV = self.miniMax(tempGame, 1)
            if tempGameV < min_V:
                chosen_action = action
                min_V = tempGameV
    self.last_state_key = game_to_v_state(game)
    self.lastGameState = copy.deepcopy(game)
    game.make_move_tuple(chosen_action)

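# The td_learn used by the two search-backed agents takes full GameStates
# rather than precomputed keys (compare the key-based variant sketched
# earlier), presumably so it can detect terminal positions and substitute a
# win/loss reward. A heavily hedged sketch; the terminal reward values and
# the names self.alpha, self.gamma, and self.v_table are all assumptions.
def td_learn(self, last_game: GameState, reward, current_game: GameState):
    if current_game.winner != Piece.NONE:
        reward = 1.0 if current_game.winner == Piece.BLUE else -1.0
    last_key = game_to_v_state(last_game)
    old_v = self.getV(last_key)
    target = reward + self.gamma * self.getV(game_to_v_state(current_game))
    self.v_table[last_key] = old_v + self.alpha * (target - old_v)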