def move(self, game: GameState):
    reward = 0
    if (self.last_action_red and self.last_action_red[3] > self.last_action_red[1]
            and game.current_player == Piece.RED):
        # moving forward is good
        reward = 0.05
    if (self.last_action_blue and self.last_action_blue[3] < self.last_action_blue[1]
            and game.current_player == Piece.BLUE):
        # moving forward is good
        reward = 0.05

    actions = game.get_possible_actions()
    action_key_value_pairs = []
    for action in actions:
        key = game_state_to_q_state(game, action)
        value = self.getQ(key)
        action_key_value_pairs.append((action, key, value))

    # Shuffle first so ties between equal Q-values are broken randomly.
    random.shuffle(action_key_value_pairs)
    action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
    max_action = action_key_value_pairs[0][0]
    max_action_key = action_key_value_pairs[0][1]
    max_action_value = action_key_value_pairs[0][2]

    if random.random() < self.epsilon:
        # epsilon-greedy exploration: pick a random action instead
        max_action_tuple = random.choice(action_key_value_pairs)
        max_action = max_action_tuple[0]
        max_action_key = max_action_tuple[1]

    # print(action_key_value_pairs)
    # Uncomment when playing against the agent to see its estimated
    # confidence of winning based on the chosen move:
    # print(f'Confidence: {max_action_value}')

    if game.current_player == Piece.BLUE:
        if self.last_state_key_blue is not None:
            self.q_learn(self.last_state_key_blue, reward, max_action_value)
        self.last_state_key_blue = max_action_key
        self.last_action_blue = max_action
    else:
        if self.last_state_key_red is not None:
            self.q_learn(self.last_state_key_red, reward, max_action_value)
        self.last_state_key_red = max_action_key
        self.last_action_red = max_action

    game.make_move_tuple(max_action)

    # Decay epsilon towards its floor.
    self.epsilon = self.epsilon * self.epsilon_decay
    if self.epsilon < self.epsilon_floor:
        self.epsilon = self.epsilon_floor
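# getQ and q_learn are referenced above but not shown in this section. The
# sketch below is a minimal guess at how they could look for this tabular
# agent, assuming a dict-backed table self.q, a learning rate self.alpha and a
# discount self.gamma (all assumed names, not confirmed by the rest of the code).
def getQ(self, key):
    # Unseen state-action keys default to a neutral 0.5, matching the
    # "confidence of winning" reading of Q above (assumption).
    return self.q.get(key, 0.5)

def q_learn(self, last_key, reward, next_value):
    # One-step Q-learning update toward reward + gamma * max_a' Q(s', a'),
    # where next_value is the greedy value computed in move().
    old = self.getQ(last_key)
    self.q[last_key] = old + self.alpha * (reward + self.gamma * next_value - old)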
class Game:
    instance = None

    def __init__(self, experiment: BaseExperiment):
        self.game_state = GameState()
        self.game_state.reset()
        self.experiment = experiment
        self.do_render = experiment.do_render
        self.playing = True

    def update(self):
        if not self.playing:
            return

        # Tell agents to move.
        # If an agent is human, the human acts on its own through the UI,
        # which changes current_player.
        if self.game_state.winner == Piece.NONE and self.game_state.current_player == Piece.BLUE:
            self.experiment.blue_agent.move(self.game_state)
        if self.game_state.winner == Piece.NONE and self.game_state.current_player == Piece.RED:
            self.experiment.red_agent.move(self.game_state)

        # If the game ended, tell the agents and the experiment.
        if self.game_state.winner != Piece.NONE:
            self.experiment.blue_agent.game_end(self.game_state)
            self.experiment.red_agent.game_end(self.game_state)
            if self.experiment.game_ended(self.game_state):
                self.game_state.reset()
            else:
                self.playing = False
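# The game_end hook invoked above is the point where the learning agents can
# receive a terminal reward (their move() methods only hand out small shaping
# rewards). A minimal sketch for the Q-learning agent, assuming a terminal
# reward of 1 for a win and 0 for a loss and reusing q_learn with a zero
# next-state value; the reward values are assumptions, not taken from the source.
def game_end(self, game: GameState):
    if self.last_state_key_blue is not None:
        reward = 1 if game.winner == Piece.BLUE else 0
        self.q_learn(self.last_state_key_blue, reward, 0)
    if self.last_state_key_red is not None:
        reward = 1 if game.winner == Piece.RED else 0
        self.q_learn(self.last_state_key_red, reward, 0)
    # Reset per-episode bookkeeping for the next game.
    self.last_state_key_blue = None
    self.last_state_key_red = None
    self.last_action_blue = None
    self.last_action_red = None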
def move(self, game: GameState):
    if self.last_state_key is not None:
        self.td_learn(self.lastGameState, 0, game)

    chosen_action = None
    actions = game.get_possible_actions()
    random.shuffle(actions)  # break ties between equally valued moves randomly
    max_V = -100000

    if random.random() < self.epsilon:
        chosen_action = random.choice(actions)
    else:
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            tempGameV = self.alphaBeta(tempGame, 1, -100000, 100000)
            if tempGameV == 0:
                tempGameV = self.getV(game_to_v_state(tempGame))
            if tempGameV > max_V:
                chosen_action = action
                max_V = tempGameV

    self.last_state_key = game_to_v_state(game)
    self.lastGameState = copy.deepcopy(game)
    # print("Move's V estimate: " + str(max_V))
    # print("Current State's V estimate: " + str(self.getV(game_to_v_state(game))))
    game.make_move_tuple(chosen_action)
def game_state_to_q_state(game: GameState, action_tuple):
    state = ""
    cards = game.cards.copy()  # back up the card list before temporarily reordering it

    # Sort each player's two cards so the encoding ignores hand order.
    if game.cards[3] > game.cards[4]:
        game.cards[3], game.cards[4] = game.cards[4], game.cards[3]
    if game.cards[0] > game.cards[1]:
        game.cards[0], game.cards[1] = game.cards[1], game.cards[0]

    if game.current_player == Piece.BLUE:
        for i in range(0, 5):
            for j in range(0, 5):
                state += str(game[j, i].value)
        for i in [0, 1, 2, 3, 4]:
            state += str(CARDS_ID[game.cards[i]])
        # Add in the action.
        state += str(action_tuple[0])  # from x
        state += str(action_tuple[1])  # from y
        state += str(action_tuple[2])  # to x
        state += str(action_tuple[3])  # to y
        state += str(CARDS_ID[cards[action_tuple[4]]])  # card
    else:
        for i in range(0, 5)[::-1]:  # flip the board by reversing locations
            for j in range(0, 5)[::-1]:
                piece = game[j, i]
                if piece == Piece.BLUE:
                    piece = Piece.RED
                elif piece == Piece.RED:
                    piece = Piece.BLUE
                elif piece == Piece.RED_KING:
                    piece = Piece.BLUE_KING
                elif piece == Piece.BLUE_KING:
                    piece = Piece.RED_KING
                state += str(piece.value)
        for i in [3, 4, 2, 0, 1]:  # current player's cards first, mirroring the blue ordering
            state += str(CARDS_ID[game.cards[i]])
        # Add in the action, mirrored to match the flipped board.
        state += str(4 - action_tuple[0])  # from x
        state += str(4 - action_tuple[1])  # from y
        state += str(4 - action_tuple[2])  # to x
        state += str(4 - action_tuple[3])  # to y
        state += str(CARDS_ID[cards[action_tuple[4]]])  # card

    game.cards = cards  # restore the original card order
    return state
def move(self, game: GameState):
    actions = game.get_possible_actions()

    # Rank actions by table A.
    action_key_value_pairs = []
    for action in actions:
        key = game_state_to_q_state(game, action)
        value = self.getQA(key)
        action_key_value_pairs.append((action, key, value))
    random.shuffle(action_key_value_pairs)
    action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
    max_action_a = action_key_value_pairs[0][0]
    max_action_key_a = action_key_value_pairs[0][1]
    # Table B's estimate of A's greedy action (used to update table A).
    max_action_value_b_from_a = self.getQB(max_action_key_a)

    # Rank actions by table B.
    action_key_value_pairs = []
    for action in actions:
        key = game_state_to_q_state(game, action)
        value = self.getQB(key)
        action_key_value_pairs.append((action, key, value))
    random.shuffle(action_key_value_pairs)
    action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
    max_action_key_b = action_key_value_pairs[0][1]
    # Table A's estimate of B's greedy action (used to update table B).
    max_action_value_a_from_b = self.getQA(max_action_key_b)

    if random.random() < self.epsilon:
        # epsilon-greedy exploration: pick a random action instead
        max_action_tuple = random.choice(action_key_value_pairs)
        max_action_a = max_action_tuple[0]
        max_action_key_a = max_action_tuple[1]

    # Uncomment when playing against the agent to see its estimated
    # confidence of winning based on the chosen move:
    # print(f'Confidence: {action_key_value_pairs[0][2]}')

    if game.current_player == Piece.BLUE:
        if self.last_state_key_blue is not None:
            self.q_learn(self.last_state_key_blue, 0, max_action_value_b_from_a, max_action_value_a_from_b)
        self.last_state_key_blue = max_action_key_a
    else:
        if self.last_state_key_red is not None:
            self.q_learn(self.last_state_key_red, 0, max_action_value_b_from_a, max_action_value_a_from_b)
        self.last_state_key_red = max_action_key_a

    game.make_move_tuple(max_action_a)
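# For the double Q-learning agent, the four-argument q_learn call above pairs
# each table's greedy action with the other table's estimate of it. A minimal
# sketch, assuming dict-backed tables self.qa and self.qb plus self.alpha and
# self.gamma (assumed names): on each call, one table is chosen at random and
# updated toward the cross-estimate, which is what keeps the two estimators
# from overestimating together.
def q_learn(self, last_key, reward, qb_of_a_greedy, qa_of_b_greedy):
    if random.random() < 0.5:
        # Update table A toward reward + gamma * Q_B(s', argmax_a Q_A(s', a)).
        old = self.getQA(last_key)
        self.qa[last_key] = old + self.alpha * (reward + self.gamma * qb_of_a_greedy - old)
    else:
        # Update table B toward reward + gamma * Q_A(s', argmax_a Q_B(s', a)).
        old = self.getQB(last_key)
        self.qb[last_key] = old + self.alpha * (reward + self.gamma * qa_of_b_greedy - old)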
def move(self, game: GameState):
    moves = game.get_possible_actions()
    if len(moves) > 0:
        # Pick a uniformly random legal move and make it.
        actual_move = random.choice(moves)
        # print(f'[Random {self.player}] Making move: {actual_move}')
        game.make_move_tuple(actual_move)
    else:
        # print(f'[Random {self.player}] No moves! Passing turn...')
        game.pass_move()
def move(self, game: GameState):
    # Only produce one suggestion per turn.
    if game.turn_num == self.last_num:
        return

    actions = game.get_possible_actions()
    action_key_value_pairs = []
    for action in actions:
        key = game_state_to_q_state(game, action)
        value = self.getQ(key)
        action_key_value_pairs.append((action, key, value))
    random.shuffle(action_key_value_pairs)
    action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
    max_action_value = action_key_value_pairs[0][2]

    print("suggestion: ")
    print(action_key_value_pairs)
    # Estimated confidence of winning based on the best-rated move.
    print(f'Confidence: {max_action_value}')
    self.last_num = game.turn_num
def miniMax(self, game: GameState, depth):
    # Terminal positions get a large fixed score.
    if game.winner != Piece.NONE:
        if game.winner == Piece.RED:
            return -9999
        else:
            return 9999
    # At the depth limit, fall back to the learned value estimate.
    if depth >= self.moveDepth:
        return self.getV(game_to_v_state(game))

    chosen_action = None
    actions = game.get_possible_actions()
    random.shuffle(actions)

    if game.current_player == Piece.RED:
        # RED is the minimising player.
        best_V = 10000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            actionV = self.miniMax(tempGame, depth + 1)
            if actionV < best_V:
                best_V = actionV
                chosen_action = action
    else:
        # BLUE is the maximising player.
        best_V = -10000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            actionV = self.miniMax(tempGame, depth + 1)
            if actionV > best_V:
                best_V = actionV
                chosen_action = action

    return best_V
def move(self, game: GameState):
    if self.last_state_key is not None:
        self.td_learn(self.last_state_key, 0, game_to_v_state(game))

    chosen_action = None
    actions = game.get_possible_actions()
    random.shuffle(actions)  # break ties between equally valued moves randomly

    if random.random() < self.epsilon:
        chosen_action = random.choice(actions)
    else:
        max_V = -10000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            VOfAction = self.getV(game_to_v_state(tempGame))
            if VOfAction > max_V:
                chosen_action = action
                max_V = VOfAction

    self.last_state_key = game_to_v_state(game)
    game.make_move_tuple(chosen_action)
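# getV and td_learn are referenced above but not shown in this section. A
# minimal TD(0) sketch, assuming a dict-backed value table self.v with learning
# rate self.alpha and discount self.gamma (assumed names). The minimax variants
# pass GameState objects to td_learn instead of keys and would first convert
# them with game_to_v_state (assumption).
def getV(self, key):
    # Unseen states default to 0 (assumption).
    return self.v.get(key, 0)

def td_learn(self, last_key, reward, next_key):
    # One-step TD(0) update: V(s) <- V(s) + alpha * (reward + gamma * V(s') - V(s)).
    old = self.getV(last_key)
    self.v[last_key] = old + self.alpha * (reward + self.gamma * self.getV(next_key) - old)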
def move(self, game: GameState):
    if self.last_state_key is not None:
        self.td_learn(self.lastGameState, 0, game)

    chosen_action = None
    actions = game.get_possible_actions()
    random.shuffle(actions)  # break ties between equally valued moves randomly

    if random.random() < self.epsilon:
        chosen_action = random.choice(actions)
    else:
        # Lower minimax values are better for RED (see miniMax's sign convention).
        min_V = 10000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            tempGameV = self.miniMax(tempGame, 1)
            if tempGameV < min_V:
                chosen_action = action
                min_V = tempGameV

    self.last_state_key = game_to_v_state(game)
    self.lastGameState = copy.deepcopy(game)
    game.make_move_tuple(chosen_action)
def move(self, game: GameState):
    moves = game.get_possible_actions()
    if len(moves) > 0:
        # Score each move with a simple heuristic.
        ratings = [[move, 0] for move in moves]
        for rating in ratings:
            move = rating[0]
            if move[3] > move[1] and game.current_player == Piece.RED:
                # moving forward is good
                rating[1] = rating[1] + 1
            if move[3] < move[1] and game.current_player == Piece.BLUE:
                # moving forward is good
                rating[1] = rating[1] + 1
            if 0 <= game[(move[2], move[3])].value - Piece.BLUE.value <= 1 and game.current_player == Piece.RED:
                # red moving onto a blue piece is better
                rating[1] = rating[1] + 2
            if 0 <= game[(move[2], move[3])].value - Piece.RED.value <= 1 and game.current_player == Piece.BLUE:
                # blue moving onto a red piece is better
                rating[1] = rating[1] + 2
            if game.does_move_win_tuple(move):
                # a winning move is best of all
                rating[1] = rating[1] + 10
        # Shuffle first so equally rated moves are picked at random.
        random.shuffle(ratings)
        ratings.sort(key=lambda x: x[1], reverse=True)
        game.make_move_tuple(ratings[0][0])
    else:
        game.pass_move()
def game_to_v_state(game: GameState):
    state = ""
    cards = game.cards.copy()  # back up the card list before temporarily reordering it

    # Sort each player's two cards so the encoding ignores hand order.
    if game.cards[3] > game.cards[4]:
        game.cards[3], game.cards[4] = game.cards[4], game.cards[3]
    if game.cards[0] > game.cards[1]:
        game.cards[0], game.cards[1] = game.cards[1], game.cards[0]

    for i in range(0, 5):
        for j in range(0, 5):
            state += str(game[j, i].value)
    for i in [0, 1, 2, 3, 4]:
        state += str(CARDS_ID[game.cards[i]])

    game.cards = cards  # restore the original card order
    return state
def alphaBeta(self, game: GameState, depth, alpha, beta):
    # Terminal positions get a large score, offset by depth so that quicker
    # wins (and slower losses) are preferred.
    if game.winner != Piece.NONE:
        if game.winner == Piece.BLUE:
            return 50000 - depth
        else:
            return -50000 + depth
    # At the depth limit, fall back to the learned value estimate.
    if depth >= self.moveDepth:
        return self.getV(game_to_v_state(game))

    chosen_action = None
    actions = game.get_possible_actions()
    random.shuffle(actions)

    if game.current_player == Piece.BLUE:
        # BLUE is the maximising player.
        best_V = -100000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            actionV = self.alphaBeta(tempGame, depth + 1, alpha, beta)
            if actionV == 0:
                actionV = self.getV(game_to_v_state(tempGame))
            if actionV > best_V:
                best_V = actionV
                chosen_action = action
            if actionV > alpha:
                alpha = actionV
            if alpha >= beta:
                break  # beta cut-off
    else:
        # RED is the minimising player.
        best_V = 100000
        for action in actions:
            tempGame = copy.deepcopy(game)
            tempGame.make_move_tuple(action)
            actionV = self.alphaBeta(tempGame, depth + 1, alpha, beta)
            if actionV == 0:
                actionV = self.getV(game_to_v_state(tempGame))
            if actionV < best_V:
                best_V = actionV
                chosen_action = action
            if actionV < beta:
                beta = actionV
            if alpha >= beta:
                break  # alpha cut-off

    return best_V