    def move(self, game: GameState):

        reward = 0

        # Reward shaping: a small bonus if the mover's last action moved
        # a piece forward.
        if (game.current_player == Piece.RED and self.last_action_red
                and self.last_action_red[3] > self.last_action_red[1]):
            reward = 0.05
        if (game.current_player == Piece.BLUE and self.last_action_blue
                and self.last_action_blue[3] < self.last_action_blue[1]):
            reward = 0.05

        actions = game.get_possible_actions()
        action_key_value_pairs = []

        for action in actions:
            key = game_state_to_q_state(game, action)
            value = self.getQ(key)
            action_key_value_pairs.append((action, key, value))

        # Shuffle before sorting so ties between equal Q-values break randomly.
        random.shuffle(action_key_value_pairs)
        action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
        max_action = action_key_value_pairs[0][0]
        max_action_key = action_key_value_pairs[0][1]
        max_action_value = action_key_value_pairs[0][2]

        if random.random() < self.epsilon:
            # Epsilon-greedy exploration: pick a uniformly random action.
            max_action_tuple = random.choice(action_key_value_pairs)
            max_action = max_action_tuple[0]
            max_action_key = max_action_tuple[1]

        # print(action_key_value_pairs)

        # Uncomment when playing against the agent to print the estimated
        # value (confidence of winning) of the chosen move:
        # print(f'Confidence: {max_action_value}')

        if game.current_player == Piece.BLUE:
            if self.last_state_key_blue is not None:
                self.q_learn(self.last_state_key_blue, reward,
                             max_action_value)

            self.last_state_key_blue = max_action_key
            self.last_action_blue = max_action

        else:
            if self.last_state_key_red is not None:
                self.q_learn(self.last_state_key_red, reward, max_action_value)

            self.last_state_key_red = max_action_key
            self.last_action_red = max_action

        game.make_move_tuple(max_action)

        # Decay the exploration rate, clamped at the floor.
        self.epsilon = max(self.epsilon * self.epsilon_decay,
                           self.epsilon_floor)
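
# A minimal sketch of the one-step backup that q_learn above presumably
# performs; alpha, gamma, and the dict-backed table here are illustrative
# assumptions, not the project's actual implementation.
def q_learn_sketch(q_table, alpha, gamma, last_key, reward, max_next_q):
    old = q_table.get(last_key, 0.0)
    # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    q_table[last_key] = old + alpha * (reward + gamma * max_next_q - old)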

# Example #2
class Game:
    instance = None

    def __init__(self, experiment: BaseExperiment):
        self.game_state = GameState()
        self.game_state.reset()

        self.experiment = experiment

        self.do_render = experiment.do_render

        self.playing = True

    def update(self):

        if not self.playing:
            return

        # Ask the current player's agent to move. If the agent is human, it
        # acts on its own through the UI, which advances current_player.
        if self.game_state.winner == Piece.NONE and self.game_state.current_player == Piece.BLUE:
            self.experiment.blue_agent.move(self.game_state)
        if self.game_state.winner == Piece.NONE and self.game_state.current_player == Piece.RED:
            self.experiment.red_agent.move(self.game_state)

        # If game ended, tell agents and experiment
        if self.game_state.winner != Piece.NONE:
            self.experiment.blue_agent.game_end(self.game_state)
            self.experiment.red_agent.game_end(self.game_state)
            if self.experiment.game_ended(self.game_state):
                self.game_state.reset()
            else:
                self.playing = False
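
# Hypothetical driver, assuming some BaseExperiment subclass exists (the
# name RandomVsQExperiment is illustrative only, not from the project):
#
#   game = Game(RandomVsQExperiment())
#   while game.playing:
#       game.update()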
    def move(self, game: GameState):
        if self.last_state_key is not None:
            self.td_learn(self.lastGameState, 0, game)
        chosen_action = None
        actions = game.get_possible_actions()
        random.shuffle(actions)  # break ties randomly when values are equal
        max_V = -100000
        if random.random() < self.epsilon:
            chosen_action = random.choice(actions)
        else:
            for action in actions:
                tempGame = copy.deepcopy(game)
                tempGame.make_move_tuple(action)
                tempGameV = self.alphaBeta(tempGame, 1, -100000, 100000)
                if tempGameV == 0:
                    # Exactly 0 is the untrained default; fall back to the
                    # direct V estimate of the child state.
                    tempGameV = self.getV(game_to_v_state(tempGame))
                if tempGameV > max_V:
                    chosen_action = action
                    max_V = tempGameV
        self.last_state_key = game_to_v_state(game)
        self.lastGameState = copy.deepcopy(game)
        # print("Move's V estimate: " + str(max_V))
        # print("Current state's V estimate: " + str(self.getV(game_to_v_state(game))))
        game.make_move_tuple(chosen_action)

# Example #4
def game_state_to_q_state(game: GameState, action_tuple):
    state = ""

    cards = game.cards.copy()  # back up; we reorder in place and restore below

    # Sort each player's two cards so hand order doesn't matter.
    if game.cards[3] > game.cards[4]:
        game.cards[3], game.cards[4] = game.cards[4], game.cards[3]

    if game.cards[0] > game.cards[1]:
        game.cards[0], game.cards[1] = game.cards[1], game.cards[0]

    if game.current_player == Piece.BLUE:
        for i in range(5):
            for j in range(5):
                state += str(game[j, i].value)
        for i in range(5):
            state += str(CARDS_ID[game.cards[i]])

        # Add in action
        state += str(action_tuple[0])  # from x
        state += str(action_tuple[1])  # from y
        state += str(action_tuple[2])  # to x
        state += str(action_tuple[3])  # to y
        state += str(CARDS_ID[cards[action_tuple[4]]])  # card
    else:
        for i in reversed(range(5)):  # flip the board by reversing locations
            for j in reversed(range(5)):
                piece = game[j, i]
                if piece == Piece.BLUE:
                    piece = Piece.RED
                elif piece == Piece.RED:
                    piece = Piece.BLUE
                elif piece == Piece.RED_KING:
                    piece = Piece.BLUE_KING
                elif piece == Piece.BLUE_KING:
                    piece = Piece.RED_KING
                state += str(piece.value)

        for i in [3, 4, 2, 0, 1]:  # reorder cards into the mover's perspective
            state += str(CARDS_ID[game.cards[i]])

        # Add in action
        state += str(4 - action_tuple[0])  # from x
        state += str(4 - action_tuple[1])  # from y
        state += str(4 - action_tuple[2])  # to x
        state += str(4 - action_tuple[3])  # to y
        state += str(CARDS_ID[cards[action_tuple[4]]])  # card

    game.cards = cards  # restore the original card order
    return state
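
# Note: the encoding above is perspective-normalized. A position with RED to
# move is serialized as the color-swapped, 180-degree-rotated position with
# BLUE to move, so both colors can share a single Q-table. CARDS_ID is
# assumed to map each card to a stable identifier.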
    def move(self, game: GameState):

        actions = game.get_possible_actions()
        action_key_value_pairs = []

        for action in actions:
            key = game_state_to_q_state(game, action)
            value = self.getQA(key)
            action_key_value_pairs.append((action, key, value))

        random.shuffle(action_key_value_pairs)
        action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
        max_action_a = action_key_value_pairs[0][0]
        max_action_key_a = action_key_value_pairs[0][1]
        max_action_value_b_from_a = self.getQB(max_action_key_a)

        # Re-score the same actions using the second table (B).
        action_key_value_pairs = []

        for action in actions:
            key = game_state_to_q_state(game, action)
            value = self.getQB(key)
            action_key_value_pairs.append((action, key, value))

        random.shuffle(action_key_value_pairs)
        action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
        max_action_key_b = action_key_value_pairs[0][1]
        max_action_value_a_from_b = self.getQA(max_action_key_b)

        if random.random() < self.epsilon:
            # Epsilon-greedy exploration: pick a uniformly random action.
            max_action_tuple = random.choice(action_key_value_pairs)
            max_action_a = max_action_tuple[0]
            max_action_key_a = max_action_tuple[1]

        # Uncomment when playing against the agent to print the estimated
        # value of the chosen move:
        # print(f'Confidence: {max_action_value_b_from_a}')

        if game.current_player == Piece.BLUE:
            if self.last_state_key_blue is not None:
                self.q_learn(self.last_state_key_blue, 0,
                             max_action_value_b_from_a,
                             max_action_value_a_from_b)

            self.last_state_key_blue = max_action_key_a

        else:
            if self.last_state_key_red is not None:
                self.q_learn(self.last_state_key_red, 0,
                             max_action_value_b_from_a,
                             max_action_value_a_from_b)

            self.last_state_key_red = max_action_key_a

        game.make_move_tuple(max_action_a)
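
# A minimal sketch of the double Q-learning backup that q_learn above
# presumably performs with its two extra arguments; alpha, gamma, the coin
# flip, and the dict-backed tables are illustrative assumptions only.
import random

def double_q_learn_sketch(qa, qb, alpha, gamma, last_key, reward,
                          q_b_at_a_argmax, q_a_at_b_argmax):
    if random.random() < 0.5:
        # Update table A with B's evaluation of A's greedy action.
        old = qa.get(last_key, 0.0)
        qa[last_key] = old + alpha * (reward + gamma * q_b_at_a_argmax - old)
    else:
        # Update table B with A's evaluation of B's greedy action.
        old = qb.get(last_key, 0.0)
        qb[last_key] = old + alpha * (reward + gamma * q_a_at_b_argmax - old)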

# Example #7
    def move(self, game: GameState):

        moves = game.get_possible_actions()

        # If there is a move
        if len(moves) > 0:
            actual_move = random.choice(moves)
            # print(f'[Random {self.player}] Making move: {actual_move}')
            game.make_move_tuple(actual_move)  # Make it
        else:
            # print(f'[Random {self.player}] No moves! Passing turn...')
            game.pass_move()
    def move(self, game: GameState):

        # Only produce one suggestion per turn.
        if game.turn_num == self.last_num:
            return

        actions = game.get_possible_actions()
        action_key_value_pairs = []

        for action in actions:
            key = game_state_to_q_state(game, action)
            value = self.getQ(key)
            action_key_value_pairs.append((action, key, value))

        random.shuffle(action_key_value_pairs)
        action_key_value_pairs.sort(key=lambda x: x[2], reverse=True)
        max_action_value = action_key_value_pairs[0][2]

        print("suggestion: ")
        print(action_key_value_pairs)

        # Estimated value (confidence of winning) of the best suggestion.
        print(f'Confidence: {max_action_value}')

        self.last_num = game.turn_num
    def miniMax(self, game: GameState, depth):
        # Terminal: a RED win is the worst outcome, a BLUE win the best.
        if game.winner != Piece.NONE:
            if game.winner == Piece.RED:
                return -9999
            else:
                return 9999

        # Depth cutoff: fall back to the learned V estimate.
        if depth >= self.moveDepth:
            return self.getV(game_to_v_state(game))

        chosen_action = None
        actions = game.get_possible_actions()
        random.shuffle(actions)
        if game.current_player == Piece.RED:
            # RED is the minimizing player.
            best_V = 10000
            for action in actions:
                tempGame = copy.deepcopy(game)
                tempGame.make_move_tuple(action)
                actionV = self.miniMax(tempGame, depth + 1)
                if actionV < best_V:
                    best_V = actionV
                    chosen_action = action
        else:
            # BLUE is the maximizing player.
            best_V = -10000
            for action in actions:
                tempGame = copy.deepcopy(game)
                tempGame.make_move_tuple(action)
                actionV = self.miniMax(tempGame, depth + 1)
                if actionV > best_V:
                    best_V = actionV
                    chosen_action = action
        return best_V

# Example #10
    def move(self, game: GameState):
        if self.last_state_key is not None:
            self.td_learn(self.last_state_key, 0, game_to_v_state(game))

        chosen_action = None
        actions = game.get_possible_actions()
        random.shuffle(actions)  # get a random move if all are equal
        if random.random() < self.epsilon:
            chosen_action = random.choice(actions)
        else:
            max_V = -10000
            for action in actions:
                tempGame = copy.deepcopy(game)
                tempGame.make_move_tuple(action)
                VOfAction = self.getV(game_to_v_state(tempGame))
                if VOfAction > max_V:
                    chosen_action = action
                    max_V = VOfAction
        self.last_state_key = game_to_v_state(game)
        game.make_move_tuple(chosen_action)
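
# A minimal sketch of the TD(0) update that td_learn above presumably
# performs; alpha, gamma, and the dict-backed v_table are illustrative
# assumptions, not the project's actual implementation.
def td_learn_sketch(v_table, alpha, gamma, last_key, reward, next_key):
    old = v_table.get(last_key, 0.0)
    # V(s) <- V(s) + alpha * (r + gamma * V(s') - V(s))
    v_table[last_key] = old + alpha * (
        reward + gamma * v_table.get(next_key, 0.0) - old)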
    def move(self, game: GameState):
        if self.last_state_key is not None:
            self.td_learn(self.lastGameState, 0, game)
        chosen_action = None
        actions = game.get_possible_actions()
        random.shuffle(actions)  # break ties randomly when values are equal
        if random.random() < self.epsilon:
            chosen_action = random.choice(actions)
        else:
            # This agent plays the minimizing side: pick the move with the
            # lowest minimax value.
            min_V = 10000
            for action in actions:
                tempGame = copy.deepcopy(game)
                tempGame.make_move_tuple(action)
                tempGameV = self.miniMax(tempGame, 1)
                if tempGameV < min_V:
                    chosen_action = action
                    min_V = tempGameV
        self.last_state_key = game_to_v_state(game)
        self.lastGameState = copy.deepcopy(game)
        game.make_move_tuple(chosen_action)

# Example #12
    def move(self, game: GameState):

        moves = game.get_possible_actions()

        # If there is a move
        if len(moves) > 0:
            ratings = [[move, 0] for move in moves]

            for rating in ratings:
                move = rating[0]

                # Moving forward is good.
                if move[3] > move[1] and game.current_player == Piece.RED:
                    rating[1] += 1
                if move[3] < move[1] and game.current_player == Piece.BLUE:
                    rating[1] += 1

                # Capturing is better: this relies on BLUE/BLUE_KING (and
                # RED/RED_KING) having adjacent enum values.
                target = game[(move[2], move[3])]
                if (game.current_player == Piece.RED
                        and 0 <= target.value - Piece.BLUE.value <= 1):
                    rating[1] += 2
                if (game.current_player == Piece.BLUE
                        and 0 <= target.value - Piece.RED.value <= 1):
                    rating[1] += 2

                # A winning move dominates everything else.
                if game.does_move_win_tuple(move):
                    rating[1] += 10

            random.shuffle(ratings)
            ratings.sort(key=lambda x: x[1], reverse=True)

            game.make_move_tuple(ratings[0][0])  # Make it
        else:
            game.pass_move()

# Example #13
def game_to_v_state(game: GameState):
    state = ""

    cards = game.cards.copy()  # back up; we reorder in place and restore below

    # Sort each player's two cards so hand order doesn't matter.
    if game.cards[3] > game.cards[4]:
        game.cards[3], game.cards[4] = game.cards[4], game.cards[3]

    if game.cards[0] > game.cards[1]:
        game.cards[0], game.cards[1] = game.cards[1], game.cards[0]

    for i in range(5):
        for j in range(5):
            state += str(game[j, i].value)
    for i in range(5):
        state += str(CARDS_ID[game.cards[i]])

    game.cards = cards  # restore the original card order
    return state

# Example #14
    def alphaBeta(self, game: GameState, depth, alpha, beta):
        # Terminal: score wins so that quicker wins (smaller depth) beat
        # slower ones, and losses are postponed as long as possible.
        if game.winner != Piece.NONE:
            if game.winner == Piece.BLUE:
                return 50000 - depth
            else:
                return -50000 + depth

        # Depth cutoff: fall back to the learned V estimate.
        if depth >= self.moveDepth:
            return self.getV(game_to_v_state(game))
        chosen_action = None
        actions = game.get_possible_actions()
        random.shuffle(actions)
        if game.current_player == Piece.BLUE:
            # BLUE is the maximizing player.
            best_V = -100000
            for action in actions:
                tempGame = copy.deepcopy(game)
                tempGame.make_move_tuple(action)
                actionV = self.alphaBeta(tempGame, depth + 1, alpha, beta)
                if actionV == 0:
                    # Exactly 0 is the untrained default; fall back to the
                    # direct V estimate of this child state.
                    actionV = self.getV(game_to_v_state(tempGame))
                if actionV > best_V:
                    best_V = actionV
                    chosen_action = action
                if actionV > alpha:
                    alpha = actionV
                if alpha >= beta:
                    break
        else:
            # RED is the minimizing player.
            best_V = 100000
            for action in actions:
                tempGame = copy.deepcopy(game)
                tempGame.make_move_tuple(action)
                actionV = self.alphaBeta(tempGame, depth + 1, alpha, beta)
                if actionV == 0:
                    # Same untrained-default fallback as the BLUE branch.
                    actionV = self.getV(game_to_v_state(tempGame))
                if actionV < best_V:
                    best_V = actionV
                    chosen_action = action
                if actionV < beta:
                    beta = actionV
                if alpha >= beta:
                    break
        return best_V
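
# As the alphaBeta-based move method earlier in this dump shows, the search
# is seeded per candidate child with alphaBeta(tempGame, 1, -100000, 100000),
# i.e. depth 1 and the full (alpha, beta) window.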