Пример #1
0
    def get_action(self, state):
        """
        Pick a column: take an immediate win if one exists, otherwise block
        the opponent's immediate win, otherwise defer to ``Agent.get_action``.

        :param state : (list) 6x7 list representing the current state of the game
        :return int: Index of the column to put the piece (always checks for valid moves)
        """
        candidates = get_valid_moves(state)
        both_tiles = [self.tile, self.opponent_tile]

        # First pass simulates our own piece (looking for a winning move);
        # second pass simulates the opponent's piece (looking for a move
        # that must be blocked).
        for tile in (self.tile, self.opponent_tile):
            for column in candidates:
                outcome = make_move(state, column, tile=tile)
                if get_winner(outcome, both_tiles) == tile:
                    return column

        # no immediate win or threat: fall back to the base agent's choice
        return Agent.get_action(self, state)
Пример #2
0
def simulate(games=10, log_every=100):
    """
    Play ``games`` games between a ``LearningAgent`` (tile 1) and an ``Agent``
    (tile -1), calling the learner's ``learn`` after every game.

    :param games: (int) number of games to play
    :param log_every: (int) print the learner's win count every this many games
    :return: the ``LearningAgent`` instance after all games were played
    """
    won = 0
    agents = [LearningAgent(1), Agent(-1)]
    learner = agents[0]

    players = cycle(agents)

    # advance the turn order zero or one time so either agent may start;
    # note the cycle is not reset between games
    for _ in range(random.randrange(2)):
        next(players)

    for iteration in range(1, games + 1):
        state = get_initial_state()

        # only the learner's turns (tile 1) are recorded
        current_game = []

        while get_valid_moves(state):
            current_player = next(players)
            initial_state = state
            action = current_player.get_action(state)
            state = make_move(state, action, current_player.tile)

            if current_player.tile == 1:
                current_game.append({
                    'st': initial_state,
                    'a': action,
                    'st1': state
                })

        # clean game: the learner's last recorded turn gets the final
        # reward and no successor state
        reward = get_winner(state)
        current_game[-1]['r'] = reward
        current_game[-1]['st1'] = None

        # log
        if reward > 0:
            won += 1

        if iteration % log_every == 0:
            print('won %s games out of %s' % (won, log_every))
            # the counter covers only the games since the previous log line
            won = 0

        # learn
        learner.learn(current_game)

    return learner
Пример #3
0
    def get_action(self, state):
        """
        Choose a move for ``state`` from the Q-table, with occasional exploration.

        With probability 0.1 the choice is delegated to ``Agent.get_action``.
        Otherwise the values in ``self.Q[parse_state(state)]`` are ranked and
        the valid move with the highest value is returned.

        :param state: current game state, as accepted by ``get_valid_moves``
            and ``parse_state``
        :return: index of the chosen move, or None if no ranked move is valid
        """
        valid_moves = get_valid_moves(state)

        if random.random() < 0.1:
            return Agent.get_action(self, state)

        # assumes the Q entry is a flat sequence with one value per move
        # index -- TODO confirm against where self.Q is populated
        q_values = np.array(self.Q[parse_state(state)])

        # argsort lists move indices from lowest to highest value, so walk it
        # backwards to try the most valuable move first
        for candidate in q_values.argsort()[::-1]:
            move = int(candidate)
            if move in valid_moves:
                return move

        return None
Пример #4
0
    def get_action(self, state):
        """
        Choose the valid move whose resulting state has the highest stored value.

        Every valid move is simulated with ``make_move``; the resulting state
        is registered through ``self.add_state`` and its value is read from
        ``self.Q``. If the best value is positive that move is returned.
        Otherwise, with probability 0.1 the choice is delegated to
        ``Agent.get_action``, else the best move found is still returned.

        :param state: current game state, as accepted by ``get_valid_moves``
            and ``make_move``
        :return: the chosen move, or None if there were no valid moves and
            the fallback to ``Agent.get_action`` was not taken
        """
        valid_moves = get_valid_moves(state)

        self.add_state(state)

        max_move = None
        max_value = None
        for move in valid_moves:
            simulated_state = make_move(state, move, self.tile)
            parsed_simulated_state = parse_state(simulated_state)

            self.add_state(simulated_state, parsed_simulated_state)
            state_value = self.Q[parsed_simulated_state]
            # explicit None check: comparing a number with None raises
            # TypeError on Python 3
            if max_value is None or state_value > max_value:
                max_value = state_value
                max_move = move

        if max_value is not None and max_value > 0:
            return max_move

        # no positively valued move known: explore occasionally
        if random.random() < 0.1:
            return Agent.get_action(self, state)
        return max_move
Пример #5
0
 def get_action(self, state):
     """Return one entry of ``get_valid_moves(state)`` picked with ``random.choice``."""
     candidates = get_valid_moves(state)
     return random.choice(candidates)
Пример #6
0
 def get_action(self, state):
     """
     Pick a random move among the currently valid ones.

     :param state: 6x7 list representing the current state of the game
     :return int: Index of the column to put the piece, drawn with
         ``random.choice`` from ``get_valid_moves(state)``
     """
     candidates = get_valid_moves(state)
     return random.choice(candidates)