Example #1
    def _choose(self, state, actions):
        # During training, explore with probability exploit_rate
        # (despite the name, this is the chance of a random move).
        if self.is_train_mode and random.random() < self.exploit_rate:
            next_pos = random.choice(actions)
            if self.debug: print("SELECT", actions, "RANDOM", next_pos)

            return next_pos

        # Greedy path: track the best score seen and every action that ties it.
        found_p = -1.0
        found_c = []

        # Look up scores under the canonical board id.
        ob = OptimalBoard(state)
        _id = ob.board_id
        if self.debug: print("FROM", _id)

        scores = self.p_table.lookup(_id)
        converted_actions = ob.convert_action_to_optimal(actions)
        for action in converted_actions:
            p = scores[action]
            if self.debug: print("ACTION", ob.convert_action_to_original(action), p)
            if p > found_p:
                found_p = p
                found_c = [ob.convert_action_to_original(action)]
            elif p == found_p:
                found_c.append(ob.convert_action_to_original(action))

        next_pos = random.choice(found_c)
        if self.debug: print("SELECT", found_c, found_p, next_pos)

        return next_pos
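All three examples funnel the state through an OptimalBoard helper before the lookup, presumably to collapse the symmetric variants of a 3x3 position into a single canonical entry (the [-1] * 9 dummy state in Example #3 suggests nine cells). The class itself is not shown; the sketch below is only a guess at its contract, where just the method names and attributes come from the examples above:

class OptimalBoard:
    # The 8 symmetries of a 3x3 board as cell permutations:
    # canonical[i] = board[perm[i]].
    _SYMMETRIES = [
        (0, 1, 2, 3, 4, 5, 6, 7, 8),  # identity
        (2, 5, 8, 1, 4, 7, 0, 3, 6),  # rotate 90
        (8, 7, 6, 5, 4, 3, 2, 1, 0),  # rotate 180
        (6, 3, 0, 7, 4, 1, 8, 5, 2),  # rotate 270
        (2, 1, 0, 5, 4, 3, 8, 7, 6),  # mirror left-right
        (6, 7, 8, 3, 4, 5, 0, 1, 2),  # mirror top-bottom
        (0, 3, 6, 1, 4, 7, 2, 5, 8),  # main diagonal
        (8, 5, 2, 7, 4, 1, 6, 3, 0),  # anti-diagonal
    ]

    def __init__(self, board):
        # Pick the lexicographically smallest symmetric variant as the
        # canonical representative, remembering which permutation won.
        self.optimal_board, self._perm = min(
            (tuple(board[i] for i in perm), perm)
            for perm in self._SYMMETRIES)
        # Any stable, hashable encoding works as the id; the canonical
        # tuple itself is the simplest choice.
        self.board_id = self.optimal_board

    def convert_action_to_optimal(self, actions):
        # Map original cell indices into the canonical orientation.
        inverse = {orig: new for new, orig in enumerate(self._perm)}
        return [inverse[a] for a in actions]

    def convert_action_to_original(self, action):
        # Map a canonical cell index back to the original orientation.
        return self._perm[action]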
Example #2
    def _choose(self, state, available_actions):
        if self.is_train_mode and random.random() < self.exploit_rate:
            return random.choice(available_actions)

        ob = OptimalBoard(state)
        converted_actions = ob.convert_action_to_optimal(available_actions)
        action = self.q.rargmax(ob.board_id, converted_actions)
        return ob.convert_action_to_original(action)
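Example #2 is Example #1 compressed: the scan-and-tie-break loop moves into self.q.rargmax. Judging by the name and by the inline loop in Example #1, rargmax is an argmax that breaks ties uniformly at random; a minimal sketch, assuming the table maps a board id to a list of per-action values:

import random

def rargmax(table, board_id, actions):
    # Highest stored value among the available actions...
    scores = table[board_id]
    best = max(scores[a] for a in actions)
    # ...with ties broken uniformly at random, as in Example #1.
    return random.choice([a for a in actions if scores[a] == best])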
Example #3
    def _choose(self, state, available_actions):
        optimal_board = OptimalBoard(state)
        converted_actions = optimal_board.convert_action_to_optimal(
            available_actions)
        converted_state = self.convert_state(optimal_board.optimal_board)
        if self.is_train_mode and random.random() < self.egreedy:
            action = random.choice(converted_actions)
        else:
            action = self.network.predict_one(converted_state)

        if action not in converted_actions:
            # Undecided: whether to feed this as a training sample here,
            # or to add a filter inside predict_one instead.
            # For now, penalize the illegal move with reward -1 and a
            # terminal dummy state, then fall back to a random legal action.
            self.network.add_train_set(converted_state, action, -1,
                                       self.convert_state([-1] * 9), True)
            action = random.choice(converted_actions)

        original_action = optimal_board.convert_action_to_original(action)

        return original_action
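The comment in Example #3 leaves open a second option: instead of punishing an illegal move after the fact, mask illegal actions out inside the prediction itself. A minimal sketch of that masking, assuming the network can expose a per-cell value vector (predict_values is a hypothetical method, not part of the code above):

import numpy as np

def predict_one_masked(network, state, valid_actions):
    # Hypothetical: assumes the network exposes raw per-action values.
    values = network.predict_values(state)   # shape (9,), one value per cell
    mask = np.full(9, -np.inf)
    mask[list(valid_actions)] = 0.0
    # Illegal cells become -inf, so argmax can only return a legal move.
    return int(np.argmax(values + mask))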