예제 #1
0
 def step(self, action):
     """Apply one sub-action to the staged card-selection state machine.

     How ``action`` is interpreted depends on ``self.act`` (passive or
     active turn) and on the current ``self.mode``.  Every call either
     moves ``self.mode`` to the next stage or completes the move by
     setting ``self.finished = True``.  The cards picked so far are kept
     in ``self.intention`` and the category in ``self.card_type``.

     Passive turn (``ACT_TYPE.PASSIVE``):
         * ``PASSIVE_DECISION``: 0 finishes with ``Category.EMPTY``,
           1 switches to ``PASSIVE_BOMB``, 2 finishes with cards
           ``[16, 17]`` as ``Category.BIGBANG``, 3 switches to
           ``PASSIVE_RESPONSE``.
         * ``PASSIVE_BOMB``: finishes with four cards of value
           ``action + 3`` as ``Category.QUADRIC``.
         * ``PASSIVE_RESPONSE``: builds the main cards for
           ``self.category``; categories that carry minor cards continue
           in ``MINOR_RESPONSE``, all others finish.
         * ``MINOR_RESPONSE``: adds one minor card (or pair).

     Active turn (``ACT_TYPE.ACTIVE``):
         * ``ACTIVE_DECISION``: the category becomes ``action + 1``.
         * ``ACTIVE_RESPONSE``: line categories store the response and
           continue in ``ACTIVE_SEQ``; THREE_ONE / THREE_TWO / FOUR_TWO
           build their main cards and continue in ``MINOR_RESPONSE``;
           everything else finishes immediately.
         * ``ACTIVE_SEQ``: the length is ``action + 1``; the two
           three-with-minor line categories continue in
           ``MINOR_RESPONSE``, the rest finish.
         * ``MINOR_RESPONSE``: adds one minor card (or pair).

     Any other ``act`` / ``mode`` combination is ignored without error.

     Returns:
         None.

     Raises:
         Exception: if a passive decision is not one of 0, 1, 2 or 3.
     """

     def discard_from_hand(values):
         # The hand is stored as chars, so convert the values first.
         for card_char in to_char(values):
             self.handcards_char.remove(card_char)

     def take_minor():
         # A minor is a single card, or a pair when minor_type is non-zero.
         copies = 1 if self.minor_type == 0 else 2
         minor_values = [action + 3] * copies
         discard_from_hand(minor_values)
         self.intention = np.append(self.intention, minor_values)
         assert self.minor_length > 0
         self.minor_length -= 1
         if self.minor_length == 0:
             self.finished = True

     if self.act == ACT_TYPE.PASSIVE:
         mode = self.mode
         if mode == MODE.PASSIVE_DECISION:
             if action in (0, 2):
                 self.finished = True
                 if action == 2:
                     self.intention = np.array([16, 17])
                     self.card_type = Category.BIGBANG.value
                 else:
                     self.card_type = Category.EMPTY.value
             elif action == 1:
                 self.mode = MODE.PASSIVE_BOMB
             elif action == 3:
                 self.mode = MODE.PASSIVE_RESPONSE
             else:
                 raise Exception('unexpected action')
         elif mode == MODE.PASSIVE_BOMB:
             # The action index is shifted by 3 to obtain the card value.
             self.intention = np.array([action + 3] * 4)
             self.finished = True
             self.card_type = Category.QUADRIC.value
         elif mode == MODE.PASSIVE_RESPONSE:
             self.intention = give_cards_without_minor(
                 action, self.last_cards_value, self.category, None)
             category = self.category
             if category in (Category.THREE_ONE.value,
                             Category.THREE_TWO.value,
                             Category.THREE_ONE_LINE.value,
                             Category.THREE_TWO_LINE.value,
                             Category.FOUR_TWO.value):
                 if category in (Category.THREE_TWO.value,
                                 Category.THREE_TWO_LINE.value):
                     self.minor_type = 1
                 self.mode = MODE.MINOR_RESPONSE
                 # Main cards leave the hand before minors are chosen.
                 discard_from_hand(self.intention)
                 pending = get_seq_length(category, self.last_cards_value)
                 if pending is None:
                     pending = 2 if category == Category.FOUR_TWO.value else 1
                 self.minor_length = pending
             else:
                 self.finished = True
             self.card_type = category
         elif mode == MODE.MINOR_RESPONSE:
             take_minor()
     elif self.act == ACT_TYPE.ACTIVE:
         mode = self.mode
         if mode == MODE.ACTIVE_DECISION:
             self.category = action + 1
             self.active_decision = action
             self.mode = MODE.ACTIVE_RESPONSE
             self.card_type = self.category
         elif mode == MODE.ACTIVE_RESPONSE:
             category = self.category
             if category in (Category.SINGLE_LINE.value,
                             Category.DOUBLE_LINE.value,
                             Category.TRIPLE_LINE.value,
                             Category.THREE_ONE_LINE.value,
                             Category.THREE_TWO_LINE.value):
                 # Line categories still need a length; keep the response
                 # until ACTIVE_SEQ supplies it.
                 self.active_response = action
                 self.mode = MODE.ACTIVE_SEQ
             elif category in (Category.THREE_ONE.value,
                               Category.THREE_TWO.value,
                               Category.FOUR_TWO.value):
                 if category in (Category.THREE_TWO.value,
                                 Category.THREE_TWO_LINE.value):
                     self.minor_type = 1
                 self.mode = MODE.MINOR_RESPONSE
                 self.intention = give_cards_without_minor(
                     action, np.array([]), category, None)
                 # Main cards leave the hand before minors are chosen.
                 discard_from_hand(self.intention)
                 self.minor_length = 2 if category == Category.FOUR_TWO.value else 1
             else:
                 self.intention = give_cards_without_minor(
                     action, np.array([]), category, None)
                 self.finished = True
         elif mode == MODE.ACTIVE_SEQ:
             self.minor_length = action + 1
             self.intention = give_cards_without_minor(
                 self.active_response, np.array([]), self.category,
                 action + 1)
             category = self.category
             if category in (Category.THREE_ONE_LINE.value,
                             Category.THREE_TWO_LINE.value):
                 if category in (Category.THREE_TWO.value,
                                 Category.THREE_TWO_LINE.value):
                     self.minor_type = 1
                 self.mode = MODE.MINOR_RESPONSE
                 # Main cards leave the hand before minors are chosen.
                 discard_from_hand(self.intention)
             else:
                 self.finished = True
         elif mode == MODE.MINOR_RESPONSE:
             take_minor()
예제 #2
0
def play_one_episode(env, func):
    """Play one episode with the model on role id 2 and report the result.

    The seat whose ``env.get_role_ID()`` is 2 is played greedily from the
    outputs of ``func``; every other seat is played by ``env.step_auto()``.
    The loop runs until the environment returns a non-zero reward.

    Args:
        env: environment exposing ``reset``, ``prepare_manual``,
            ``get_last_outcards``, ``get_last_outcategory_idx``,
            ``get_curr_handcards``, ``get_role_ID``, ``step_manual`` and
            ``step_auto``.
        func: callable taking ``[role_id, state, last_cards_state,
            minor_type]`` and returning seven arrays, in this order:
            passive decision, passive bomb, passive response, active
            decision, active response, active sequence length and minor
            response scores.

    Returns:
        ``1`` if the final reward is positive, otherwise ``0``.
        NOTE(review): the debug prints that used to follow the loop
        labelled ``r > 0`` as a farmer win, i.e. a loss for the
        model-controlled seat -- confirm against the environment's reward
        convention.
    """
    # Categories whose main cards need an additional length choice.
    line_categories = (
        Category.SINGLE_LINE.value,
        Category.DOUBLE_LINE.value,
        Category.TRIPLE_LINE.value,
        Category.THREE_ONE_LINE.value,
        Category.THREE_TWO_LINE.value,
    )
    # Categories whose main cards are followed by minor cards.
    minor_categories = (
        Category.THREE_ONE.value,
        Category.THREE_TWO.value,
        Category.THREE_ONE_LINE.value,
        Category.THREE_TWO_LINE.value,
        Category.FOUR_TWO.value,
    )

    def take_action_from_prob(prob, mask):
        """Return the argmax of the first row of ``prob`` under ``mask``."""
        # Mask a copy so the caller's network output is left untouched.
        masked = np.array(prob[0])
        # Masked-out entries are forced to -1 to avoid numeric difficulty
        # (scores are presumably non-negative probabilities).
        masked[mask == 0] = -1
        return np.argmax(masked)

    def inference_minor_util60(role_id, handcards, num, is_pair, dup_mask, main_cards_char):
        """Pick ``num`` minor cards (or pairs) and return them as chars.

        ``handcards`` and ``dup_mask`` are modified in place, so callers
        pass copies or fresh arrays.
        """
        for main_card in main_cards_char:
            handcards.remove(main_card)

        outputs = []
        minor_type = 1 if is_pair else 0
        copies = 2 if is_pair else 1
        for _ in range(num):
            # The state is rebuilt from whatever is still in the hand.
            s = get_mask(handcards, action_space, None).astype(np.float32)
            input_single, input_pair, _, _ = get_masks(handcards, None)
            _, _, _, _, _, _, minor_response_prob = func(
                [np.array([role_id]), s.reshape(1, -1), np.zeros([1, 9085]), np.array([minor_type])]
            )

            # dup_mask rules out values already used by the main cards or
            # by an earlier minor pick.
            if is_pair:
                # The pair mask is padded with two zeros to match dup_mask
                # (presumably the two jokers, which cannot form a pair).
                mask = np.concatenate([input_pair, [0, 0]]) * dup_mask
            else:
                mask = input_single * dup_mask

            minor_response = take_action_from_prob(minor_response_prob, mask)
            dup_mask[minor_response] = 0

            # convert network output (0-based index) to char cards
            picked = [to_char(minor_response + 3)] * copies
            for card in picked:
                handcards.remove(card)
            outputs.extend(picked)
        return outputs

    def inference_minor_cards60(role_id, category, handcards, seq_length, dup_mask, main_cards_char):
        """Dispatch to ``inference_minor_util60`` with the count and kind of
        minor cards required by ``category``; returns None for categories
        without minor cards."""
        if category == Category.THREE_ONE.value:
            return inference_minor_util60(role_id, handcards, 1, False, dup_mask, main_cards_char)
        if category == Category.THREE_TWO.value:
            return inference_minor_util60(role_id, handcards, 1, True, dup_mask, main_cards_char)
        if category == Category.THREE_ONE_LINE.value:
            return inference_minor_util60(role_id, handcards, seq_length, False, dup_mask, main_cards_char)
        if category == Category.THREE_TWO_LINE.value:
            return inference_minor_util60(role_id, handcards, seq_length, True, dup_mask, main_cards_char)
        if category == Category.FOUR_TWO.value:
            return inference_minor_util60(role_id, handcards, 2, False, dup_mask, main_cards_char)
        return None

    def attach_minor_cards(role_id, category, intention, seq_length, handcards_char):
        """Return ``intention`` extended with the inferred minor cards."""
        dup_mask = np.ones([15])
        # Block every value used by the main cards; a missing or zero
        # sequence length means a single main value.
        main_start = intention[0] - 3
        for offset in range(seq_length or 1):
            dup_mask[main_start + offset] = 0
        minor_chars = inference_minor_cards60(
            role_id, category, handcards_char.copy(), seq_length, dup_mask, to_char(intention))
        return np.concatenate([intention, to_value(minor_chars)])

    env.reset()
    init_cards = np.arange(21)
    env.prepare_manual(init_cards)
    r = 0
    while r == 0:
        last_cards_value = env.get_last_outcards()
        last_cards_char = to_char(last_cards_value)
        last_category_idx = env.get_last_outcategory_idx()
        curr_cards_char = to_char(env.get_curr_handcards())
        # With nothing to respond to, this seat leads the trick.
        is_active = last_cards_value.size == 0

        s = get_mask(curr_cards_char, action_space, None if is_active else last_cards_char).astype(np.float32)
        last_state = get_mask(last_cards_char, action_space, None).astype(np.float32)

        role_id = env.get_role_ID()

        if role_id == 2:
            intention = None
            if is_active:
                # first get mask
                decision_mask, response_mask, _, length_mask = get_mask_alter(curr_cards_char, [], last_category_idx)

                # NOTE(review): the last input has length s.shape[0], not the
                # batch size of 1 used by the other inputs -- confirm that
                # this is what the network expects.
                _, _, _, active_decision_prob, active_response_prob, active_seq_prob, _ = func(
                    [np.array([role_id]), s.reshape(1, -1), np.zeros([1, 9085]), np.zeros([s.shape[0]])]
                )

                # make decision depending on output
                active_decision = take_action_from_prob(active_decision_prob, decision_mask)
                active_category_idx = active_decision + 1

                # get response
                active_response = take_action_from_prob(active_response_prob, response_mask[active_decision])

                # next sequence length (stays 0 for non-line categories)
                seq_length = 0
                if active_category_idx in line_categories:
                    seq_length = take_action_from_prob(
                        active_seq_prob, length_mask[active_decision][active_response]) + 1

                # give main cards
                intention = give_cards_without_minor(active_response, last_cards_value, active_category_idx, seq_length)

                # then give minor cards
                if active_category_idx in minor_categories:
                    intention = attach_minor_cards(
                        role_id, active_category_idx, intention, seq_length, curr_cards_char)
            else:
                decision_mask, response_mask, bomb_mask, _ = get_mask_alter(curr_cards_char, last_cards_char,
                                                                            last_category_idx)

                # NOTE(review): same input-shape question as in the active
                # branch for the last element.
                passive_decision_prob, passive_bomb_prob, passive_response_prob, _, _, _, _ = func(
                    [np.array([role_id]), s.reshape(1, -1), last_state.reshape(1, -1), np.zeros([s.shape[0]])])

                passive_decision = take_action_from_prob(passive_decision_prob, decision_mask)

                if passive_decision == 0:
                    intention = np.array([])
                elif passive_decision == 1:
                    passive_bomb = take_action_from_prob(passive_bomb_prob, bomb_mask)
                    # converting 0-based index to 3-based value
                    intention = np.array([passive_bomb + 3] * 4)
                elif passive_decision == 2:
                    intention = np.array([16, 17])
                elif passive_decision == 3:
                    passive_response = take_action_from_prob(passive_response_prob, response_mask)

                    intention = give_cards_without_minor(passive_response, last_cards_value, last_category_idx, None)
                    if last_category_idx in minor_categories:
                        seq_length = get_seq_length(last_category_idx, last_cards_value)
                        intention = attach_minor_cards(
                            role_id, last_category_idx, intention, seq_length, curr_cards_char)

            # Check the invariant before the move is handed to the
            # environment, not afterwards.
            assert intention is not None, 'no cards were selected for the model-controlled seat'
            # since step auto needs full last card group info, we do not explicitly feed card type
            r, _, _ = env.step_manual(intention)
        else:
            _, r, _ = env.step_auto()
    return int(r > 0)