示例#1
0
    def get_combinations(self, curr_cards_char, last_cards_char):
        """Enumerate decompositions of the current hand into action indices.

        Hands with more than 10 cards are decomposed with
        ``get_combinations_nosplit`` over the augmented action space; smaller
        hands use ``get_combinations_recursive`` over the plain action space.

        Args:
            curr_cards_char: the cards currently held, as card characters.
            last_cards_char: the cards that have to be beaten; an empty
                sequence means there is nothing to follow.

        Returns:
            A ``(combs, fine_mask)`` tuple. ``combs`` is a list of lists of
            action indices. When ``last_cards_char`` is non-empty every list
            is prefixed with index 0 and only lists holding at least one
            action that is ``bigger_than`` the last cards are kept.
            ``fine_mask`` is ``None`` when there is nothing to follow,
            otherwise a boolean array of shape
            ``(len(combs), self.num_actions[1])`` that is ``True`` at
            ``[i, j]`` when ``combs[i][j]`` beats the last cards.
        """

        def build_fine_mask(kept_combs, beating_idx):
            # Plain `bool` replaces the `np.bool` alias removed in NumPy 1.24.
            fine = np.zeros([len(kept_combs), self.num_actions[1]], dtype=bool)
            for row, comb in enumerate(kept_combs):
                for col, action_idx in enumerate(comb):
                    if action_idx in beating_idx:
                        fine[row][col] = True
            return fine

        following = len(last_cards_char) > 0
        # index 0 is prepended to every combination when following
        # (presumably the "pass" action -- confirm against the action space)
        prefix = [0] if following else []
        idx_must_be_contained = set()

        if len(curr_cards_char) > 10:
            card_mask = Card.char2onehot60(curr_cards_char).astype(np.uint8)
            mask = augment_action_space_onehot60
            # positive wherever a row uses a slot that the hand does not have
            missing = np.expand_dims(1 - card_mask, 0) * mask
            invalid_row_idx = set(np.where(missing > 0)[0])
            if not following:
                invalid_row_idx.add(0)

            valid_row_idx = [i for i in range(len(augment_action_space))
                             if i not in invalid_row_idx]

            mask = mask[valid_row_idx, :]
            idx_mapping = dict(zip(range(mask.shape[0]), valid_row_idx))

            # augment mask
            # TODO: known issue: 555444666 will not decompose into 5554 and 66644
            combs = get_combinations_nosplit(mask, card_mask)
            combs = [prefix + [clamp_action_idx(idx_mapping[idx]) for idx in comb]
                     for comb in combs]

            if following:
                last_group = CardGroup.to_cardgroup(last_cards_char)
                # NOTE(review): combs hold clamp_action_idx() outputs while this
                # set holds raw augmented indices; the membership tests below
                # only match where clamping leaves the index unchanged -- confirm.
                idx_must_be_contained = {
                    idx for idx in valid_row_idx
                    if CardGroup.to_cardgroup(augment_action_space[idx]).bigger_than(last_group)
                }
        else:
            mask = get_mask_onehot60(curr_cards_char, action_space, None) \
                .reshape(len(action_space), 15, 4).sum(-1).astype(np.uint8)
            valid = mask.sum(-1) > 0
            cards_target = Card.char2onehot60(curr_cards_char) \
                .reshape(-1, 4).sum(-1).astype(np.uint8)
            # do not feed empty to C++, which will cause infinite loop
            combs = get_combinations_recursive(mask[valid, :], cards_target)
            idx_mapping = dict(zip(range(valid.shape[0]), np.where(valid)[0]))

            combs = [prefix + [idx_mapping[idx] for idx in comb] for comb in combs]

            if following:
                valid[0] = True
                last_group = CardGroup.to_cardgroup(last_cards_char)
                idx_must_be_contained = {
                    idx for idx in range(len(action_space))
                    if valid[idx] and CardGroup.to_cardgroup(action_space[idx]).bigger_than(last_group)
                }

        if not following:
            return combs, None

        # keep only decompositions that contain at least one beating action
        combs = [comb for comb in combs if not idx_must_be_contained.isdisjoint(comb)]
        return combs, build_fine_mask(combs, idx_must_be_contained)
示例#2
0
def dancing_link():
    """Time the no-split decomposition on a fixed hand and print the outcome.

    A ``Pyenv`` is created, reset and prepared, but the hand used for the run
    is the hard-coded ``3 3 3 4 4 4`` with ``3 3`` as the cards to beat.
    Printed, in order: the set of augmented-action rows that beat ``3 3``,
    the fine mask, the elapsed time, the number of kept combinations, and
    the actions of every kept combination.
    """
    env = Pyenv()
    env.reset()
    env.prepare()
    # print(env.get_handcards())
    # the dealt hand is queried, then replaced by a fixed hand
    env.get_handcards()
    hand = ['3', '3', '3', '4', '4', '4']
    from timeit import default_timer
    started = default_timer()
    hand_onehot = Card.char2onehot60(hand).astype(np.uint8)
    to_beat = ['3', '3']
    full_mask = augment_action_space_onehot60
    # positive wherever a row uses a slot that the hand does not have
    overflow = np.expand_dims(1 - hand_onehot, 0) * full_mask
    unplayable_rows = set(np.where(overflow > 0)[0])

    # row 0 is always skipped here
    playable_rows = []
    for row in range(1, len(augment_action_space)):
        if row not in unplayable_rows:
            playable_rows.append(row)

    winning_rows = set()
    for row in playable_rows:
        candidate = CardGroup.to_cardgroup(augment_action_space[row])
        if candidate.bigger_than(CardGroup.to_cardgroup(to_beat)):
            winning_rows.add(row)
    print(winning_rows)

    sub_mask = full_mask[playable_rows, :]
    row_of = dict(zip(range(sub_mask.shape[0]), playable_rows))

    # augment mask
    # TODO: known issue: 555444666 will not decompose into 5554 and 66644
    raw_combs = get_combinations_nosplit(
        sub_mask, Card.char2onehot60(hand).astype(np.uint8))
    clamped_combs = [[clamp_action_idx(row_of[idx]) for idx in comb]
                     for comb in raw_combs]
    # keep only decompositions that contain at least one winning row
    combs = []
    for comb in clamped_combs:
        if not winning_rows.isdisjoint(comb):
            combs.append(comb)

    fine_mask = np.zeros([len(combs), 21])
    for r, comb in enumerate(combs):
        for c, action_idx in enumerate(comb):
            if action_idx in winning_rows:
                fine_mask[r][c] = 1
    print(fine_mask)
    finished = default_timer()
    print(finished - started)

    print(len(combs))
    for comb in combs:
        for idx in comb:
            print(action_space[idx], end=', ')
        print()
示例#3
0
 def step(self, i, a, single_step=False):
     """Apply action index ``a`` for player ``i`` and advance the game.

     ``a == 0`` is logged as a pass; any other index discards
     ``self.action_space[a]`` from the player's hand and records it as the
     last play. Unless ``single_step`` is set, the next two seats are then
     visited in order and each non-trainable player responds, stopping at
     the first trainable one.

     Returns:
         ``(result, done)``: ``(2, True)`` / ``(1, True)`` when player ``i``
         empties its hand as lord / non-lord, ``(-1, True)`` when a
         non-trainable player about to respond has no cards, otherwise
         ``(0, False)``.
     """
     if a == 0:
         self.log(i, [], True)
     else:
         actor = self.players[i]
         actor.discard(self.action_space[a])
         self.last_player = actor
         assert self.players[i] is self.last_player
         self.last_cards = CardGroup.to_cardgroup(self.action_space[a])
         played = self.last_cards.cards
         self.history += played
         self.log(i, played, False)
         if not actor.cards:
             return (2 if actor.is_lord else 1), True

     if single_step:
         self.next_turn = (self.next_turn + 1) % 3
         return 0, False

     ai = 0
     for seat in (i + 1, i + 2):
         ai = seat % 3
         responder = self.players[ai]
         if responder.trainable:
             break
         # NOTE(review): this checks the hand *before* the player responds,
         # so a responder that empties its hand below is not detected
         # here -- confirm this is intended.
         if not responder.cards:
             # TODO: add coordination rewards
             return -1, True
         self.last_player, self.last_cards, passed = responder.respond(
             self.last_player, self.last_cards,
             self.players[(ai - 1) % 3],
             self.players[(ai + 1) % 3])
         self.log(ai, self.last_cards.cards, passed)
         if not passed:
             self.history += self.last_cards.cards
     self.next_turn = ai % 3
     return 0, False
示例#4
0
    def step(self, intention):
        """Play the agent's move (or pass) and let the scripted opponent answer.

        Args:
            intention: the cards the agent plays; an empty value means the
                agent passes and the opponent leads.

        Returns:
            ``(reward, done)``: ``(1, True)`` when the agent empties its hand,
            ``(-1, True)`` when the opponent empties its hand, otherwise
            ``(0, False)``.

        Side effects: cards are removed from ``self.agent_cards`` /
        ``self.oppo_cards``, appended to ``self.history[0]`` /
        ``self.history[1]``, and ``self.controller`` / ``self.last_cards``
        are updated to reflect who played last.
        """
        if not intention:
            # agent passed: the opponent leads with the first non-empty
            # action (in action_space order) that its hand can afford
            self.controller = 1
            for a in action_space:
                if a and counter_subset(a, self.oppo_cards):
                    self.last_cards = a
                    for card in a:
                        self.oppo_cards.remove(card)
                        self.history[1].append(card)
                    if not self.oppo_cards:
                        return -1, True
                    return 0, False
            # NOTE(review): if no action matched, execution falls through and
            # the empty intention is processed as an agent play below --
            # confirm this cannot happen or is intended.

        self.controller = 0
        self.last_cards = intention
        for card in intention:
            self.agent_cards.remove(card)
            self.history[0].append(card)
        if not self.agent_cards:
            return 1, True

        # opponent answers with the first affordable action that beats the agent
        group_intention = CardGroup.to_cardgroup(intention)
        for a in action_space:
            if not a or not counter_subset(a, self.oppo_cards):
                continue
            if CardGroup.to_cardgroup(a).bigger_than(group_intention):
                for card in a:
                    self.oppo_cards.remove(card)
                    self.history[1].append(card)
                self.last_cards = a
                self.controller = 1
                break
        if not self.oppo_cards:
            return -1, True

        return 0, False
示例#5
0
 def reset(self):
     """Reset the per-game fields, the card history and every player."""
     self.status, self.cnt, self.winner = 0, 0, None
     self.c_id = self.players[0].id
     self.cc = CardGroup(CType.Null)

     # rebuild the history with a zero entry for each key 3..17
     tally = self.historyCards
     tally.clear()
     for key in range(3, 18):
         tally[key] = 0

     for participant in self.players:
         participant.reset()
示例#6
0
 def get_mask(self, i):
     """Return a boolean mask over ``self.action_space`` for player ``i``.

     Entry ``j`` starts as ``True`` when ``counter_subset`` accepts
     ``self.action_space[j]`` against the player's cards. Then:

     * if another player holds the last play, every entry from index 1 on
       must also be ``bigger_than(self.last_cards)``; entry 0 is left as is;
     * if nobody has played yet, or this player holds the last play,
       entry 0 is cleared.

     Returns:
         A 1-D ``bool`` array with one entry per action.
     """
     hand = self.players[i].cards
     n_actions = len(self.action_space)
     # Allocate the mask explicitly: np.zeros_like(self.action_space) would
     # build an array from the (possibly ragged) action lists themselves,
     # which recent NumPy rejects and which yields a non-bool dtype.
     mask = np.zeros(n_actions, dtype=bool)
     for j in range(n_actions):
         if counter_subset(self.action_space[j], hand):
             mask[j] = True

     if self.last_player is None or self.last_player is self.players[i]:
         mask[0] = False
     else:
         for j in range(1, n_actions):
             if mask[j] and not CardGroup.to_cardgroup(
                     self.action_space[j]).bigger_than(self.last_cards):
                 mask[j] = False
     return mask
示例#7
0
def char2ccardgroup(chars):
    """Build a ``CCardGroup`` from card characters.

    The characters are first parsed with ``CardGroup.to_cardgroup``; each of
    the group's cards becomes a ``CCard`` of ``to_value(card) - 3``, and the
    group's ``type``, ``value`` and ``len`` are carried over.
    """
    group = CardGroup.to_cardgroup(chars)
    ccards = []
    for card in group.cards:
        ccards.append(CCard(to_value(card) - 3))
    category = CCategory(group.type)
    return CCardGroup(ccards, category, group.value, group.len)
示例#8
0
    def _populate_exp(self):
        """Populate one transition into replay memory by epsilon-greedy.

        Picks an action for the current stage (``self._comb_mask`` selects
        which entry of ``self.num_actions`` applies), steps the environment
        when not in the combination stage, lets the other agents play until
        it is this agent's turn again or the game ends, and appends an
        ``Experience(old_state, act, reward, isOver, comb_mask, fine_mask)``
        to ``self.mem``.

        Raises:
            Exception: if the chosen non-zero action does not beat the last
                played cards.
        """
        old_s = self._current_ob
        comb_mask = self._comb_mask
        # masks fed to the predictor are padded to the wider action count
        max_actions = max(self.num_actions[0], self.num_actions[1])
        num_valid_actions = self.num_actions[0 if comb_mask else 1]
        use_fine_mask = not self._comb_mask and self._fine_mask is not None
        if use_fine_mask:
            if self._fine_mask.shape[0] == max_actions:
                fine_mask = self._fine_mask
            else:
                fine_mask = np.pad(self._fine_mask,
                                   (0, max_actions - self._fine_mask.shape[0]),
                                   'constant', constant_values=(0, 0))
        else:
            # plain `bool` replaces the `np.bool` alias removed in NumPy 1.24
            fine_mask = np.ones([max_actions], dtype=bool)
        last_cards_char = self.player.get_last_outcards()
        if self.rng.rand() <= self.exploration:
            # explore
            if use_fine_mask:
                # random scores with masked-out entries set to NaN, so the
                # argmax is a uniformly random allowed action
                q_values = np.random.rand(self.num_actions[1])
                q_values[np.where(np.logical_not(self._fine_mask))[0]] = np.nan
                act = np.nanargmax(q_values)
            else:
                act = self.rng.choice(range(num_valid_actions))
        else:
            # exploit
            q_values = self.curr_predictor(old_s[None, :, :, :],
                                           np.array([comb_mask]),
                                           np.array([fine_mask]))[0][0]
            if use_fine_mask:
                q_values = q_values[:self.num_actions[1]]
                masked_out = np.where(np.logical_not(self._fine_mask))[0]
                # the predictor is expected to have pushed masked entries down
                assert np.all(q_values[masked_out] < -100)
                q_values[masked_out] = np.nan
            act = np.nanargmax(q_values)
            assert act < num_valid_actions
            # clamp action to valid range
            act = min(act, num_valid_actions - 1)
        winner = -1
        reward = 0
        if comb_mask:
            isOver = False
        else:
            if len(last_cards_char) > 0 and act > 0:
                chosen = CardGroup.to_cardgroup(self._action_space[act])
                if not chosen.bigger_than(CardGroup.to_cardgroup(last_cards_char)):
                    print('warning, some error happened, ', self._action_space[act], last_cards_char)
                    raise Exception("card comparison error")
            winner, isOver = self.player.step(self._action_space[act])

        # step for AI farmers
        while not isOver and self.player.get_curr_agent_name() != self.agent_name:
            handcards = self.player.get_curr_handcards()
            last_two_cards = self.player.get_last_two_cards()
            prob_state = self.player.get_state_prob()
            action = self.predictors[self.player.get_curr_agent_name()].predict(
                handcards, last_two_cards, prob_state)
            winner, isOver = self.player.step(action)

        if isOver:
            if self.agent_name == winner:
                reward = 1
            else:
                names = self.player.get_all_agent_names()
                # positions summing to 3 count as a win for this agent too
                # (presumably the two farmers sit at indices 1 and 2 -- confirm)
                if names.index(winner) + names.index(self.agent_name) == 3:
                    reward = 1
                else:
                    reward = -1
        self._current_game_score.feed(reward)

        if isOver:
            self._player_scores.feed(self._current_game_score.sum)
            self.player.reset()
            self.player.prepare()
            self._comb_mask = True
            self.prestart()
            self._current_game_score.reset()
        else:
            self._comb_mask = not self._comb_mask
        self._current_ob, self._action_space = self.get_state_and_action_spaces(
            act if not self._comb_mask else None)
        self.mem.append(Experience(old_s, act, reward, isOver, comb_mask, fine_mask))
示例#9
0
    def _populate_exp(self):
        """Populate one transition into replay memory by epsilon-greedy.

        Picks an action for the current stage (``self._comb_mask`` selects
        which entry of ``self.num_actions`` applies), steps the environment
        manually when not in the combination stage, auto-steps the other
        roles until it is ``ROLE_ID_TO_TRAIN``'s turn or the game ends, and
        appends an ``Experience(old_state, act, reward, isOver, comb_mask,
        fine_mask)`` to ``self.mem``. When a game ends the environment is
        reset and auto-stepped until the trained role is to act.
        """
        old_s = self._current_ob
        comb_mask = self._comb_mask
        # masks fed to the predictor are padded to the wider action count
        max_actions = max(self.num_actions[0], self.num_actions[1])
        num_valid_actions = self.num_actions[0 if comb_mask else 1]
        use_fine_mask = not self._comb_mask and self._fine_mask is not None
        if use_fine_mask:
            if self._fine_mask.shape[0] == max_actions:
                fine_mask = self._fine_mask
            else:
                fine_mask = np.pad(self._fine_mask,
                                   (0, max_actions - self._fine_mask.shape[0]),
                                   'constant', constant_values=(0, 0))
        else:
            # plain `bool` replaces the `np.bool` alias removed in NumPy 1.24
            fine_mask = np.ones([max_actions], dtype=bool)
        last_cards_value = self.player.get_last_outcards()
        if self.rng.rand() <= self.exploration:
            # explore
            if use_fine_mask:
                # random scores with masked-out entries set to NaN, so the
                # argmax is a uniformly random allowed action
                q_values = np.random.rand(self.num_actions[1])
                q_values[np.where(np.logical_not(self._fine_mask))[0]] = np.nan
                act = np.nanargmax(q_values)
            else:
                act = self.rng.choice(range(num_valid_actions))
        else:
            # exploit
            q_values = self.predictor(old_s[None, :, :, :],
                                      np.array([comb_mask]),
                                      np.array([fine_mask]))[0][0]
            if use_fine_mask:
                q_values = q_values[:self.num_actions[1]]
                masked_out = np.where(np.logical_not(self._fine_mask))[0]
                # the predictor is expected to have pushed masked entries down
                assert np.all(q_values[masked_out] < -100)
                q_values[masked_out] = np.nan
            act = np.nanargmax(q_values)
            assert act < num_valid_actions
            # clamp action to valid range
            act = min(act, num_valid_actions - 1)
        if comb_mask:
            reward = 0
            isOver = False
        else:
            if last_cards_value.size > 0 and act > 0:
                chosen = CardGroup.to_cardgroup(self._action_space[act])
                if not chosen.bigger_than(
                        CardGroup.to_cardgroup(to_char(last_cards_value))):
                    print('warning, some error happened')
            reward, isOver, _ = self.player.step_manual(
                to_value(self._action_space[act]))

        # step for AI
        while not isOver and self.player.get_role_ID() != ROLE_ID_TO_TRAIN:
            _, reward, _ = self.player.step_auto()
            isOver = (reward != 0)
        # if landlord negate the reward
        if ROLE_ID_TO_TRAIN == 2:
            reward = -reward
        self._current_game_score.feed(reward)

        if isOver:
            self._player_scores.feed(self._current_game_score.sum)
            while True:
                self.player.reset()
                self.player.prepare()
                self._comb_mask = True
                early_stop = False
                while self.player.get_role_ID() != ROLE_ID_TO_TRAIN:
                    # Use a separate name here: `reward` and `isOver` describe
                    # the transition that just ended and are stored in the
                    # Experience below, so the prestart must not clobber them.
                    _, prestart_reward, _ = self.player.step_auto()
                    if prestart_reward != 0:
                        print('prestart ends too early! now resetting env')
                        early_stop = True
                        break
                if early_stop:
                    continue
                self._current_ob, self._action_space = self.get_state_and_action_spaces()
                break
            self._current_game_score.reset()
        else:
            self._comb_mask = not self._comb_mask
        self._current_ob, self._action_space = self.get_state_and_action_spaces(
            act if not self._comb_mask else None)
        self.mem.append(
            Experience(old_s, act, reward, isOver, comb_mask, fine_mask))
示例#10
0
    def respond(self, last_player, cards, before_player, next_player):
        """Choose this player's move in answer to the last play.

        Args:
            last_player: the player who made the last play, or ``None`` when
                nothing has been played yet.
            cards: the card group of the last play.
            before_player: the player seated before this one.
            next_player: the player seated after this one.

        Returns:
            ``(player, group, passed)``. On a pass this is
            ``(last_player, cards, True)`` unchanged; on a play it is
            ``(self, played_group, False)`` and the played cards have been
            discarded from this player's hand.
        """
        if self.is_human:
            # `raw_input` only exists on Python 2; Python 3 renamed it `input`.
            try:
                read_line = raw_input  # noqa: F821
            except NameError:
                read_line = input
            # keep prompting until the entry is a pass or a legal play
            while True:
                print("your cards: ", end='')
                print(self.cards)
                intend = read_line("enter your intention(0 for pass): ")
                intend = intend.strip().split(',')
                if intend[0] == '0':
                    return last_player, cards, True
                if not counter_subset(intend, self.cards) or \
                        not CardGroup.isvalid(intend):
                    print("invalid intention, try again")
                    continue
                if last_player is not None and last_player != self:
                    if not (CardGroup.to_cardgroup(intend)).bigger_than(cards):
                        print('you must give bigger cards')
                        continue
                self.discard(intend)
                return self, CardGroup.to_cardgroup(intend), False

        if self.need_analyze:
            self.candidates = CardGroup.analyze(self.cards)
        self.need_analyze = False

        def play(group):
            # discard the group's cards and report it as this player's move
            self.discard(group.cards)
            return self, group, False

        if last_player is None or self is last_player:
            # this player leads
            if CardGroup.folks(self.cards) == 2:
                return play(self.candidates[-1])
            elif not next_player.is_lord and len(next_player.cards) == 1:
                # next non-lord player holds one card: prefer a single
                for group in self.candidates:
                    if group.type == 'single':
                        return play(group)
                return play(self.candidates[0])
            elif next_player.is_lord and len(next_player.cards) == 1:
                # lord is next with one card left: prefer anything but a single
                for group in self.candidates:
                    if group.type != 'single':
                        return play(group)
                return play(self.candidates[-1])
            else:
                for group in self.candidates:
                    if group.type != 'single' or Card.to_value(group.cards[0]) < Card.to_value('A'):
                        return play(group)
                return play(self.candidates[0])
        elif not last_player.is_lord:
            # the last play came from a non-lord player
            if CardGroup.folks(self.cards) <= 2:
                for c in self.candidates:
                    if c.bigger_than(cards):
                        return play(c)
                return last_player, cards, True
            elif before_player.is_lord and last_player is not before_player:
                return last_player, cards, True
            else:
                for c in self.candidates:
                    if c.bigger_than(cards) and cards.type not in ['bomb', 'bigbang'] \
                            and Card.to_value(c.cards[0]) < Card.to_value('A'):
                        return play(c)
                return last_player, cards, True
        else:
            # the last play came from the lord: try a non-bomb answer first
            for c in self.candidates:
                if c.bigger_than(cards) and c.type not in ['bomb', 'bigbang']:
                    return play(c)
            # use bomb
            for c in self.candidates:
                if c.bigger_than(cards):
                    return play(c)
            return last_player, cards, True