Exemplo n.º 1
0
    def set_flags(self, flag, cond):
        """Apply the flag updates attached to the outcome ``cond``.

        ``flag`` may hold a ``'true'`` and/or a ``'false'`` entry, each a list
        of updates. The list matching the truthiness of ``cond`` is applied to
        ``self.rule_flags``; if that entry is absent (or ``None``) nothing
        happens. Every update is a mapping with the keys ``'name'``,
        ``'value'`` and ``'op'``.

        Supported operators (case-insensitive): ``set``/``=``,
        ``decr``/``-=`` and ``incr``/``+=``. The arithmetic operators convert
        both operands with ``to_value`` and store the result as a string.

        Raises:
            Exception: if an update with a non-empty name uses an unknown
                operator.
        """
        updates = flag.get('true' if cond else 'false')
        if updates is None:
            return

        for update in updates:
            name, val, operator = update['name'], update['value'], update['op']
            op = operator.lower()
            if not name:
                # Updates without a name are skipped, whatever their operator.
                continue

            if op in ('set', '='):
                self.rule_flags[name] = val
            elif op in ('decr', '-='):
                difference = to_value(self.rule_flags[name]) - to_value(val)
                self.rule_flags[name] = str(difference)
            elif op in ('incr', '+='):
                total = to_value(self.rule_flags[name]) + to_value(val)
                self.rule_flags[name] = str(total)
            else:
                raise Exception(f"Unknown flag operator: '{op}'")
Exemplo n.º 2
0
    def intention(self, env):
        """Pick the current player's move by running ``mcsearch`` on ``env``.

        The current hand and the other two players' cards are converted to
        ``CCard`` objects (built from ``to_value(c) - 3``), the last played
        cards are converted to a ``CCardGroup``, and the action returned by
        ``mcsearch`` is converted back with ``to_char(int(c) + 3)``.

        Returns:
            list: one ``to_char`` result per card of the chosen action.
        """
        def to_ccards(chars):
            return [CCard(to_value(ch) - 3) for ch in chars]

        hand = to_ccards(env.get_curr_handcards())
        seat = env.get_current_idx()

        # Cards held by the two players that follow the current one.
        next_cards = env.player_cards[env.agent_names[(seat + 1) % 3]]
        after_next_cards = env.player_cards[env.agent_names[(seat + 2) % 3]]
        unseen = to_ccards(next_cards + after_next_cards)
        next_count = len(next_cards)

        last_group = CardGroup.to_cardgroup(env.get_last_outcards())
        last_cg = CCardGroup(to_ccards(last_group.cards),
                             CCategory(last_group.type),
                             last_group.value,
                             last_group.len)

        # Seats are passed to the search relative to the lord's seat.
        names = env.agent_names
        current_seat = names.index(env.curr_player)
        lord_seat = names.index(env.lord)
        controller_seat = names.index(env.controller)

        caction = mcsearch(hand, unseen, next_count, last_cg,
                           (current_seat - lord_seat + 3) % 3,
                           (controller_seat - lord_seat + 3) % 3,
                           10, 50, 500)
        return [to_char(int(card) + 3) for card in caction.cards]
Exemplo n.º 3
0
    def step_auto(self):
        """Let ``mcsearch`` choose the current player's move and play it.

        Builds the search inputs from this environment's own state (current
        hand, the other two players' cards, the last played cards and the
        seats of the current player and controller relative to the lord),
        then forwards the chosen cards to ``self.step``.

        Returns:
            Whatever ``self.step`` returns for the chosen cards.
        """
        def as_ccards(chars):
            # CCard is constructed from the card value shifted down by 3.
            return [CCard(to_value(ch) - 3) for ch in chars]

        own_cards = as_ccards(self.get_curr_handcards())
        seat = self.get_current_idx()

        following = self.player_cards[self.agent_names[(seat + 1) % 3]]
        last_in_turn = self.player_cards[self.agent_names[(seat + 2) % 3]]
        hidden_cards = as_ccards(following + last_in_turn)
        following_count = len(following)

        group = CardGroup.to_cardgroup(self.get_last_outcards())
        last_cg = CCardGroup(as_ccards(group.cards), CCategory(group.type),
                             group.value, group.len)

        names = self.agent_names
        current_seat = names.index(self.curr_player)
        lord_seat = names.index(self.lord)
        controller_seat = names.index(self.controller)

        caction = mcsearch(own_cards, hidden_cards, following_count, last_cg,
                           (current_seat - lord_seat + 3) % 3,
                           (controller_seat - lord_seat + 3) % 3,
                           10, 50, 500)
        chosen = [to_char(int(card) + 3) for card in caction.cards]
        return self.step(chosen)
Exemplo n.º 4
0
 def step(self, intention):
     """Play ``intention`` for the current player and track the controller.

     The cards are converted with ``to_value`` and handed to
     ``self.step_manual``. When the returned category is positive, the
     player who just moved becomes ``self.controller``.

     Returns:
         tuple: ``(r, done)`` as reported by ``step_manual``.
     """
     mover = self.get_current_idx()
     reward, finished, category = self.step_manual(to_value(intention))
     if category > 0:
         self.controller = self.agent_names[mover]
     return reward, finished
Exemplo n.º 5
0
    def evaluate_flag(self, flag):
        """Evaluate a flag check and apply its follow-up flag updates.

        ``flag`` must provide ``'name'``, ``'value'`` and ``'condition'``.
        The stored value ``self.rule_flags.get(name, "")`` is compared with
        ``flag['value']`` using the (case-insensitive) condition operator.
        A check with an empty name is always true. An optional nested check
        under ``'and'`` or ``'or'`` (not both) is evaluated recursively and
        combined with the result. Finally ``self.set_flags(flag, result)`` is
        called.

        Returns:
            The combined result of the check.

        Raises:
            Exception: if one of the required keys is missing.
            SyntaxError: if the operator is unknown, or both ``'and'`` and
                ``'or'`` are present.
        """
        try:
            name, uv, operator = flag['name'], flag['value'], flag['condition']
        except KeyError as error:
            raise Exception(f"Missing key in flagCheck: '{error}'")

        op = operator.lower()
        ev = self.rule_flags.get(name, "")

        # Each entry pairs the accepted operator spellings with a lazy check,
        # so only the selected comparison is ever executed.
        checks = (
            (('equals', '=='), lambda: ev == uv),
            (('notequals', '!='), lambda: ev != uv),
            (('contains', '@'), lambda: uv in ev),
            (('notcontains', '!@'), lambda: uv not in ev),
            (('search', '~'), lambda: re.search(uv, ev) is not None),
            (('notsearch', '!~'), lambda: re.search(uv, ev) is None),
            (('lessthan', '<'), lambda: to_value(ev) < to_value(uv)),
            (('lessthanequals', '<='), lambda: to_value(ev) <= to_value(uv)),
            (('greaterthan', '>'), lambda: to_value(ev) > to_value(uv)),
            (('greaterthanequals', '>='),
             lambda: to_value(ev) >= to_value(uv)),
        )

        if not name:
            result = True
        else:
            for aliases, check in checks:
                if op in aliases:
                    result = check()
                    break
            else:
                raise SyntaxError(
                    f"Unknown flag condition operator: '{operator}'")

        has_and = 'and' in flag
        has_or = 'or' in flag
        if has_and and has_or:
            raise SyntaxError(
                "flag only allows either 'and' or 'or', not both")
        if has_and:
            # The nested check is always evaluated, even if result is false.
            nested = self.evaluate_flag(flag['and'])
            result = result and nested
        elif has_or:
            nested = self.evaluate_flag(flag['or'])
            result = result or nested

        self.set_flags(flag, result)
        return result
Exemplo n.º 6
0
    def check_criteria(self, action):
        """Check the ``'addon'`` criterion of ``action`` against the page.

        The element is located with ``self.driver.find_element_by_xpath``
        using the xpath obtained via ``dict_gets(criterion, ('xpath',
        'elementFinder'))``. For ``input`` elements the ``value`` attribute is
        compared, for every other element its text. The comparison operator
        comes from ``criterion['condition']`` (case-insensitive).

        Returns:
            ``True`` when no xpath is configured, ``False`` when the element
            cannot be found, otherwise the outcome of the comparison.

        Raises:
            Exception: if a required key is missing or the operator is
                unknown.
        """
        try:
            criterion = action['addon']
            xpath = dict_gets(criterion, ('xpath', 'elementFinder'))
            if not xpath:
                return True

            uv, operator = criterion['value'], criterion['condition']
        except KeyError as error:
            raise Exception(f"Missing key in addon: '{error}'")

        def compare(ev, op):
            # Maps every accepted operator spelling to a lazy comparison.
            table = {}
            for aliases, check in (
                (('equals', '=='), lambda: ev == uv),
                (('notequals', '!='), lambda: ev != uv),
                (('contains', '@'), lambda: uv in ev),
                (('notcontains', '!@'), lambda: uv not in ev),
                (('search', '~'), lambda: re.search(uv, ev) is not None),
                (('notsearch', '!~'), lambda: re.search(uv, ev) is None),
                (('lessthan', '<'), lambda: to_value(ev) < to_value(uv)),
                (('lessthanequals', '<='),
                 lambda: to_value(ev) <= to_value(uv)),
                (('greaterthan', '>'), lambda: to_value(ev) > to_value(uv)),
                (('greaterthanequals', '>='),
                 lambda: to_value(ev) >= to_value(uv)),
            ):
                for alias in aliases:
                    table[alias] = check
            if op not in table:
                raise Exception(f"Unknown condition operator: '{operator}'")
            return table[op]()

        try:
            elem = self.driver.find_element_by_xpath(xpath)
            # Inputs expose their content as an attribute; all other tags
            # (labels included) are read through their text.
            if elem.tag_name == 'input':
                ev = elem.get_attribute('value')
            else:
                ev = elem.text
            return compare(ev, operator.lower())
        except NoSuchElementException:
            return False
Exemplo n.º 7
0
    def _populate_exp(self):
        """Play one epsilon-greedy step and append the transition to memory.

        With probability ``self.exploration`` an action index is drawn with
        ``self.rng.choice``; otherwise the predictor's Q-values are masked
        with ``get_mask`` and the best remaining index is used. After the
        trained role's move the other roles are played with ``step_auto``
        until it is ``ROLE_ID_TO_TRAIN``'s turn again or the game is over.
        When the game is over, the environment is reset and advanced to the
        trained role's first turn. The transition
        ``(old_s, act, reward, isOver)`` is then appended to ``self.mem``.
        """
        old_s = self._current_ob
        if self.rng.rand() <= self.exploration:
            # Exploration branch: the action index is not filtered by the mask.
            act = self.rng.choice(range(self.num_actions))
        else:
            mask = get_mask(to_char(self.player.get_curr_handcards()),
                            action_space,
                            to_char(self.player.get_last_outcards()))
            q_values = self.predictor(old_s[None, ...])[0][0]
            # Masked-out entries become NaN so nanargmax ignores them.
            q_values[mask == 0] = np.nan
            act = np.nanargmax(q_values)
            assert act < self.num_actions
        reward, isOver, _ = self.player.step_manual(to_value(
            action_space[act]))

        # step for AI
        # NOTE(review): step_manual is unpacked as (reward, isOver, _) while
        # step_auto is unpacked as (_, reward, _) - confirm both signatures.
        while not isOver and self.player.get_role_ID() != ROLE_ID_TO_TRAIN:
            _, reward, _ = self.player.step_auto()
            isOver = (reward != 0)
        # The reward sign is flipped when the trained role id is 2.
        if ROLE_ID_TO_TRAIN == 2:
            reward = -reward
        self._current_game_score.feed(reward)

        if isOver:
            # print('lord wins' if reward > 0 else 'farmer wins')
            self._player_scores.feed(self._current_game_score.sum)
            # print(self._current_game_score.sum)
            # Keep resetting until a fresh game reaches the trained role's
            # turn without ending first.
            while True:
                self.player.reset()
                # init_cards = np.arange(36)
                # self.player.prepare_manual(init_cards)
                self.player.prepare()
                early_stop = False
                while self.player.get_role_ID() != ROLE_ID_TO_TRAIN:
                    # NOTE: this rebinds reward and isOver, so the values
                    # stored in the Experience below come from this pre-start
                    # loop whenever it runs at least once.
                    _, reward, _ = self.player.step_auto()
                    isOver = (reward != 0)
                    if isOver:
                        print('prestart ends too early! now resetting env')
                        early_stop = True
                        break
                if early_stop:
                    continue
                # This assignment is repeated unconditionally after the
                # enclosing ``if isOver`` block.
                self._current_ob = self.get_state()
                break
            self._current_game_score.reset()
        self._current_ob = self.get_state()
        self.mem.append(Experience(old_s, act, reward, isOver))
Exemplo n.º 8
0
def play_one_episode(env, func):
    """Play one game and report whether the final reward was positive.

    On the turns of ``ROLE_ID_TO_TRAIN`` the action with the highest masked
    Q-value from ``func`` is played through ``env.step_manual``; all other
    turns are delegated to ``env.step_auto``. The loop ends as soon as a
    non-zero reward is returned.

    Returns:
        int: ``1`` if the final reward is greater than zero, else ``0``.
    """
    env.reset()
    env.prepare()
    reward = 0
    while reward == 0:
        if env.get_role_ID() != ROLE_ID_TO_TRAIN:
            _, reward, _ = env.step_auto()
            continue

        state = get_state(env)
        legal = get_mask(to_char(env.get_curr_handcards()), action_space,
                         to_char(env.get_last_outcards()))
        q_values = func(state[None, ...])[0][0]
        # Entries with a zero mask are excluded from the argmax via NaN.
        q_values[legal == 0] = np.nan
        best = np.nanargmax(q_values)
        reward, _, _ = env.step_manual(to_value(action_space[best]))
    return int(reward > 0)
Exemplo n.º 9
0
def play_one_episode(env, func):
    """Play one game and report whether the final reward was positive.

    On the turns of ``ROLE_ID_TO_TRAIN`` the move comes from
    ``func.predict(handcards, last_two_cards, prob_state)`` and is played
    through ``env.step_manual``; all other turns use ``env.step_auto``. The
    loop ends as soon as a non-zero reward is returned.

    Returns:
        int: ``1`` if the final reward is greater than zero, else ``0``.
    """
    env.reset()
    env.prepare()
    reward = 0
    while reward == 0:
        if env.get_role_ID() != ROLE_ID_TO_TRAIN:
            _, reward, _ = env.step_auto()
            continue

        hand = to_char(env.get_curr_handcards())
        recent = [to_char(cards) for cards in env.get_last_two_cards()]
        prob_state = env.get_state_prob()
        action = func.predict(hand, recent, prob_state)
        reward, _, _ = env.step_manual(to_value(action))
    return int(reward > 0)
Exemplo n.º 10
0
def ai_play():
    """Answer an AI-move request with the cards the AI wants to play.

    Reads the JSON payload from ``request`` (keys ``current_player``,
    ``player_cards``, ``last_player`` and ``last_move``), asks
    ``CEnv.step_auto_static`` for a move and returns it as
    ``jsonify({'move': ...})``. An empty move is reported as ``'P'``.

    ``player_cards`` is a ``|``-separated string; the entry at index
    ``current_player`` is the hand of the player to move.
    """
    data = request.json
    print(data)
    pos = int(data['current_player'])

    my_cards = trans_cards(data['player_cards'].split("|")[pos])

    # When the current player also made the last move there is nothing to
    # respond to, so the last move is treated as empty. It is only
    # translated when it is actually used.
    if int(data['last_player']) == pos:
        last_move = []
    else:
        last_move = trans_cards(data['last_move'])

    intention = to_char(
        CEnv.step_auto_static(Card.char2color(my_cards), to_value(last_move)))

    res = trans_cards_reverse(intention)
    if res == "":
        res = 'P'
    print("result is {}".format(res))
    return jsonify({'move': res})
Exemplo n.º 11
0
def play_one_episode(env, func, role_id):
    """Play one game and report whether the final reward was positive.

    On the turns of ``role_id`` the move comes from
    ``func.predict(handcards, last_two_cards, prob_state)`` and is played
    through ``env.step_manual``; all other turns use ``env.step_auto``. The
    loop ends as soon as a non-zero reward is returned.

    Returns:
        int: ``1`` if the final reward is greater than zero, else ``0``.
    """
    env.reset()
    env.prepare()

    reward = 0
    while reward == 0:
        if env.get_role_ID() != role_id:
            _, reward, _ = env.step_auto()
            continue

        hand = to_char(env.get_curr_handcards())
        recent = [to_char(cards) for cards in env.get_last_two_cards()]
        prob_state = env.get_state_prob()

        action = func.predict(hand, recent, prob_state)
        intention = to_value(action)
        reward, _, _ = env.step_manual(intention)
        # Checked after the move has been submitted, as in the original flow.
        assert (intention is not None)

    return int(reward > 0)
Exemplo n.º 12
0
    def _populate_exp(self):
        """Play one epsilon-greedy step and append the transition to memory.

        Steps alternate between two phases tracked by ``self._comb_mask``.
        While it is true, the chosen action is only recorded (reward 0, not
        over). While it is false, the action is looked up in
        ``self._action_space`` and played with ``step_manual``. After the
        trained role's move the other roles are played with ``step_auto``
        until it is ``ROLE_ID_TO_TRAIN``'s turn again or the game is over.
        When the game is over the environment is reset and advanced to the
        trained role's first turn; otherwise ``self._comb_mask`` is toggled.
        The transition is appended to ``self.mem`` together with the masks
        that were in effect when the action was chosen.
        """
        old_s = self._current_ob
        comb_mask = self._comb_mask
        if not self._comb_mask and self._fine_mask is not None:
            # Pad the fine mask with zeros up to the larger of the two action
            # counts so it always has the same length.
            fine_mask = self._fine_mask if self._fine_mask.shape[0] == max(self.num_actions[0], self.num_actions[1]) \
                else np.pad(self._fine_mask, (0, max(self.num_actions[0], self.num_actions[1]) - self._fine_mask.shape[0]), 'constant', constant_values=(0, 0))
        else:
            # The ``np.bool`` alias was deprecated in NumPy 1.20 and removed
            # in 1.24; the builtin ``bool`` yields the same dtype.
            fine_mask = np.ones(
                [max(self.num_actions[0], self.num_actions[1])], dtype=bool)
        last_cards_value = self.player.get_last_outcards()
        if self.rng.rand() <= self.exploration:
            if not self._comb_mask and self._fine_mask is not None:
                # Random exploration restricted to entries allowed by the
                # fine mask: disallowed entries become NaN before the argmax.
                q_values = np.random.rand(self.num_actions[1])
                q_values[np.where(np.logical_not(self._fine_mask))[0]] = np.nan
                act = np.nanargmax(q_values)
                # print(q_values)
                # print(act)
            else:
                act = self.rng.choice(
                    range(self.num_actions[0 if comb_mask else 1]))
        else:
            q_values = self.predictor(old_s[None, :, :, :],
                                      np.array([comb_mask]),
                                      np.array([fine_mask]))[0][0]
            if not self._comb_mask and self._fine_mask is not None:
                q_values = q_values[:self.num_actions[1]]
                # The predictor is expected to have pushed masked entries
                # below -100 already.
                assert np.all(q_values[np.where(np.logical_not(
                    self._fine_mask))[0]] < -100)
                q_values[np.where(np.logical_not(self._fine_mask))[0]] = np.nan
            act = np.nanargmax(q_values)
            assert act < self.num_actions[0 if comb_mask else 1]
            # print(q_values)
            # print(act)
            # clamp action to valid range
            act = min(act, self.num_actions[0 if comb_mask else 1] - 1)
        if comb_mask:
            # In this phase no cards are played.
            reward = 0
            isOver = False
        else:
            # Sanity check only: warn when a non-zero action does not beat
            # the last played cards. The move is still submitted below.
            if last_cards_value.size > 0:
                if act > 0:
                    if not CardGroup.to_cardgroup(
                            self._action_space[act]).bigger_than(
                                CardGroup.to_cardgroup(
                                    to_char(last_cards_value))):
                        print('warning, some error happened')
            # print(to_char(self.player.get_curr_handcards()))
            reward, isOver, _ = self.player.step_manual(
                to_value(self._action_space[act]))

            # print(self._action_space[act])

        # step for AI
        while not isOver and self.player.get_role_ID() != ROLE_ID_TO_TRAIN:
            _, reward, _ = self.player.step_auto()
            isOver = (reward != 0)
        # if landlord negate the reward
        if ROLE_ID_TO_TRAIN == 2:
            reward = -reward
        self._current_game_score.feed(reward)

        if isOver:
            # print('lord wins' if reward > 0 else 'farmer wins')
            self._player_scores.feed(self._current_game_score.sum)
            # print(self._current_game_score.sum)
            # Keep resetting until a fresh game reaches the trained role's
            # turn without ending first.
            while True:
                self.player.reset()
                # init_cards = np.arange(36)
                # self.player.prepare_manual(init_cards)
                self.player.prepare()
                self._comb_mask = True
                early_stop = False
                while self.player.get_role_ID() != ROLE_ID_TO_TRAIN:
                    _, reward, _ = self.player.step_auto()
                    isOver = (reward != 0)
                    if isOver:
                        print('prestart ends too early! now resetting env')
                        early_stop = True
                        break
                if early_stop:
                    continue
                self._current_ob, self._action_space = self.get_state_and_action_spaces(
                )
                break
            self._current_game_score.reset()
        else:
            self._comb_mask = not self._comb_mask
        # The chosen action is only passed on when the next step is in the
        # non-comb phase.
        self._current_ob, self._action_space = self.get_state_and_action_spaces(
            act if not self._comb_mask else None)
        self.mem.append(
            Experience(old_s, act, reward, isOver, comb_mask, fine_mask))
Exemplo n.º 13
0
def play_one_episode(env, func):
    """Play one game with ``func`` controlling role 2 and report the outcome.

    The environment is reset and prepared with ``np.arange(21)``. On the
    turns of role 2 the network outputs returned by ``func`` are masked and
    decoded into a card array that is played with ``env.step_manual``; every
    other turn is delegated to ``env.step_auto``. The loop ends as soon as a
    non-zero reward is returned.

    Returns:
        int: ``1`` if the final reward is greater than zero, else ``0``
        (the commented-out prints below label ``r > 0`` as a farmer win).
    """
    def take_action_from_prob(prob, mask):
        """Return the argmax of ``prob[0]`` over entries with non-zero mask."""
        prob = prob[0]
        # to avoid numeric difficulty
        # NOTE: this writes into the caller's array (prob[0] is not copied).
        prob[mask == 0] = -1
        return np.argmax(prob)

    # return char minor cards output
    def inference_minor_util60(role_id, handcards, num, is_pair, dup_mask, main_cards_char):
        """Choose ``num`` minor cards (or pairs) to attach to the main cards.

        ``handcards`` and ``dup_mask`` are modified in place: the main cards
        and every chosen minor card are removed from ``handcards``, and the
        chosen ranks are zeroed in ``dup_mask`` so they are not picked twice.
        """
        for main_card in main_cards_char:
            handcards.remove(main_card)

        s = get_mask(handcards, action_space, None).astype(np.float32)
        outputs = []
        minor_type = 1 if is_pair else 0
        for i in range(num):
            input_single, input_pair, _, _ = get_masks(handcards, None)
            # Only the last of the seven network outputs is used here.
            _, _, _, _, _, _, minor_response_prob = func(
                [np.array([role_id]), s.reshape(1, -1), np.zeros([1, 9085]), np.array([minor_type])]
            )

            # give minor cards
            mask = None
            if is_pair:
                # The pair mask is extended by two zero entries before being
                # combined with dup_mask (presumably the two jokers, which
                # cannot form a pair - TODO confirm).
                mask = np.concatenate([input_pair, [0, 0]]) * dup_mask
            else:
                mask = input_single * dup_mask

            minor_response = take_action_from_prob(minor_response_prob, mask)
            dup_mask[minor_response] = 0

            # convert network output to char cards
            handcards.remove(to_char(minor_response + 3))
            if is_pair:
                handcards.remove(to_char(minor_response + 3))
            s = get_mask(handcards, action_space, None).astype(np.float32)

            # save to output
            outputs.append(to_char(minor_response + 3))
            if is_pair:
                outputs.append(to_char(minor_response + 3))
        return outputs

    def inference_minor_cards60(role_id, category, s, handcards, seq_length, dup_mask, main_cards_char):
        """Dispatch to ``inference_minor_util60`` by card category.

        Returns ``None`` implicitly for any category not listed below. The
        ``s`` argument is not used.
        """
        if category == Category.THREE_ONE.value:
            return inference_minor_util60(role_id, handcards, 1, False, dup_mask, main_cards_char)
        if category == Category.THREE_TWO.value:
            return inference_minor_util60(role_id, handcards, 1, True, dup_mask, main_cards_char)
        if category == Category.THREE_ONE_LINE.value:
            return inference_minor_util60(role_id, handcards, seq_length, False, dup_mask, main_cards_char)
        if category == Category.THREE_TWO_LINE.value:
            return inference_minor_util60(role_id, handcards, seq_length, True, dup_mask, main_cards_char)
        if category == Category.FOUR_TWO.value:
            return inference_minor_util60(role_id, handcards, 2, False, dup_mask, main_cards_char)

    env.reset()
    init_cards = np.arange(21)
    # init_cards = np.append(init_cards[::4], init_cards[1::4])
    env.prepare_manual(init_cards)
    r = 0
    while r == 0:
        last_cards_value = env.get_last_outcards()
        last_cards_char = to_char(last_cards_value)
        # NOTE: last_out_cards is computed but not used below.
        last_out_cards = Card.val2onehot60(last_cards_value)
        last_category_idx = env.get_last_outcategory_idx()
        curr_cards_char = to_char(env.get_curr_handcards())
        # "Active" means there are no last played cards to respond to.
        is_active = True if last_cards_value.size == 0 else False

        s = get_mask(curr_cards_char, action_space, None if is_active else last_cards_char).astype(np.float32)
        last_state = get_mask(last_cards_char, action_space, None).astype(np.float32)
        # print(s.shape)

        role_id = env.get_role_ID()
        # print('%s current cards' % ('lord' if role_id == 2 else 'farmer'), curr_cards_char)

        intention = None
        if role_id == 2:
            if is_active:

                # first get mask
                decision_mask, response_mask, _, length_mask = get_mask_alter(curr_cards_char, [], last_category_idx)

                # Outputs 4-6 of the network are the active-play heads.
                _, _, _, active_decision_prob, active_response_prob, active_seq_prob, _ = func(
                    [np.array([role_id]), s.reshape(1, -1), np.zeros([1, 9085]), np.zeros([s.shape[0]])]
                )

                # make decision depending on output
                active_decision = take_action_from_prob(active_decision_prob, decision_mask)

                # Decision indices are 0-based; category values are shifted by 1.
                active_category_idx = active_decision + 1

                # get response
                active_response = take_action_from_prob(active_response_prob, response_mask[active_decision])

                seq_length = 0
                # next sequence length
                if active_category_idx == Category.SINGLE_LINE.value or \
                        active_category_idx == Category.DOUBLE_LINE.value or \
                        active_category_idx == Category.TRIPLE_LINE.value or \
                        active_category_idx == Category.THREE_ONE_LINE.value or \
                        active_category_idx == Category.THREE_TWO_LINE.value:
                    seq_length = take_action_from_prob(active_seq_prob, length_mask[active_decision][active_response]) + 1

                # give main cards
                intention = give_cards_without_minor(active_response, last_cards_value, active_category_idx, seq_length)

                # then give minor cards
                if active_category_idx == Category.THREE_ONE.value or \
                        active_category_idx == Category.THREE_TWO.value or \
                        active_category_idx == Category.THREE_ONE_LINE.value or \
                        active_category_idx == Category.THREE_TWO_LINE.value or \
                        active_category_idx == Category.FOUR_TWO.value:
                    # dup_mask blocks the ranks already used by the main
                    # cards from being chosen as minor cards.
                    dup_mask = np.ones([15])
                    if seq_length > 0:
                        for i in range(seq_length):
                            dup_mask[intention[0] - 3 + i] = 0
                    else:
                        dup_mask[intention[0] - 3] = 0
                    # Copies of s and the hand are passed because the helper
                    # removes cards from the hand it receives.
                    intention = np.concatenate([intention,
                                                to_value(inference_minor_cards60(role_id, active_category_idx, s.copy(),
                                                                                 curr_cards_char.copy(), seq_length,
                                                                                 dup_mask, to_char(intention)))])
            else:
                # print(to_char(last_cards_value), is_bomb, last_category_idx)
                decision_mask, response_mask, bomb_mask, _ = get_mask_alter(curr_cards_char, to_char(last_cards_value),
                                                                            last_category_idx)

                # Outputs 1-3 of the network are the passive-play heads.
                passive_decision_prob, passive_bomb_prob, passive_response_prob, _, _, _, _ = func(
                    [np.array([role_id]), s.reshape(1, -1), last_state.reshape(1, -1), np.zeros([s.shape[0]])])

                passive_decision = take_action_from_prob(passive_decision_prob, decision_mask)

                # Decision 0 plays nothing, 1 plays four of a kind, 2 plays
                # the fixed pair [16, 17], 3 responds in the last category.
                if passive_decision == 0:
                    intention = np.array([])
                elif passive_decision == 1:

                    passive_bomb = take_action_from_prob(passive_bomb_prob, bomb_mask)

                    # converting 0-based index to 3-based value
                    intention = np.array([passive_bomb + 3] * 4)

                elif passive_decision == 2:
                    intention = np.array([16, 17])
                elif passive_decision == 3:
                    passive_response = take_action_from_prob(passive_response_prob, response_mask)

                    intention = give_cards_without_minor(passive_response, last_cards_value, last_category_idx, None)
                    if last_category_idx == Category.THREE_ONE.value or \
                            last_category_idx == Category.THREE_TWO.value or \
                            last_category_idx == Category.THREE_ONE_LINE.value or \
                            last_category_idx == Category.THREE_TWO_LINE.value or \
                            last_category_idx == Category.FOUR_TWO.value:
                        dup_mask = np.ones([15])
                        seq_length = get_seq_length(last_category_idx, last_cards_value)
                        if seq_length:
                            for i in range(seq_length):
                                dup_mask[intention[0] - 3 + i] = 0
                        else:
                            dup_mask[intention[0] - 3] = 0
                        intention = np.concatenate([intention,
                                                    to_value(inference_minor_cards60(role_id, last_category_idx, s.copy(),
                                                                                     curr_cards_char.copy(), seq_length,
                                                                                     dup_mask, to_char(intention)))])
            # since step auto needs full last card group info, we do not explicitly feed card type
            r, _, _ = env.step_manual(intention)
            # print('lord gives', to_char(intention))
            # NOTE(review): this check runs after the move has already been
            # submitted to the environment.
            assert (intention is not None)
        else:
            intention, r, _ = env.step_auto()
            # print('farmer gives', to_char(intention))
    # if r > 0:
    #     print('farmer wins')
    # else:
    #     print('lord wins')
    return int(r > 0)
Exemplo n.º 14
0
    def auto_shot_poker(self):
        """Choose this player's cards automatically and send them to the server.

        When the three players together hold at most ``AiPlayer.MCT_THRESH``
        cards, the move is chosen with ``mcsearch``. Otherwise it comes from
        ``self.predictor.predict``, and the top-ranked combinations and groups
        it reports are additionally sent as ``Pt.REQ_Q_COMB`` and
        ``Pt.REQ_Q_FINE`` packets. In both cases the chosen cards are sent as
        a ``Pt.REQ_SHOT_POKER`` packet, scheduled on the IOLoop with a delay.
        """
        def pokers_to_char(cards):
            # Convert pokers via rule._to_cards, then rename 'w' -> '*',
            # 'W' -> '$' and '0' -> '10'.
            cards = rule._to_cards(cards)
            for i, card in enumerate(cards):
                if card == 'w':
                    cards[i] = '*'
                elif card == 'W':
                    cards[i] = '$'
                elif card == '0':
                    cards[i] = '10'
            return cards

        def char_to_pokers(cards):
            # Inverse renaming of pokers_to_char on a copy of ``cards``, then
            # map back to pokers of this player's hand via rule._to_pokers.
            cards = cards.copy()
            for i, card in enumerate(cards):
                if card == '*':
                    cards[i] = 'w'
                elif card == '$':
                    cards[i] = 'W'
                elif card == '10':
                    cards[i] = '0'
            return rule._to_pokers(self.hand_pokers, cards)

        def char2ccardgroup(chars):
            # Build a CCardGroup from the CardGroup parsed out of ``chars``;
            # each CCard is constructed from to_value(c) - 3.
            cg = CardGroup.to_cardgroup(chars)
            ccg = CCardGroup([CCard(to_value(c) - 3) for c in cg.cards],
                             CCategory(cg.type), cg.value, cg.len)
            return ccg

        def ccardgroup2char(cg):
            # Inverse direction: shift each card back up by 3 before to_char.
            return [to_char(int(c) + 3) for c in cg.cards]

        handcards_char = pokers_to_char(self.hand_pokers)
        total_cards_cnt = sum(
            [len(self.table.players[i].hand_pokers) for i in range(3)])
        if total_cards_cnt <= AiPlayer.MCT_THRESH:
            # Few cards left on the table: use the search.
            chandcards = [CCard(to_value(c) - 3) for c in handcards_char]
            unseen_cards = pokers_to_char(
                self.table.players[(self.table.whose_turn + 1) %
                                   3].hand_pokers +
                self.table.players[(self.table.whose_turn + 2) %
                                   3].hand_pokers)
            cunseen_cards = [CCard(to_value(c) - 3) for c in unseen_cards]
            next_handcards_cnt = len(
                self.table.players[(self.table.whose_turn + 1) %
                                   3].hand_pokers)

            # If the player to move is the controller, there is nothing to
            # respond to and the last shot is treated as empty.
            last_shot_poker = self.table.last_shot_poker if self.table.whose_turn != self.table.controller else []
            last_cg = char2ccardgroup(pokers_to_char(last_shot_poker))
            # NOTE(review): ``not controller`` is also true for a controller
            # value of 0, and this default is applied only after
            # last_shot_poker was derived from it - confirm both are intended.
            if not self.table.controller:
                self.table.controller = self.table.whose_turn
            # Seats are passed relative to the lord's seat.
            caction = mcsearch(
                chandcards, cunseen_cards, next_handcards_cnt, last_cg,
                (self.table.whose_turn - self.table.lord_turn + 3) % 3,
                (self.table.controller - self.table.lord_turn + 3) % 3, 10, 50,
                500)
            intention = ccardgroup2char(caction)
        else:
            last_two_cards = self.table.get_last_two_cards()
            last_two_cards = [pokers_to_char(c) for c in last_two_cards]
            # # last_cards_char = ['10', 'J', 'Q', 'K', 'A']
            # # print(handcards_char)
            # # print(last_cards_char)
            # if self.table.last_shot_seat == self.seat:
            #     last_cards_char = []

            # Start from a 60-wide vector of ones with indices 53-55 and
            # 57-59 cleared (presumably the unused joker slots of the one-hot
            # layout - TODO confirm), then subtract this player's hand and
            # everything already played by all three seats.
            total_cards = np.ones([60])
            total_cards[53:56] = 0
            total_cards[57:60] = 0
            remain_cards = total_cards - Card.char2onehot60(
                handcards_char +
                pokers_to_char(self.table.history[self.seat] +
                               self.table.history[(self.seat + 1) % 3] +
                               self.table.history[(self.seat + 2) % 3]))
            next_cnt = len(self.table.players[(self.seat + 1) % 3].hand_pokers)
            next_next_cnt = len(self.table.players[(self.seat + 2) %
                                                   3].hand_pokers)
            # Split the remaining cards between the two other players in
            # proportion to their hand sizes.
            next_state = remain_cards * (next_cnt / (next_cnt + next_next_cnt))
            next_next_state = remain_cards * (next_next_cnt /
                                              (next_cnt + next_next_cnt))
            prob_state = np.concatenate([next_state, next_next_state])
            assert np.all(prob_state < 1.) and np.all(prob_state >= 0.)
            # print(self.table.last_shot_poker)
            # print(self.hand_pokers)
            # print(self.table.players[self.seat].hand_pokers)
            intention, combs, groups = self.predictor.predict(
                handcards_char, last_two_cards, prob_state)
            # print(intention)

            top_k = 5
            top_combs = combs[:top_k]
            a, q = zip(*top_combs)
            # Truncate each combination (in place) right after the point
            # where its accumulated cards equal the whole hand.
            for comb in a:
                test = []
                for i, c in enumerate(comb):
                    test += c
                    if collections.Counter(test) == collections.Counter(
                            handcards_char):
                        del comb[i + 1:]
                        break
            top_combs = list(
                zip([[char_to_pokers(c) for c in comb] for comb in a], q))
            # print(top_combs)
            top_groups = groups[:top_k]
            a, q = zip(*top_groups)
            top_groups = list(zip([char_to_pokers(g) for g in a], q))
            # print(top_groups)

            # if not self.table.last_shot_poker or self.table.last_shot_seat == self.seat:
            #     pokers.append(self.hand_pokers[0])
            # else:
            #     pokers = rule.cards_above(self.hand_pokers, self.table.last_shot_poker)

            # The two informational packets are scheduled 1 and 2 time units
            # from now via IOLoop.call_later.
            packet_comb = [Pt.REQ_Q_COMB, top_combs]
            packet_fine = [Pt.REQ_Q_FINE, top_groups]
            IOLoop.current().call_later(1, self.to_server, packet_comb)
            IOLoop.current().call_later(2, self.to_server, packet_fine)

        pokers = char_to_pokers(intention)
        packet = [Pt.REQ_SHOT_POKER, pokers]
        # IOLoop.current().add_callback(self.to_server, packet)
        IOLoop.current().call_later(2, self.to_server, packet)
Exemplo n.º 15
0
 def char2ccardgroup(chars):
     """Convert ``chars`` into a ``CCardGroup``.

     ``chars`` is parsed with ``CardGroup.to_cardgroup``; each of the
     resulting cards becomes a ``CCard`` built from ``to_value(card) - 3``,
     and the group's type (wrapped in ``CCategory``), value and length are
     passed through unchanged.
     """
     group = CardGroup.to_cardgroup(chars)
     ccards = [CCard(to_value(card) - 3) for card in group.cards]
     return CCardGroup(ccards, CCategory(group.type), group.value, group.len)
Exemplo n.º 16
0
    def run(self):
        """Play episodes forever, exchanging states and actions over ZeroMQ.

        A PUSH socket is connected to ``self.c2s`` and a DEALER socket to
        ``self.s2c``, both tagged with ``self.identity``. For roles listed in
        ``ROLE_IDS_TO_TRAIN`` the current state is pushed out and the reply
        received on the DEALER socket supplies the action index and the next
        LSTM state; all other roles are advanced with ``player.step_auto()``.
        The loop has no exit condition.
        """
        player = self._build_player()

        ctx = zmq.Context()
        push_sock = ctx.socket(zmq.PUSH)
        push_sock.setsockopt(zmq.IDENTITY, self.identity)
        push_sock.set_hwm(10)
        push_sock.connect(self.c2s)

        dealer_sock = ctx.socket(zmq.DEALER)
        dealer_sock.setsockopt(zmq.IDENTITY, self.identity)
        dealer_sock.connect(self.s2c)

        player.reset()
        player.prepare()
        reward = 0
        episode_over = False
        rnn_state = np.zeros([1024 * 2])
        while True:
            role_id = player.get_role_ID()
            if role_id not in ROLE_IDS_TO_TRAIN:
                # Seats that are not being trained play automatically; a
                # non-zero reward is treated as the end of the episode.
                _, reward, _ = player.step_auto()
                episode_over = (reward != 0)
            else:
                prob_state = player.get_state_prob()
                all_state = player.get_state_all_cards()
                hand_value = player.get_curr_handcards()
                last_value = player.get_last_outcards()
                # The category index is fetched but not used below.
                player.get_last_outcategory_idx()

                prob_state = np.concatenate(
                    [Card.val2onehot60(hand_value), prob_state])

                # "Active" means there are no last out-cards to respond to.
                is_active = not last_value.size > 0
                hand_chars = to_char(hand_value)
                last_chars = None if is_active else to_char(last_value)
                mask = get_mask(hand_chars, action_space, last_chars)
                if is_active:
                    # Action 0 is disabled when playing actively.
                    mask[0] = 0

                last_two = player.get_last_two_cards()
                first_onehot = Card.val2onehot60(last_two[0])
                second_onehot = Card.val2onehot60(last_two[1])
                last_two_onehot = np.concatenate([first_onehot, second_onehot])

                # `reward` / `episode_over` describe the outcome of the
                # previous action that led to this state, so this tuple is
                # not the same as the one put into the memory buffer.
                message = (self.identity, role_id, prob_state, all_state,
                           last_two_onehot, mask, int(not is_active),
                           rnn_state, reward, episode_over)
                push_sock.send(dumps(message), copy=False)

                reply = dealer_sock.recv(copy=False)
                action_idx, rnn_state = loads(reply.bytes)

                chosen = to_value(action_space[action_idx])
                reward, episode_over, _ = player.step_manual(chosen)

            if episode_over:
                # Start the next episode immediately with a zeroed LSTM state.
                player.reset()
                player.prepare()
                rnn_state = np.zeros([1024 * 2])
Exemplo n.º 17
0
    # Python env usage: play one episode on Env, letting mcsearch choose
    # every move. (Fragment: the enclosing definition and the body of the
    # final loop are outside the visible excerpt.)
    env = Env(['1', '2', '3'])
    agent_names = ['1', '2', '3']

    for _ in range(1):
        env.reset()
        env.prepare()
        done = False
        while not done:
            # The print('here') calls look like leftover debug markers.
            print('here')
            handcards = env.get_curr_handcards()

            # Return value is discarded here.
            env.get_state_prob()
            # Timestamp taken before the search; not read in the visible part.
            t = time.perf_counter()
            # Current hand as CCard objects (to_value result shifted by -3).
            chandcards = [CCard(to_value(c) - 3) for c in handcards]
            # Cards of the two seats after the current one, concatenated.
            # NOTE(review): the lookup uses the local agent_names while the
            # modulus uses env.agent_names - presumably identical; confirm.
            unseen_cards = env.player_cards[agent_names[(env.get_current_idx() + 1) % len(env.agent_names)]].copy() \
                            + env.player_cards[agent_names[(env.get_current_idx() + 2) % len(env.agent_names)]].copy()
            print('here')
            cunseen_cards = [CCard(to_value(c) - 3) for c in unseen_cards]
            print('here')
            # Number of cards held by the seat right after the current one.
            next_handcards_cnt = len(env.player_cards[agent_names[(env.get_current_idx() + 1) % len(env.agent_names)]])

            last_cg = char2ccardgroup(env.get_last_outcards())
            # The last two arguments are the positions of env.curr_player and
            # env.controller within env.agent_names.
            caction = mcsearch(chandcards, cunseen_cards, next_handcards_cnt, last_cg, env.agent_names.index(env.curr_player), env.agent_names.index(env.controller))

            # Convert the search result back to chars and apply it.
            action = ccardgroup2char(caction)
            print(action)
            winner, done = env.step(action)
            if done:
                for agent_name in agent_names:
Exemplo n.º 18
0
 def intention(self, env):
     """Return the move chosen by ``CEnv.step_auto_static`` for ``env``.

     The current hand is passed through ``Card.char2color`` and the last
     out-cards through ``to_value``; the result is mapped with ``to_char``.
     """
     hand_colors = Card.char2color(env.get_curr_handcards())
     last_values = to_value(env.get_last_outcards())
     chosen = CEnv.step_auto_static(hand_colors, last_values)
     return to_char(chosen)
Exemplo n.º 19
0
 def step(self, intention):
     """Apply ``intention`` through ``step_manual`` and return ``(r, done)``.

     ``intention`` is converted with ``to_value`` first; the third element
     returned by ``step_manual`` is dropped.
     """
     outcome = self.step_manual(to_value(intention))
     reward, finished, _ = outcome
     return reward, finished
Exemplo n.º 20
0
def play_one_episode(env, func):
    """Play one episode on ``env`` and report the outcome as 0 or 1.

    Seats whose role id is in ``ROLE_IDS_TO_TRAIN`` choose their move from the
    output of ``func``; every other seat is advanced with ``env.step_auto()``.
    The episode runs until a step yields a non-zero reward.

    Args:
        env: game environment; it is reset and prepared before play starts.
        func: callable invoked as ``func(role_ids, state, last_two_cards,
            lstm_state)`` and expected to return the triple
            ``(active_prob, passive_prob, lstm_state)``.

    Returns:
        ``1`` if the final reward is positive, ``0`` otherwise.
    """
    def masked_argmax(batched_prob, legal):
        # Entries that the mask rules out are overwritten in place with -1
        # (to avoid numeric difficulty) before taking the argmax.
        row = batched_prob[0]
        row[legal == 0] = -1
        return np.argmax(row)

    env.reset()
    env.prepare()
    reward = 0
    rnn_state = np.zeros([1024 * 2])
    while reward == 0:
        last_value = env.get_last_outcards()
        last_chars = to_char(last_value)
        last_two = env.get_last_two_cards()
        first_onehot = Card.val2onehot60(last_two[0])
        second_onehot = Card.val2onehot60(last_two[1])
        last_two_onehot = np.concatenate([first_onehot, second_onehot])
        hand_chars = to_char(env.get_curr_handcards())
        # "Active" means there are no last out-cards to respond to.
        is_active = last_value.size == 0

        prob_state = env.get_state_prob()
        state = np.concatenate([Card.char2onehot60(hand_chars), prob_state])

        role_id = env.get_role_ID()
        if role_id not in ROLE_IDS_TO_TRAIN:
            # Seats that are not being evaluated play automatically.
            _, reward, _ = env.step_auto()
            continue

        # Build the mask first; action 0 is not valid when playing actively.
        mask = get_mask(hand_chars, action_space,
                        None if is_active else last_chars)
        if is_active:
            mask[0] = 0
            last_input = np.zeros([1, 120])
        else:
            last_input = last_two_onehot.reshape(1, -1)

        active_prob, passive_prob, rnn_state = func(
            np.array([role_id]),
            state.reshape(1, -1),
            last_input,
            rnn_state.reshape(1, -1))

        # Decide from the head that matches the current mode.
        head_prob = active_prob if is_active else passive_prob
        action_idx = masked_argmax(head_prob, mask)

        # Since step auto needs full last card group info, the card type is
        # not fed explicitly.
        intention = to_value(action_space[action_idx])
        reward, _, _ = env.step_manual(intention)
        assert intention is not None

    return int(reward > 0)