import numpy as np

# CardFeatureCompute, CardFeatureVectorCompute and Card are provided by the
# surrounding project (the feature-computation helpers and the engine's card type).


class FeatureStrengthOffline:
    step_map = {
        0: 0,
        3: 1,
        4: 2,
        5: 3
    }

    def __init__(self):
        self.cfc = CardFeatureCompute()
        self.cfvc = CardFeatureVectorCompute()
        self.feature_num = self.cfc.feature_num
        self.step_num = 4
        self.feature_prediction_map = np.zeros([self.step_num, self.feature_num])
        self.feature_frequency_map = np.zeros([self.step_num, self.feature_num])
        # each entry holds 2 elements: 1. a list of up to 4 step vectors 2. the round result (0/1)
        self.info_list = []
        # feature vectors of the streets seen in the current round (up to 4)
        self.step_list = []

    def raw_feed(self, hole, community, step):
        # step is the number of community cards: 0, 3, 4 or 5
        assert step in [0, 3, 4, 5]
        hole_list = []
        community_list = []
        step_true = self.step_map[step]
        for card in hole:
            hole_list.append(Card.from_str(card))
        for card in community:
            community_list.append(Card.from_str(card))
        feature_vector = self.get_step_feature_vector(hole_list, community_list, step_true)
        self.step_list.append(feature_vector)

    def feed_result(self, result):
        assert(len(self.step_list) < 5)
        self.info_list.append([self.step_list, result])
        self.step_list = []

    def get_step_feature_dict(self, hole, community, step):
        assert((step >= 0) and (step < 4))
        feature_dict = self.cfc.fetch_feature(hole, community)
        return feature_dict

    def get_step_feature_vector(self, hole, community, step):
        assert((step >= 0) and (step < 4))
        feature_vector = np.array(self.cfvc.fetch_feature(hole, community))
        return feature_vector

    def feed_bunch_feature_prob_map(self, round_info_list):
        for info in round_info_list:
            self.feed_round_feature_prob_map(info[0], info[1])

    def feed_round_feature_prob_map(self, feature_vector_list, result):
        assert(len(feature_vector_list) < 5)
        for i in range(len(feature_vector_list)):
            self.feed_step_feature_prob_map(feature_vector_list[i], result, i)

    def feed_step_feature_prob_map(self, feature_vector, result, step):
        vfunc_f = np.vectorize(self.check_if_present)
        vfunc_p = np.vectorize(self.compare_predict)
        self.feature_frequency_map[step] = np.add(self.feature_frequency_map[step], vfunc_f(feature_vector))
        self.feature_prediction_map[step] = np.add(self.feature_prediction_map[step], vfunc_p(feature_vector, result))

    def feed_self_feature_prob_map(self):
        self.feed_bunch_feature_prob_map(self.info_list)

    def output_feature_map(self):
        final_feature_map = []
        for i in range(self.step_num):
            f = self.feature_frequency_map[i]
            p = self.feature_prediction_map[i]
            feature_step_map = np.true_divide(p, f, out=np.zeros_like(p), where=f != 0)
            final_feature_map.append(feature_step_map)
        return final_feature_map

    def output_weight_suggest(self):
        final_weight_suggest = []
        normalised_feature_p = self.output_feature_map()
        for v in normalised_feature_p:
            total = np.sum(v)
            if total == 0:
                final_weight_suggest.append(np.zeros(self.feature_num))
            else:
                normalised = np.true_divide(v, total)
                final_weight_suggest.append(normalised)
        return final_weight_suggest

    @staticmethod
    def check_if_present(predict):
        return predict > 0.5

    @staticmethod
    def compare_predict(predict, result):
        return result if predict > 0.5 else 0
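
A minimal driving sketch for the class above; the hole cards, board and result are made-up data, and card strings are assumed to follow the engine's Card.from_str format (e.g. 'SA' for the ace of spades):

fso = FeatureStrengthOffline()

# one simulated round: feed the cards visible at each street, then the round result
hole = ['SA', 'HK']
boards = [[], ['D2', 'C7', 'H9'], ['D2', 'C7', 'H9', 'ST'], ['D2', 'C7', 'H9', 'ST', 'CQ']]
for community in boards:
    fso.raw_feed(hole, community, step=len(community))
fso.feed_result(1)  # 1 = round won, 0 = round lost

# accumulate all stored rounds, then read back the per-step statistics
fso.feed_self_feature_prob_map()
win_rate_per_feature = fso.output_feature_map()   # list of 4 arrays (prediction / frequency)
weight_suggestion = fso.output_weight_suggest()   # each array normalised to sum to 1, or all zeros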
Example #2
import collections
import pprint

import numpy as np

from pypokerengine.players import BasePokerPlayer

# Card_util, CardFeatureVectorCompute, Trained_hand_feature and Trained_other_feature
# are provided by the surrounding project.


class RTPlayer(BasePokerPlayer):
    def __init__(self):
        super(RTPlayer, self).__init__()
        self.pp = pprint.PrettyPrinter(indent=2)

        ## basic records
        self.street_map = {'preflop': 0, 'flop': 1, 'turn': 2, 'river': 3}
        self.rev_street_map = {0: 'preflop', 1: 'flop', 2: 'turn', 3: 'river'}
        self.nParams = 4
        # self.learn_factor = [0.01, 0.01, 0.01, 0.01]
        self.learn_factor = 0
        self.accumulate = 0

        ## update every game
        self.initial_stack = 0
        self.seat_id = 0
        self.max_round = 0
        self.small_blind_amount = 0

        ## params used in training part
        ## update at the start of every round
        self.hole_card = []
        self.stack_record = [[0, 0], [0, 0]]
        self.total_gain = [0, 0]
        self.bet_has_placed = [0, 0]
        self.estimated_step_rewards = []
        self.ifRandom = False

        #update every street
        self.feature_vector = np.ones(self.nParams + 1)
        self.q_suggest = {'raise': 0, 'call': 0, 'fold': 0}
        self.street_idx = 0

        self.probs = {'raise': 0.5, 'call': 0.4, 'fold': 0.1}

        #TODO: how to initialize theta
        #
        self.step_theta = Trained_other_feature().get_weights()
        #[self.theta_single_step(self.nParams + 1),\
        # self.theta_single_step(self.nParams + 1),\
        # self.theta_single_step(self.nParams + 1),\
        # self.theta_single_step(self.nParams + 1)]
        # helper to compute the strength
        self.cfvc = CardFeatureVectorCompute()
        self.thf = Trained_hand_feature()
        self.eps = 0
        self.game_count = 0

        ## a list to keep record of all results
        self.results = []
        self.estimated_step_rewards = []

    def declare_action(self, valid_actions, hole_card, round_state):
        # check whether either player has already raised 4 times; if so, no further raise is allowed
        # flag is True -- raising is still allowed, otherwise it is not
        flag = True
        current_round = round_state['action_histories'][round_state['street']]
        uuid1 = round_state['seats'][0]['uuid']
        uuid2 = round_state['seats'][1]['uuid']
        # raise count for current round
        raiseCount = collections.defaultdict(int)
        for action_details in current_round:
            if action_details['action'] in ('RAISE', 'BIGBLIND'):
                # the big blind post is also counted as a 'RAISE'
                raiseCount[action_details['uuid']] += 1

        if raiseCount[uuid1] >= 4 or raiseCount[uuid2] >= 4:
            flag = False

        # read feature
        my_id = self.seat_id
        opp_id = 1 - my_id
        card_feature = self.cfvc.fetch_feature(
            Card_util.gen_cards(self.hole_card),
            Card_util.gen_cards(round_state['community_card']))
        card_feature = self.get_transferred_vec(card_feature)
        card_strength = np.dot(card_feature,
                               self.thf.get_strength(self.street_idx))
        my_stack = round_state['seats'][my_id]['stack']
        opp_stack = round_state['seats'][opp_id]['stack']
        my_bet = self.stack_record[my_id][1] - my_stack
        opp_bet = self.stack_record[opp_id][1] - opp_stack
        my_total_gain = self.total_gain[my_id]

        # get the feature vector for every possible action
        feature_vec = self.phi(card_strength, my_stack, opp_stack, my_bet,
                               opp_bet, my_total_gain)
        self.feature_vector = feature_vec

        # value for taking different action
        q_raise = np.dot(self.step_theta[self.street_idx]['raise'],
                         feature_vec)
        q_call = np.dot(self.step_theta[self.street_idx]['call'], feature_vec)
        q_fold = np.dot(self.step_theta[self.street_idx]['fold'], feature_vec)
        # print('raise %10.6f, call %10.6f, fold %10.6f' % (q_raise, q_call, q_fold))

        self.q_suggest['raise'] = q_raise
        self.q_suggest['call'] = q_call
        self.q_suggest['fold'] = q_fold

        sig_raise = self.sigmoid(q_raise)
        sig_call = self.sigmoid(q_call)
        sig_fold = self.sigmoid(q_fold)
        sum_sig = sig_raise + sig_call + sig_fold

        self.probs['raise'] = sig_raise / sum_sig
        self.probs['call'] = sig_call / sum_sig
        self.probs['fold'] = sig_fold / sum_sig

        # choose action
        next_action, probability = self.action_select_helper(
            valid_actions, flag)
        # print('next action: %s' % next_action)
        expected_reward = self.q_suggest[next_action]
        self.estimated_step_rewards.append([
            next_action, expected_reward, probability, self.street_idx,
            self.feature_vector
        ])
        return next_action
        # action returned here is sent to the poker engine

    def receive_game_start_message(self, game_info):
        # initialise stack record when enters the first round
        self.initial_stack = game_info['rule']['initial_stack']
        self.max_round = game_info['rule']['max_round']
        self.small_blind_amount = game_info['rule']['small_blind_amount']

        if game_info['seats'][0]['uuid'] == self.uuid:
            self.seat_id = 0
        else:
            self.seat_id = 1

        self.stack_record = [[self.initial_stack] * 2,
                             [self.initial_stack] * 2]

        self.game_count += 1
        # self.learn_factor = 0 if self.game_count > 10 else 0.01
        self.learn_factor = 0.01
        # self.learn_factor = np.floor(10 / self.game_count) / float(100)

    def receive_round_start_message(self, round_count, hole_card, seats):
        self.estimated_step_rewards = []
        self.total_gain = [
            self.stack_record[0][1] - self.initial_stack,
            self.stack_record[1][1] - self.initial_stack
        ]
        self.bet_has_placed = [0, 0]
        self.hole_card = hole_card

        r = np.random.rand()
        self.eps = self.epsilon(round_count)
        if r < self.eps:
            self.ifRandom = True
        else:
            self.ifRandom = False

    def receive_street_start_message(self, street, round_state):
        current_street = self.street_map[round_state['street']]
        self.street_idx = current_street

    def receive_game_update_message(self, action, round_state):
        pass

    def receive_round_result_message(self, winners, hand_info, round_state):
        # start training at the end of every round
        self.stack_record = [[self.stack_record[0][1], round_state['seats'][0]['stack']], \
                             [self.stack_record[1][1], round_state['seats'][1]['stack']]]

        true_reward = [self.stack_record[0][1] - self.stack_record[0][0], \
                       self.stack_record[1][1] - self.stack_record[1][0]][self.seat_id]

        # backtrack every step
        self.estimated_step_rewards = self.estimated_step_rewards[::-1]
        prob = 1

        for record in self.estimated_step_rewards:
            action = record[0]
            expected_reward = record[1]
            probability = record[2]
            step_idx = record[3]
            feature_vec = record[4]
            # print('action takes: %s, probability: %6.4f' % (action, prob))

            # update the theta
            prob *= probability
            delta = np.multiply(feature_vec, (true_reward - expected_reward) *
                                prob * self.learn_factor)
            # print('true reward: %6.3f, expected reward: %6.3f' % (true_reward, expected_reward))
            self.step_theta[step_idx][action] = np.add(
                self.step_theta[step_idx][action], delta)
        # self.pp.pprint(self.step_theta)

        self.accumulate += true_reward
        self.results.append(self.accumulate)

    # self.results.append(true_reward)

    def action_select_helper(self, valid_actions, flag):
        valid_acts = list(map(lambda x: x['action'], valid_actions))
        # remove raise if raise is not allowed
        if not flag and 'raise' in valid_acts:
            valid_acts.remove('raise')

        action_to_choose = {x: self.q_suggest[x] for x in valid_acts}
        num_valid = len(valid_acts)
        assert (num_valid > 0)
        max_action = max(action_to_choose, key=action_to_choose.get)

        if self.ifRandom:
            r = np.random.rand()
            action = ''
            if r < 0.5:
                action = 'call'
            elif r < 0.9 and num_valid == 3:
                action = 'raise'
            else:
                # print('here')
                action = 'fold'
            return action, self.probs[action]
        else:
            return max_action, self.probs[max_action]

    def theta_single_step(self, length):
        return {
            'raise': np.zeros(length),
            'call': np.zeros(length),
            'fold': np.zeros(length)
        }

    def phi(self, hand_strength, my_stack, opp_stack, my_bet, opp_bet,
            my_total_gain):
        return np.array([
            1,
            hand_strength,
            # self.diff_normal(my_bet, opp_bet),
            (my_bet - opp_bet) / float(10),
            self.diff_normal(my_stack, opp_stack),
            self.diff_normal(my_total_gain, -my_total_gain)
        ])

    def sigmoid(self, x):
        return float(1) / (1 + np.exp(-x))

    def diff_normal(self, x, y):
        if y == 0:
            return 1 if x > 0 else -1
        return self.sigmoid(float(x - y) / np.abs(y))

    def epsilon(self, round_count):
        # self.eps = float(1) / round_count
        # self.eps = float(1) / ((self.game_count - 1) * self.max_round + round_count)
        # self.eps = 0.1 + 0.9 * float(1) / round_count
        self.eps = 0.1
        return self.eps

    def get_result(self):
        #i = 0
        #avg = []
        #while i <= len(self.results) - part_size:
        #   sum = 0
        #   for j in range(part_size):
        #       sum += self.results[i + j]
        #   avg.append(float(sum) / part_size)
        #   i += part_size
        #return avg

        return self.results

    def get_transferred_vec(self, vec):
        vfunc_f = np.vectorize(self.zero_to_minus_one)
        return vfunc_f(vec)

    @staticmethod
    def zero_to_minus_one(a):
        return -1 if a < 0.5 else 1
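
A sketch of how RTPlayer might be run, assuming the standard PyPokerEngine game API (setup_config / register_player / start_poker) and an engine variant that, like the declare_action above, expects only the action string to be returned; the stakes and round count below are placeholder values:

from pypokerengine.api.game import setup_config, start_poker

trainer = RTPlayer()

# self-play for a short training run
config = setup_config(max_round=100, initial_stack=1000, small_blind_amount=10)
config.register_player(name="rt_trainer", algorithm=trainer)
config.register_player(name="rt_opponent", algorithm=RTPlayer())
game_result = start_poker(config, verbose=0)

# accumulated gain per round, as recorded in receive_round_result_message
print(trainer.get_result())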
Example #3
  def declare_action(self, valid_actions, hole_card, round_state):
    # check whether either player has already raised 4 times; if so, no further raise is allowed
    # flag is True -- raising is still allowed, otherwise it is not
    flag = True
    current_round = round_state['action_histories'][round_state['street']]
    uuid1 = round_state['seats'][0]['uuid']
    uuid2 = round_state['seats'][1]['uuid']
    # raise count for current round
    raiseCount = collections.defaultdict(int)
    for action_details in current_round:
        if action_details['action'] in ('RAISE', 'BIGBLIND'):
            # the big blind post is also counted as a 'RAISE'
            raiseCount[action_details['uuid']] += 1

    if raiseCount[uuid1] >= 4 or raiseCount[uuid2] >= 4:
        flag = False

    cfvc = CardFeatureVectorCompute()
    thf = Trained_hand_feature()
    card_feature = cfvc.fetch_feature(Card_util.gen_cards(hole_card), Card_util.gen_cards(round_state['community_card']))
    card_strength = np.dot(card_feature, thf.get_strength(self.street_map[round_state['street']]))

    # def act(self, theta, card_strength, isMe, my_stack, opponent_stack, curr_action, epsilon):
    # feature vector for different action
    isMe = True
    my_stack = round_state['seats'][self.seat_id]['stack']
    opponent_stack = round_state['seats'][1-self.seat_id]['stack']

    # get the feature vector for every possible action
    phiRAISE = self.phi(card_strength, isMe, my_stack, opponent_stack, 'raise')
    phiCALL = self.phi(card_strength, isMe, my_stack, opponent_stack, 'call')
    phiFOLD = self.phi(card_strength, isMe, my_stack, opponent_stack, 'fold')

    # value for taking different action
    qRAISE = self.evalModel(self.theta, phiRAISE)
    qCALL = self.evalModel(self.theta, phiCALL)
    qFOLD = self.evalModel(self.theta, phiFOLD)

    # choose the action with highest value as the next action
    # ties are broken in the order 'raise' > 'call' > 'fold'
    next_action = ''
    if qRAISE >= np.amax([qCALL, qFOLD]):
        next_action = 'raise'
    elif qCALL >= np.amax([qRAISE, qFOLD]):
        next_action = 'call'
    else:
        next_action = 'fold'

    # actions in valid_actions other than the current 'next_action'
    remain_actions = []
    for act in valid_actions:
        if act['action'] != next_action:
            remain_actions.append(act['action'])
    assert len(remain_actions) in [1, 2]

    if np.random.rand() < self.epsilon(round_state['round_count']) / 2 \
            or (not flag and next_action == 'raise') \
            or (len(valid_actions) == 2 and next_action == 'raise'):

        # Conditions for replacing next_action with a randomly chosen one:
        # 1. random_number < epsilon / 2
        # 2. next_action is 'raise' but 4 raises have already been made, so no further raise is allowed
        # 3. next_action is not in valid_actions (there are only two kinds of valid_action set: with/without 'raise')
        # If any of these holds, next_action is chosen randomly from remain_actions.
        if len(remain_actions) == 1:
            # only 1 action remains, choose it by default
            next_action = remain_actions[0]
        else:
            # 2 actions remain, choose between them uniformly at random
            if np.random.rand() < 0.5:
                next_action = remain_actions[0]
            else:
                next_action = remain_actions[1]

    # next_action is finalised, store the corresponding 'q' and 'phi'
    if next_action == 'raise':
        prob = qRAISE / (qRAISE + qCALL + qFOLD)
        self.estimated_step_rewards.append([next_action, qRAISE, phiRAISE, prob])
    elif next_action == 'call':
        prob = qCALL / (qRAISE + qCALL + qFOLD)
        self.estimated_step_rewards.append([next_action, qCALL, phiCALL, prob])
    else:
        prob = qFOLD / (qRAISE + qCALL + qFOLD)
        self.estimated_step_rewards.append([next_action, qFOLD, phiFOLD, prob])

    # print(next_action)

    return next_action # action returned here is sent to the poker engine
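
This declare_action relies on helpers (theta, phi, evalModel, epsilon, street_map, seat_id, estimated_step_rewards) defined elsewhere in its class. A minimal sketch of what they might look like for a linear Q-model, offered as an assumption rather than the project's actual code:

import numpy as np

class _Example3Helpers:
    """Hypothetical helpers assumed by Example #3's declare_action (not in the excerpt)."""

    def __init__(self, n_features=7):
        self.theta = np.zeros(n_features)          # linear Q-model weights
        self.seat_id = 0
        self.street_map = {'preflop': 0, 'flop': 1, 'turn': 2, 'river': 3}
        self.estimated_step_rewards = []

    def phi(self, card_strength, isMe, my_stack, opponent_stack, action):
        # shared state features plus a one-hot encoding of the candidate action
        one_hot = {'raise': [1, 0, 0], 'call': [0, 1, 0], 'fold': [0, 0, 1]}[action]
        stack_diff = (my_stack - opponent_stack) / float(my_stack + opponent_stack + 1)
        return np.array([1.0, card_strength, float(isMe), stack_diff] + one_hot)

    def evalModel(self, theta, feature_vec):
        # linear value estimate Q(s, a) = theta . phi(s, a)
        return float(np.dot(theta, feature_vec))

    def epsilon(self, round_count):
        # decaying exploration rate for the epsilon-greedy choice above
        return 1.0 / float(round_count)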