def run_one_step(current_round_idx, position, num_hand_cards, init_trick, hand_cards, is_hearts_broken, expose_info,
                 cards, score_cards=None, played_card=None, selection_func=random_choose, must_have=None,
                 void_info=None):
    """Run a single one-step simulation and return the card it selects.

    The cards of every trick in ``init_trick`` are rewritten IN PLACE as
    ``[suit, rank]`` pairs taken from ``str_to_bitmask``, so the caller's
    list is modified.  When ``cards`` is truthy the deals to simulate come
    from ``redistribute_cards`` (always called with seed ``1``); otherwise
    ``hand_cards`` itself is the only deal.

    Only the first deal is simulated: the function returns as soon as
    ``one_step_simulation`` has produced a card for it.

    Args:
        must_have: forwarded to ``redistribute_cards``; ``None`` means an
            empty dict.
        void_info: forwarded to ``redistribute_cards`` and
            ``one_step_simulation``; ``None`` means an empty dict.

    Returns:
        ``transform(INDEX_TO_NUM[card[1]], INDEX_TO_SUIT[card[0]])`` for the
        simulated card, or ``None`` when there is no deal to simulate.
    """
    # Fresh dicts per call: a shared ``{}`` default would be handed to the
    # downstream helpers and could carry state from one call into the next.
    must_have = {} if must_have is None else must_have
    void_info = {} if void_info is None else void_info

    # Replace each card by its [suit, rank] bitmask pair.  If str_to_bitmask
    # yields several entries for a one-card list, the last one wins.
    for _, trick in init_trick:
        for card_idx, card in enumerate(trick):
            for suit, rank in str_to_bitmask([card]).items():
                trick[card_idx] = [suit, rank]

    if played_card is not None:
        played_card = [SUIT_TO_INDEX[repr(played_card.suit)], NUM_TO_INDEX[repr(played_card.rank)]]

    if cards:
        simulation_cards = redistribute_cards(1, position, hand_cards, num_hand_cards, init_trick[-1][1], cards,
                                              must_have, void_info)
    else:
        simulation_cards = [hand_cards]

    for simulation_card in simulation_cards:
        # Deep copies keep the simulation from touching the caller's tricks
        # and score cards.
        card = one_step_simulation(current_round_idx, position, simulation_card, copy.deepcopy(init_trick),
                                   void_info, copy.deepcopy(score_cards), is_hearts_broken, expose_info,
                                   played_card, selection_func)

        return transform(INDEX_TO_NUM[card[1]], INDEX_TO_SUIT[card[0]])

    return None
def one_step_simulation(current_round_idx, position, hand_cards, tricks, void_info=None, score_cards=None,
                        is_hearts_borken=False, expose_info=False, played_card=None, selection_func=None):
    """Build a ``SimpleGame`` for one deal and return the result of a single step.

    ``hand_cards`` is converted IN PLACE: each player's entry is replaced by
    ``str_to_bitmask`` of that entry, so the caller's list is modified.

    Args:
        void_info: forwarded to ``SimpleGame``; ``None`` means an empty dict.
        is_hearts_borken: spelling kept as-is because it is part of this
            function's interface and is the keyword passed to ``SimpleGame``.
        played_card: accepted for signature compatibility; not used here.

    Returns:
        Whatever ``SimpleGame.just_run_one_step`` returns for
        ``current_round_idx`` and ``selection_func``.
    """
    # A shared ``{}`` default would be stored on every SimpleGame built
    # without an explicit void_info; give each call its own dict instead.
    void_info = {} if void_info is None else void_info

    for player_idx, cards in enumerate(hand_cards):
        hand_cards[player_idx] = str_to_bitmask(cards)

    sm = SimpleGame(position=position,
                    hand_cards=hand_cards,
                    void_info=void_info,
                    score_cards=score_cards,
                    is_hearts_borken=is_hearts_borken,
                    expose_info=expose_info,
                    tricks=tricks)

    return sm.just_run_one_step(current_round_idx, selection_func=selection_func)
def get_simple_game_info(self, state):
    """Collect the inputs a simple-game simulation needs from ``state``.

    Returns a 5-tuple ``(hand_cards, trick_cards, init_trick, must_have,
    selection_func)``:

    * ``hand_cards`` - four entries; only the one at ``self.position`` holds
      ``state._player_hands[self.position]`` (the same object, not a copy),
      the others are empty lists.
    * ``trick_cards`` - ``state.trick_cards`` as-is.
    * ``init_trick`` - ``[[None, trick]]`` where ``trick`` is a copy of
      ``state.trick`` with each card replaced by a ``[suit, rank]`` pair
      from ``str_to_bitmask``.
    * ``must_have`` - ``transfer_cards`` of the player at ``self.position``.
    * ``selection_func`` - ``expert_choose`` repeated four times.
    """
    hand_cards = []
    for seat in range(4):
        if seat != self.position:
            hand_cards.append([])
        else:
            hand_cards.append(state._player_hands[seat])

    trick_cards = state.trick_cards

    # Work on a shallow copy so state.trick itself keeps its card objects.
    current_trick = state.trick[:]
    for slot, raw_card in enumerate(current_trick):
        # If several entries come back for one card, the last one wins.
        for suit, rank in str_to_bitmask([raw_card]).items():
            current_trick[slot] = [suit, rank]
    init_trick = [[None, current_trick]]

    must_have = state.players[self.position].transfer_cards
    selection_func = [expert_choose for _ in range(4)]

    return hand_cards, trick_cards, init_trick, must_have, selection_func
def simulation(current_round_idx, position, hand_cards, tricks, void_info=None, score_cards=None,
               is_hearts_borken=False, expose_hearts_ace=False, played_card=None, selection_func=None,
               proactive_mode=None):
    """Play one ``SimpleGame`` to the end and report its outcome.

    ``hand_cards`` is converted IN PLACE: each player's entry is replaced by
    ``str_to_bitmask`` of that entry.

    Args:
        void_info: forwarded to ``SimpleGame``; ``None`` means an empty dict.
        is_hearts_borken: spelling kept because it is part of the interface
            and the keyword ``SimpleGame`` is called with.
        selection_func: sequence of strategy callables; one is drawn with
            ``choice`` for each of the four players.
        proactive_mode: when truthy, the player at ``position`` uses
            ``random_choose`` instead of a drawn strategy.

    Returns:
        ``(sm.played_card, scores, sm.score_cards, num_of_shoot_the_moon)``
        where ``scores`` and ``num_of_shoot_the_moon`` come from
        ``sm.score()``.

    Raises:
        Any exception raised while building or running the game propagates
        unchanged.  The former ``except: raise`` wrapper did exactly that and
        left its fallback code unreachable.
    """
    # Avoid sharing one default dict between every game built without an
    # explicit void_info.
    void_info = {} if void_info is None else void_info

    for player_idx, cards in enumerate(hand_cards):
        hand_cards[player_idx] = str_to_bitmask(cards)

    sm = SimpleGame(position=position,
                    hand_cards=hand_cards,
                    void_info=void_info,
                    score_cards=score_cards,
                    is_hearts_borken=is_hearts_borken,
                    expose_hearts_ace=expose_hearts_ace,
                    tricks=tricks)

    if IS_DEBUG:
        for player_idx, cards in enumerate(hand_cards):
            print("player-{}'s hand_cards is {}".format(player_idx, sm.translate_hand_cards(hand_cards[player_idx])))

    if proactive_mode:
        ff = {player_idx: random_choose if player_idx == position else choice(selection_func)
              for player_idx in range(4)}
    else:
        # One independent draw per player.  The old single-strategy special
        # case performed the same four draws.
        ff = {player_idx: choice(selection_func) for player_idx in range(4)}

    sm.run(current_round_idx, played_card=played_card, selection_func=ff)
    scores, num_of_shoot_the_moon = sm.score()

    if IS_DEBUG:
        sm.print_tricks(scores)
        print()

    return sm.played_card, scores, sm.score_cards, num_of_shoot_the_moon
def run_simulation(seed, current_round_idx, position, init_trick, hand_cards, is_hearts_broken, expose_info, cards,
                   score_cards=None, played_card=None, selection_func=random_choose, must_have=None, void_info=None,
                   proactive_mode=None, simulation_time=0.93):
    """Simulate deals until the time budget is used up and group outcomes by card.

    The cards of every trick in ``init_trick`` are rewritten IN PLACE as
    ``[suit, rank]`` pairs taken from ``str_to_bitmask``.  When ``cards`` is
    truthy the deals come from ``redistribute_cards``; otherwise
    ``hand_cards`` is the only deal.

    Args:
        must_have: forwarded to ``redistribute_cards``; ``None`` means an
            empty dict.
        void_info: forwarded to ``redistribute_cards`` and ``simulation``;
            ``None`` means an empty dict.
        simulation_time: time budget in seconds, never less than 0.1.

    Returns:
        dict mapping ``transform(rank, suit)`` of each card played by
        ``simulation`` to a list of ``[share, shoot_the_moon]`` entries, one
        per simulation, where ``share`` is ``scores[position] / sum(scores)``.

    Raises:
        ZeroDivisionError: if a simulation's scores sum to zero.
    """
    # Fresh dicts per call instead of shared mutable defaults.
    must_have = {} if must_have is None else must_have
    void_info = {} if void_info is None else void_info

    simulation_time = max(simulation_time, 0.1)
    stime = time.time()

    # Replace each card by its [suit, rank] bitmask pair.  If several
    # entries come back for one card, the last one wins.
    for _, trick in init_trick:
        for card_idx, card in enumerate(trick):
            for suit, rank in str_to_bitmask([card]).items():
                trick[card_idx] = [suit, rank]

    if played_card is not None:
        played_card = [SUIT_TO_INDEX[repr(played_card.suit)], NUM_TO_INDEX[repr(played_card.rank)]]

    if cards:
        simulation_cards = redistribute_cards(seed, position, hand_cards, init_trick[-1][1], cards, must_have,
                                              void_info)
    else:
        simulation_cards = [hand_cards]

    results = defaultdict(list)
    for simulation_card in simulation_cards:
        card, scores, _, self_shoot_the_moon = simulation(
            current_round_idx, position, simulation_card, copy.deepcopy(init_trick), void_info,
            copy.deepcopy(score_cards), is_hearts_broken, expose_info, played_card, selection_func, proactive_mode)

        if card is not None:
            sum_score = sum(scores)
            results[tuple(card)].append([scores[position] / sum_score, self_shoot_the_moon])

            # Debug runs stop after the first recorded simulation.
            if IS_DEBUG:
                break

        # Check the deadline on every iteration, including ones that
        # produced no card, so the time budget cannot be overrun.
        if time.time() - stime > simulation_time:
            break

    return {transform(INDEX_TO_NUM[card[1]], INDEX_TO_SUIT[card[0]]): info for card, info in results.items()}
def simulation(current_round_idx, position, hand_cards, tricks, void_info=None, score_cards=None,
               is_hearts_broken=False, expose_info=False, played_card=None, selection_func=None,
               proactive_mode=None):
    """Play one ``StepGame`` to the end and report its outcome.

    ``hand_cards`` is converted IN PLACE: each player's entry is replaced by
    ``str_to_bitmask`` of that entry.

    Args:
        void_info: forwarded to ``StepGame``; ``None`` means an empty dict.
        selection_func: sequence of strategy callables; one is drawn with
            ``choice`` for each of the four players.
        proactive_mode: when truthy, the player at ``position`` uses
            ``random_choose`` instead of a drawn strategy.

    Returns:
        ``(sm.played_card, scores, sm.score_cards, num_of_shoot_the_moon)``
        where ``scores`` and ``num_of_shoot_the_moon`` come from
        ``sm.score()``.

    Raises:
        Any exception raised while building or running the game propagates
        unchanged.  The former ``except Exception as e: raise`` wrapper did
        exactly that and left its fallback code unreachable.
    """
    # Avoid sharing one default dict between every game built without an
    # explicit void_info.
    void_info = {} if void_info is None else void_info

    for player_idx, cards in enumerate(hand_cards):
        hand_cards[player_idx] = str_to_bitmask(cards)

    # NOTE(review): must_have is hard-coded to player 0 holding the six and
    # queen of spades.  It looks like a leftover test fixture; confirm
    # whether it should come from the caller instead.
    sm = StepGame(current_round_idx,
                  position=position,
                  hand_cards=hand_cards,
                  trick_cards=None,
                  void_info=void_info,
                  score_cards=score_cards,
                  is_hearts_broken=is_hearts_broken,
                  expose_info=expose_info,
                  tricks=tricks,
                  must_have={0: [Card(Suit.spades, Rank.six), Card(Suit.spades, Rank.queen)]})

    if IS_DEBUG:
        for player_idx, cards in enumerate(hand_cards):
            print("player-{}'s hand_cards is {}".format(player_idx, translate_hand_cards(hand_cards[player_idx])))

    if proactive_mode:
        ff = {player_idx: random_choose if player_idx == position else choice(selection_func)
              for player_idx in range(4)}
    else:
        ff = {player_idx: choice(selection_func) for player_idx in range(4)}

    sm.run(current_round_idx, played_card=played_card, selection_func=ff)
    scores, num_of_shoot_the_moon = sm.score()

    if IS_DEBUG:
        sm.print_tricks(scores)
        print()

    return sm.played_card, scores, sm.score_cards, num_of_shoot_the_moon
def _rank_ratings(scores):
    """Return one rating per player: a placement term plus the player's score share.

    Players are ordered by descending score; the placement term is
    ``(4 - rank) / 4`` with rank 0 for the highest score, and players with
    equal scores share the rank of the first of them.  The share term is
    ``score / sum(scores)``.
    """
    total_score = sum(scores)
    share = [score / total_score for score in scores]

    rating = [0, 0, 0, 0]
    ranked = sorted(zip(range(4), scores), key=lambda item: -item[1])

    pre_score, pre_rating = None, None
    for rating_idx, (player_idx, score) in enumerate(ranked):
        tied = pre_score is not None and score == pre_score
        tmp_rating = pre_rating if tied else rating_idx

        rating[player_idx] = (4 - tmp_rating) / 4 + share[player_idx]
        pre_score, pre_rating = score, tmp_rating

    return rating


def run_simulation(seed, current_round_idx, position, num_hand_cards, init_trick, hand_cards, is_hearts_broken,
                   expose_hearts_ace, cards, score_cards=None, played_card=None, selection_func=random_choose,
                   must_have=None, void_info=None, proactive_mode=None, simulation_time=0.93):
    """Simulate deals until the time budget is used up and group ratings by card.

    The cards of every trick in ``init_trick`` are rewritten IN PLACE as
    ``[suit, rank]`` pairs taken from ``str_to_bitmask``.  When ``cards`` is
    truthy the deals come from ``redistribute_cards``; otherwise
    ``hand_cards`` is the only deal.

    Args:
        must_have: forwarded to ``redistribute_cards``; ``None`` means an
            empty dict.
        void_info: forwarded to ``redistribute_cards`` and ``simulation``;
            ``None`` means an empty dict.
        simulation_time: time budget in seconds, never less than 0.1.

    Returns:
        dict mapping ``transform(rank, suit)`` of each card played by
        ``simulation`` to a list of ``[rating, shoot_the_moon]`` entries, one
        per simulation, where ``rating`` is the value ``_rank_ratings``
        gives the player at ``position``.

    Raises:
        ZeroDivisionError: if a simulation's scores sum to zero.
    """
    # Fresh dicts per call instead of shared mutable defaults.
    must_have = {} if must_have is None else must_have
    void_info = {} if void_info is None else void_info

    simulation_time = max(simulation_time, 0.1)
    stime = time.time()

    # Replace each card by its [suit, rank] bitmask pair.  If several
    # entries come back for one card, the last one wins.
    for _, trick in init_trick:
        for card_idx, card in enumerate(trick):
            for suit, rank in str_to_bitmask([card]).items():
                trick[card_idx] = [suit, rank]

    if played_card is not None:
        played_card = [SUIT_TO_INDEX[repr(played_card.suit)], NUM_TO_INDEX[repr(played_card.rank)]]

    if cards:
        simulation_cards = redistribute_cards(seed, position, hand_cards, num_hand_cards, init_trick[-1][1], cards,
                                              must_have, void_info)
    else:
        simulation_cards = [hand_cards]

    results = defaultdict(list)
    for simulation_card in simulation_cards:
        card, scores, _, self_shoot_the_moon = simulation(
            current_round_idx, position, simulation_card, copy.deepcopy(init_trick), void_info,
            copy.deepcopy(score_cards), is_hearts_broken, expose_hearts_ace, played_card, selection_func,
            proactive_mode)

        if card is not None:
            rating = _rank_ratings(scores)
            results[tuple(card)].append([rating[position], self_shoot_the_moon])

            # Debug runs stop after the first recorded simulation.
            if IS_DEBUG:
                break

        # Check the deadline on every iteration, including ones that
        # produced no card, so the time budget cannot be overrun.
        if time.time() - stime > simulation_time:
            break

    return {transform(INDEX_TO_NUM[card[1]], INDEX_TO_SUIT[card[0]]): info for card, info in results.items()}
def get_move(self, first_player_idx, hand_cards, valid_cards, remaining_cards, score_cards, trick_cards,
             num_hand_cards, init_trick, void_info, must_have, selection_func, trick_nr, is_heart_broken,
             expose_info, is_only_played_card=False, simulation_time_limit=TIMEOUT_SECOND - 0.1,
             not_seen=False, is_reset_percentage=False):
    """Run playouts on redistributed deals until the time limit, then report on the root's children.

    Each deal from ``redistribute_cards`` is converted to bitmasks, wrapped
    in a ``StepGame`` and passed to ``self._playout``.  A deal whose playout
    raises is counted as failed and skipped.

    Args:
        is_only_played_card: when true return a single card key, otherwise
            return statistics for every child of ``self.start_node``.
        simulation_time_limit: seconds after which the playout loop stops.
        not_seen: forwarded to ``redistribute_cards``.  When true, the valid
            cards come from ``valid_cards`` and the candidate children are
            logged through ``say``.
        is_reset_percentage: accepted but not used by this method.

    Returns:
        If ``is_only_played_card``: the child key chosen among children with
        ``_P > 0`` whose key matches the valid-card bitmask, or ``None`` when
        no child qualifies.  The most-visited child wins unless a later one
        has a value more than 2.0 higher.
        Otherwise: dict mapping each child key to
        ``[node._n_visits, node.get_value(self._c_puct)]``, ordered by
        descending visit count.
    """
    stime = time.time()

    simulation_cards = redistribute_cards(randint(0, 64), self._self_player_idx, hand_cards[:], num_hand_cards,
                                          init_trick[-1][1], list(remaining_cards), must_have, void_info, not_seen)

    # Bitmask copy of the 13 tricks: (suit value, 1 << (rank value - 2)) per
    # played card, None for empty slots.
    b_trick_cards = []
    for idx in range(13):
        row = [None, None, None, None]
        for card_idx, card in enumerate(trick_cards[idx]):
            if card:
                row[card_idx] = (card.suit.value, 1 << (card.rank.value - 2))
        b_trick_cards.append(row)

    vcards = str_to_bitmask(valid_cards) if not_seen else None

    # ratio = [successful playouts, failed playouts]
    ratio, stats_shoot_the_moon = [0, 0], {}
    for simulation_card in simulation_cards:
        for player_idx, cards in enumerate(simulation_card):
            simulation_card[player_idx] = str_to_bitmask(cards)

        try:
            sm = StepGame(trick_nr,
                          position=first_player_idx,
                          hand_cards=simulation_card,
                          trick_cards=copy.deepcopy(b_trick_cards),
                          void_info=void_info,
                          score_cards=copy.deepcopy(score_cards),
                          is_hearts_broken=is_heart_broken,
                          expose_info=expose_info,
                          tricks=copy.deepcopy(init_trick),
                          must_have=must_have)

            if vcards is None:
                vcards = sm.get_valid_cards(sm.hand_cards[sm.start_pos], trick_nr + len(sm.tricks) - 1)

            if len(init_trick[-1][1]) == 4:
                sm.post_round_end()

            self._playout(trick_nr, sm, selection_func, self._c_puct)

            scores, is_shoot_the_moon = sm.score()
            if is_shoot_the_moon:
                shooter = scores.index(0)
                stats_shoot_the_moon[shooter] = stats_shoot_the_moon.get(shooter, 0) + 1

            ratio[0] += 1
        except Exception:
            # Best effort: a deal that cannot be played out is only counted;
            # the tally is reported when the time limit is hit.
            ratio[1] += 1

        if time.time() - stime > simulation_time_limit:
            # Report the first recorded shooter, if any.  Compare against
            # None explicitly: player index 0 is a valid shooter, and a
            # truthiness test would drop it.
            shooter, num = next(iter(stats_shoot_the_moon.items()), (None, None))

            if shooter is not None:
                say("ratio of success/failed is {}, shooter: {}, {}, {:.4f}%",
                    ratio, shooter, num, num*100/ratio[0])
            else:
                say("ratio of success/failed is {}", ratio)

            break

    ranked_children = sorted(self.start_node._children.items(), key=lambda item: -item[1]._n_visits)

    if not is_only_played_card:
        return {played_card: [node._n_visits, node.get_value(self._c_puct)]
                for played_card, node in ranked_children}

    # NOTE(review): if no simulation got far enough to compute the valid
    # cards, vcards is still None here and the next line raises
    # AttributeError.
    valid_cards = vcards
    vcards = [list(batch_bitmask_to_card(suit, ranks)) for suit, ranks in vcards.items()]

    if not_seen:
        for k, node in ranked_children:
            if node._P > 0 and valid_cards.get(k[0], 0) & k[1]:
                say("seen: {}, valid_cards: {}, {}-->{}: {} times, percentage: {:.4f}%, value: {:.4f}",
                    not not_seen, vcards, node._player_idx, bitmask_to_card(k[0], k[1]), node._n_visits,
                    node._P*100, node.get_value(self._c_puct))

    big_value, big_visits, big_card = -sys.maxsize, -sys.maxsize, None
    for played_card, node in ranked_children:
        if not (node._P > 0 and valid_cards.get(played_card[0], 0) & played_card[1]):
            continue

        value = node.get_value(self._c_puct)
        # Children arrive in descending visit order, so the first valid one
        # sets the baseline; a later one replaces it only when its value
        # beats the current best by more than 2.0.
        if node._n_visits > big_visits or value - 2.0 > big_value:
            big_visits = node._n_visits
            big_value = value
            big_card = played_card

    return big_card