def get_observation_size(game_config):
    """ Returns the length of the vectorized observation. """
    num_players = game_config['num_total_players']  # number of players in the game
    num_colors = game_config['colors']
    num_ranks = game_config['ranks']
    hand_size = game_config['hand_size']
    max_information_tokens = game_config['info_tokens']
    max_life_tokens = game_config['life_tokens']
    max_moves = game_config['max_moves']
    variant = game_config['variant']

    env = json_to_pyhanabi.create_env_mock(
        num_players=num_players,
        num_colors=num_colors,
        num_ranks=num_ranks,
        hand_size=hand_size,
        max_information_tokens=max_information_tokens,
        max_life_tokens=max_life_tokens,
        max_moves=max_moves,
        variant=variant)

    vec = vectorizer.ObservationVectorizer(env)
    return vec.total_state_length
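# A minimal usage sketch (not part of the module): the game_config dict is assumed
# to carry the lobby settings under the key names looked up above; the concrete
# values and the variant string are hypothetical.
#
#   game_config = {
#       'num_total_players': 4,
#       'colors': 5,
#       'ranks': 5,
#       'hand_size': 4,     # 4 cards are dealt to each of 4 players
#       'info_tokens': 8,
#       'life_tokens': 3,
#       'max_moves': 38,    # 4 discard + 4 play + 15 reveal-color + 15 reveal-rank
#       'variant': 'Hanabi-Full',
#   }
#   observation_size = get_observation_size(game_config)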
def __init__(self, game_config):
    """
    # ################################################ #
    # -------------------- CONFIG -------------------- #
    # ################################################ #
    """
    self.agent_name = game_config['username']  # used to identify our absolute position at the table
    self.num_players = game_config['num_total_players']  # number of players in the game
    self.max_life_tokens = game_config['life_tokens']
    self.max_info_tokens = game_config['info_tokens']
    self.max_deck_size = game_config['deck_size']
    self.deck_size = self.max_deck_size
    self.life_tokens = self.max_life_tokens
    self.information_tokens = self.max_info_tokens
    self.players = None  # list of names of the players currently in the game
    self.player_position = None  # the agent's absolute position at the table
    self.agents_turn = False  # flag that is True whenever it is our turn
    self.hand_size = 4 if self.num_players > 3 else 5  # 5 cards are dealt when playing with 2 or 3 players

    """
    # ################################################ #
    # ------- Observed Cards and Card knowledge ------ #
    # ################################################ #
    """
    """ New cards are prepended, so agent 1's initial draw looks like [4, 3, 2, 1] """
    # list of all players' hands as _seen_ by the calling agent (excluding clues)
    self.observed_hands = list()  # refreshed in self.update() on each notify message
    # list of clues given
    self.clues = list()  # refreshed in self.update() on each notify message
    # Unfortunately, the server references clued cards not by hand index but by an id between 0 and
    # deck size, so we store card numbers to map card ids to hand indices (see the sketch below).
    self.card_numbers = list()
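    # A minimal sketch of the id-to-index mapping (hypothetical values, not part of
    # the class): self.card_numbers mirrors self.observed_hands, newest card first,
    # so a clue targeting the server-side card id 17 resolves to a hand index via:
    #
    #   card_numbers = [21, 17, 9, 3]        # ids of the cards in one hand
    #   hand_index = card_numbers.index(17)  # -> 1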
    """
    # ################################################ #
    # ----------------- GAME STATS ------------------- #
    # ################################################ #
    """
    # refreshed in self.update() on each notify message
    self.fireworks = {'R': 0, 'Y': 0, 'G': 0, 'W': 0, 'B': 0}
    # list of discarded cards as returned by self.card(suit, rank)
    self.discard_pile = list()
    # Not actually contained in the dict returned by the rl_env.HanabiEnv
    # observation._extract_from_dict() method, but we need a history, so we add it here.
    # Alternatively, it could be added by appending
    # obs_dict['last_moves'] = observation.last_moves() in said method.
    self.last_moves = list()
    self.variant = game_config['variant']
    self.num_colors = game_config['colors']
    self.num_ranks = game_config['ranks']
    self.max_moves = game_config['max_moves']
    self.order = 0  # id of the next card drawn from the deck; incremented whenever a card is drawn/dealt

    """
    # ################################################ #
    # -------------- USE PYHANABI MOCKS -------------- #
    # ################################################ #
    """
    self.env = json_to_pyhanabi.create_env_mock(
        num_players=self.num_players,
        num_colors=self.num_colors,
        num_ranks=self.num_ranks,
        hand_size=self.hand_size,
        max_information_tokens=self.max_info_tokens,
        max_life_tokens=self.max_life_tokens,
        max_moves=self.max_moves,
        variant=self.variant)

    # Flag used to designate exactly one RL agent (the "admin") that keeps track of the human
    # players' environment observations, so that the vectorized observations stay synchronized.
    self.caller_is_admin = False
    if self.agent_name[-2:] == '00':
        self.caller_is_admin = True  # the admin is the first instance of the client class (see sketch below)
    # Used to determine when a human player is the target of a card hint and thus when hints
    # would be out of sync with the vectorizer's environment state.
    self.idx_human_player = -1
    # If the human player got card hints, the other vectorizer instances must know.
    self.vectorizer_is_synced = False
    self.vectorizer = vectorizer.ObservationVectorizer(self.env)
    self.legal_moves_vectorizer = vectorizer.LegalMovesVectorizer(self.env)
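    # A minimal sketch of the admin convention (hypothetical usernames, not part of the
    # class): with agents named 'rl_agent_00', 'rl_agent_01', ..., only the instance
    # whose username ends in '00' becomes the admin:
    #
    #   'rl_agent_00'[-2:] == '00'  # -> True, this instance is the admin
    #   'rl_agent_01'[-2:] == '00'  # -> False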