def create_data(self):
    raw_data = []
    for game_num in range(self.num_games):
        raw_data.append([[], []])
        observations = self.environment.reset()
        game_done = False
        while not game_done:
            for agent_id in range(self.num_players):
                observation = observations['player_observations'][agent_id]
                one_hot_action_vector, action = one_hot_vectorized_action(
                    self.agent_object, self.environment.num_moves(), observation)
                raw_data[game_num][0].append(
                    b2int.convert(observation['vectorized']))
                raw_data[game_num][1].append(one_hot_action_vector)
                if observation['current_player'] == agent_id:
                    assert action is not None
                    current_player_action = action
                else:
                    assert action is None
                observations, _, game_done, _ = self.environment.step(
                    current_player_action)
                if game_done:
                    if game_num % 1000 == 0:
                        print("game " + str(game_num) + " done")
                    break
    return raw_data
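# `b2int` is not defined in this section. Below is a minimal sketch of what
# such a module could provide, assuming it packs the 0/1 bits of the
# vectorized observation into a single Python integer to shrink the pickled
# data; the inverse `revert` helper and the exact bit order are assumptions.

def convert(bits):
    '''Pack a list of 0/1 ints into one integer (LSB = bits[0]).'''
    value = 0
    for i, bit in enumerate(bits):
        value |= (bit & 1) << i
    return value

def revert(value, length):
    '''Unpack an integer back into a list of 0/1 ints of the given length.'''
    return [(value >> i) & 1 for i in range(length)]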
def create_data(self):
    '''Create and return a list of games.

    Each game has the following structure:
        [ [[obs_0], [obs_1], ..., [obs_n]],
          [[act_0], [act_1], ..., [act_n]] ]
    where each obs_i and act_i are the observation and resultant action that
    an agent took at game step i. Each game round consists of num_players
    game steps. A game can have a variable number of rounds--you can lose
    early.
    '''
    raw_data = []
    for game_num in range(self.num_games):
        raw_data.append([[], []])
        observations = self.environment.reset()
        game_done = False
        while not game_done:
            for agent_id in range(self.num_players):
                observation = observations['player_observations'][agent_id]
                one_hot_action_vector, action = one_hot_vectorized_action(
                    self.agent_object, self.environment.num_moves(), observation)
                # Store the vectorized observation compressed with b2int rather
                # than as a raw bit list to keep the pickled data small.
                raw_data[game_num][0].append(
                    b2int.convert(observation['vectorized']))
                raw_data[game_num][1].append(one_hot_action_vector)
                if observation['current_player'] == agent_id:
                    assert action is not None
                    current_player_action = action
                else:
                    assert action is None
                observations, _, game_done, _ = self.environment.step(
                    current_player_action)
                if game_done:
                    break
    return raw_data
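# `one_hot_vectorized_action` is used above but not defined here. The sketch
# below shows the expected behavior inferred from the calling code; the agent
# interface (`agent.act(observation)`) and the convention that the agent
# returns None when the observed player is not the current player are
# assumptions.

def one_hot_vectorized_action(agent, num_moves, observation):
    action = agent.act(observation)
    one_hot_action_vector = [0] * num_moves
    if action is not None:
        # Mark the index of the chosen move among the legal moves.
        action_idx = observation['legal_moves_as_int'][
            observation['legal_moves'].index(action)]
        one_hot_action_vector[action_idx] = 1
    return one_hot_action_vector, action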
def create_pkl_data(args, csv_data):
    config = {
        'colors': 5,
        'ranks': 5,
        'players': 2,
        'hand_size': 5,
        'max_information_tokens': 8,
        'max_life_tokens': 3,
        'seed': -1,
        'observation_type': 1,  # FIXME: NEEDS CONFIRMATION
        'random_start_player': False
    }

    # Create the Hanabi environment with the defined configuration.
    env = rl_env.HanabiEnv(config)

    raw_data = []
    for game_num in range(args.num_games):
        raw_data.append([[], []])
        game_done = False

        # Select the rows of the CSV that belong to this game. The "Deck" rows
        # give the deck order; the remaining rows are the recorded actions.
        game_filter = csv_data.iloc[:, 0] == game_num
        game_data = csv_data[game_filter]
        deck_filter = game_data.iloc[:, 1] == "Deck"
        deck = np.array(game_data[deck_filter].iloc[:, 2]).tolist()
        action_filter = game_data.iloc[:, 1] != "Deck"
        action_rows = game_data[action_filter]
        action_type = np.array(action_rows.iloc[:, 1]).tolist()
        action_color_rank = np.array(action_rows.iloc[:, 2]).tolist()

        # Initialize the game with @deck. The arg is None by default.
        obs = env.reset(deck)

        game_step = -1
        while not game_done:
            for agent_id in range(args.num_players):
                game_step += 1

                # observer_agent_id should be an agent other than agent_id,
                # i.e. not the currently playing agent.
                agent_ids = list(range(args.num_players))
                agent_ids.remove(agent_id)
                observer_agent_id = random.choice(agent_ids)

                # Retrieve the current player's hand, used to resolve the action.
                agent_hand = get_agent_hand(obs, observer_agent_id)

                # Retrieve the action dict.
                action = get_action(action_type[game_step],
                                    action_color_rank[game_step], agent_hand)

                # Retrieve the one-hot action vector.
                one_hot_action_vector = get_one_hot_action(obs, agent_id, action)

                # Append the observation (compressed with b2int) and the action.
                raw_data[game_num][0].append(
                    b2int.convert(
                        obs['player_observations'][agent_id]['vectorized']))
                raw_data[game_num][1].append(one_hot_action_vector)

                # Step through the environment.
                obs, reward, game_done, info = env.step(action)

                # Check game status.
                if game_done:
                    break
    return raw_data
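# `get_one_hot_action` is used above but not defined in this section. The
# variant of create_pkl_data below builds the same vector inline, which
# suggests a sketch like the following; the vector length of 20 is the
# hard-coded move count carried over from that variant, not a verified
# constant.

def get_one_hot_action(obs, agent_id, action):
    '''Map an action dict to a one-hot vector over the legal-move indices.'''
    player_obs = obs['player_observations'][agent_id]
    action_idx = player_obs['legal_moves_as_int'][
        player_obs['legal_moves'].index(action)]
    one_hot_action_vector = [0] * 20  # hard-coded action length, as in the code below
    one_hot_action_vector[action_idx] = 1
    return one_hot_action_vector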
def create_pkl_data(args, csv_data):
    config = {
        'colors': 5,
        'ranks': 5,
        'players': 2,
        'hand_size': 5,
        'max_information_tokens': 8,
        'max_life_tokens': 3,
        'seed': -1,
        'observation_type': 1,  # FIXME: NEEDS CONFIRMATION
        'random_start_player': False
    }

    # Create the Hanabi environment with the defined configuration.
    env = rl_env.HanabiEnv(config)

    raw_data = []
    for game_num in range(args.num_games):
        print("############## GAME " + str(game_num) + " ###############")
        raw_data.append([[], []])
        game_done = False

        # Select the rows of the CSV that belong to this game and unpack the
        # deck and the per-step action columns.
        game_filter = csv_data.iloc[:, 0] == game_num
        game_data = csv_data[game_filter]
        deck_size = game_data.iloc[0, 1]
        action_type = np.array(game_data.iloc[:, 2]).tolist()
        action_card_color = np.array(game_data.iloc[:, 3]).tolist()
        action_card_rank = np.array(game_data.iloc[:, 4]).tolist()
        deck = np.array(game_data.iloc[0, 5:]).tolist()

        # Initialize the game with @deck. The arg is None by default.
        obs = env.reset(deck)

        game_step = -1
        while not game_done:
            for agent_id in range(args.num_players):
                game_step += 1

                # FIXME: Make obs dict usage clearer.
                # Retrieve the current player's hand, used to resolve the
                # action. Done in a hacky way for now that only supports a
                # two-player game.
                observer_agent_id = (game_step + 1) % 2
                agent_hand = obs['player_observations'][observer_agent_id][
                    'observed_hands'][1]

                # Retrieve the action dict.
                action = get_action(action_type[game_step],
                                    action_card_color[game_step],
                                    action_card_rank[game_step], agent_hand)

                # Construct the one-hot action vector.
                legal_moves = obs['player_observations'][agent_id]['legal_moves']
                action_idx = obs['player_observations'][agent_id][
                    'legal_moves_as_int'][legal_moves.index(action)]
                one_hot_action_vector = [0] * 20  # FIXME: hard-coded action length
                one_hot_action_vector[action_idx] = 1

                # Append the observation (compressed with b2int) and the action.
                raw_data[game_num][0].append(b2int.convert(
                    obs['player_observations'][agent_id]['vectorized']))
                raw_data[game_num][1].append(one_hot_action_vector)

                # Step through the environment.
                obs, reward, game_done, info = env.step(action)

                # Check game status.
                if game_done:
                    break
    return raw_data
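# `get_action` is used above but not defined in this section. Below is a
# minimal sketch of how the CSV columns could map to rl_env action dicts,
# assuming the CSV's action types match the rl_env names ('PLAY', 'DISCARD',
# 'REVEAL_COLOR', 'REVEAL_RANK') and that played/discarded cards are located
# in the observed hand by their (color, rank) pair; the real CSV encoding may
# differ.

def get_action(action_type, card_color, card_rank, agent_hand):
    if action_type in ('PLAY', 'DISCARD'):
        # Find the index of the matching card in the acting player's hand.
        card_index = agent_hand.index({'color': card_color, 'rank': card_rank})
        return {'action_type': action_type, 'card_index': card_index}
    elif action_type == 'REVEAL_COLOR':
        # target_offset of 1 assumes a two-player game (hint the other player).
        return {'action_type': 'REVEAL_COLOR', 'color': card_color,
                'target_offset': 1}
    elif action_type == 'REVEAL_RANK':
        return {'action_type': 'REVEAL_RANK', 'rank': card_rank,
                'target_offset': 1}
    raise ValueError("Unknown action type: " + str(action_type))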