Example #1
    def create_data(self):
        '''Collect self.num_games games by running self.agent_object in
        self.environment; for each game, store the list of vectorized
        observations and the list of one-hot actions.'''
        raw_data = []
        for game_num in range(self.num_games):
            raw_data.append([[], []])
            observations = self.environment.reset()
            game_done = False

            while not game_done:
                for agent_id in range(self.num_players):
                    observation = observations['player_observations'][agent_id]
                    one_hot_action_vector, action = one_hot_vectorized_action(
                        self.agent_object, self.environment.num_moves(),
                        observation)

                    raw_data[game_num][0].append(
                        b2int.convert(observation['vectorized']))
                    raw_data[game_num][1].append(one_hot_action_vector)

                    if observation['current_player'] == agent_id:
                        assert action is not None
                        current_player_action = action
                    else:
                        assert action is None

                    observations, _, game_done, _ = self.environment.step(
                        current_player_action)
                    if game_done:
                        if game_num % 1000 == 0:
                            print(f"game {game_num} done")
                        break
        return raw_data
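All of the examples store the vectorized observation through b2int.convert rather than as the raw bit list. The repository's b2int module is not shown here; the following is only a minimal sketch of what such a bits-to-int packer could look like (an assumption about its behavior, not the project's actual implementation):

def convert(bits):
    # Pack a list of 0/1 values into a single integer, most significant
    # bit first, so a long bit vector pickles to one compact Python int.
    # Hypothetical stand-in for b2int.convert.
    value = 0
    for bit in bits:
        value = (value << 1) | bit
    return value


def restore(value, num_bits):
    # Inverse operation: unpack the integer back into a fixed-length bit list.
    return [(value >> shift) & 1 for shift in range(num_bits - 1, -1, -1)]

With this kind of packing, convert([1, 0, 1]) returns 5 and restore(5, 3) returns [1, 0, 1]; the fixed observation length has to be known when decoding.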
Example #2
    def create_data(self):
        '''Create and return a list of games. Each game has the following structure:
            [ [[obs_0], [obs_1], ..., [obs_n]], [[act_0], [act_1], ..., [act_n]] ]
        where obs_i and act_i are the observation and the resulting action that
        an agent took at game step i. Each game round consists of num_players game
        steps. A game can have a variable number of rounds, since you can lose early.
        '''
        raw_data = []

        for game_num in range(self.num_games):
            raw_data.append([[], []])
            observations = self.environment.reset()
            game_done = False

            while not game_done:
                for agent_id in range(self.num_players):
                    observation = observations['player_observations'][agent_id]
                    one_hot_action_vector, action = one_hot_vectorized_action(
                            self.agent_object,
                            self.environment.num_moves(),
                            observation)
                    # Store the vectorized observation compressed via
                    # b2int.convert rather than as the raw bit list.
                    raw_data[game_num][0].append(
                            b2int.convert(observation['vectorized']))
                    raw_data[game_num][1].append(one_hot_action_vector)

                    if observation['current_player'] == agent_id:
                        assert action is not None
                        current_player_action = action
                    else:
                        assert action is None

                    observations, _, game_done, _ = self.environment.step(
                            current_player_action)
                    if game_done:
                        break

        return raw_data
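Examples #1 and #2 also rely on a one_hot_vectorized_action helper that is not shown. Judging by the inline construction in Example #4, it presumably asks the agent for an action and encodes it as a one-hot vector over the environment's move space; the following is a rough reconstruction under that assumption (it also assumes the agent exposes an act(observation) method that returns None when it is not the agent's turn, as the rl_env example agents do):

def one_hot_vectorized_action(agent, num_moves, observation):
    # Hypothetical reconstruction, not the repository's actual helper.
    # Query the agent for an action on this player observation; off-turn
    # agents are assumed to return None, leaving the vector all zeros.
    action = agent.act(observation)
    one_hot_action_vector = [0] * num_moves
    if action is not None:
        # legal_moves and legal_moves_as_int are parallel lists in the
        # player observation dict (see Example #4's inline version).
        action_idx = observation['legal_moves_as_int'][
            observation['legal_moves'].index(action)]
        one_hot_action_vector[action_idx] = 1
    return one_hot_action_vector, action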
Example #3
def create_pkl_data(args, csv_data):
    config = {
        'colors': 5,
        'ranks': 5,
        'players': 2,
        'hand_size': 5,
        'max_information_tokens': 8,
        'max_life_tokens': 3,
        'seed': -1,
        'observation_type': 1,  # FIXME: NEEDS CONFIRMATION
        'random_start_player': False
    }

    # Create the Hanabi Environment with the defined configuration.
    env = rl_env.HanabiEnv(config)
    raw_data = []
    for game_num in range(args.num_games):
        raw_data.append([[], []])
        game_done = False

        game_filter = csv_data.iloc[:, 0] == game_num
        game_data = csv_data[game_filter]
        deck_filter = game_data.iloc[:, 1] == "Deck"
        deck = np.array(game_data[deck_filter].iloc[:, 2]).tolist()
        action_filter = game_data.iloc[:, 1] != "Deck"
        action = game_data[action_filter]
        action_type = np.array(action.iloc[:, 1]).tolist()
        action_color_rank = np.array(action.iloc[:, 2]).tolist()

        # Initialize the game with @deck. The arg is None by default.
        obs = env.reset(deck)

        game_step = -1
        while not game_done:
            for agent_id in range(args.num_players):
                game_step += 1
                # print("--------------{}----------------".format(game_step))

                # observer_agent_id should be an agent other than agent_id
                # AKA not current playing agent
                # observer_agent_id = (game_step + 1) % 2
                agent_ids = list(range(args.num_players))
                agent_ids.remove(agent_id)
                observer_agent_id = random.choice(agent_ids)

                # Retrieve the current player's hand (read from the observer's
                # observation, since a player cannot see their own cards);
                # it is needed to map the recorded action onto a card.
                agent_hand = get_agent_hand(obs, observer_agent_id)

                # Retrieve Action Dict
                action = get_action(action_type[game_step],
                                    action_color_rank[game_step], agent_hand)

                # Retrieve One-Hot Action
                one_hot_action_vector = get_one_hot_action(
                    obs, agent_id, action)

                # Append the observation (compressed via b2int) and the
                # one-hot action for this game step.
                raw_data[game_num][0].append(
                    b2int.convert(
                        obs['player_observations'][agent_id]['vectorized']))
                raw_data[game_num][1].append(one_hot_action_vector)

                # Step Through
                obs, reward, game_done, info = env.step(action)

                # Check Game status
                if game_done:
                    break

    return raw_data
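Both create_pkl_data variants expect a pandas DataFrame of recorded games plus an args object carrying num_games and num_players, and the function name suggests the returned list is pickled afterwards. A hedged usage sketch follows; the CSV path, output filename, and header=None layout are illustrative assumptions, not the project's actual command line:

import argparse
import pickle

import pandas as pd

parser = argparse.ArgumentParser()
parser.add_argument('--csv_path', type=str, default='games.csv')     # illustrative
parser.add_argument('--output_path', type=str, default='games.pkl')  # illustrative
parser.add_argument('--num_games', type=int, default=1000)
parser.add_argument('--num_players', type=int, default=2)
args = parser.parse_args()

# header=None keeps the positional .iloc indexing used above intact;
# whether the real CSV has a header row is an assumption.
csv_data = pd.read_csv(args.csv_path, header=None)

raw_data = create_pkl_data(args, csv_data)
with open(args.output_path, 'wb') as f:
    pickle.dump(raw_data, f)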
Example #4
def create_pkl_data(args, csv_data):
    config = {
        'colors': 5,
        'ranks': 5,
        'players': 2,
        'hand_size': 5,
        'max_information_tokens': 8,
        'max_life_tokens': 3,
        'seed': -1,
        'observation_type': 1,  # FIXME: NEEDS CONFIRMATION
        'random_start_player': False
    }

    # Create the Hanabi Environment with the defined configuration.
    env = rl_env.HanabiEnv(config)
    raw_data = []
    for game_num in range(args.num_games):
        print("############## GAME "+ str(game_num) + " ###############")
        raw_data.append([[], []])
        game_done = False

        game_filter = csv_data.iloc[:, 0] == game_num
        game_data = csv_data[game_filter]
        deck_size = game_data.iloc[0, 1]
        action_type = np.array(game_data.iloc[:, 2]).tolist()
        action_card_color = np.array(game_data.iloc[:, 3]).tolist()
        action_card_rank = np.array(game_data.iloc[:, 4]).tolist()
        deck = np.array(game_data.iloc[0, 5:]).tolist()

        # Initialize the game with @deck. The arg is None by default.
        obs = env.reset(deck)

        game_step = -1
        while not game_done:
            for agent_id in range(args.num_players):
                game_step += 1
                # FIXME: Make obs dict usage clearer

                # Retrieve the current player's hand from the other player's
                # observation (a player cannot see their own cards).
                # Hacky for now: this only supports a 2-player game.
                observer_agent_id = (game_step + 1) % 2
                agent_hand = obs['player_observations'][observer_agent_id][
                    'observed_hands'][1]

                # Retrieve Action
                action = get_action(action_type[game_step],
                                    action_card_color[game_step],
                                    action_card_rank[game_step],
                                    agent_hand)

                # Construct one-hot action vector
                player_obs = obs['player_observations'][agent_id]
                action_idx = player_obs['legal_moves_as_int'][
                    player_obs['legal_moves'].index(action)]
                one_hot_action_vector = [0] * 20  # FIXME: hard-coded action length
                one_hot_action_vector[action_idx] = 1

                # Store the vectorized observation compressed via b2int.
                raw_data[game_num][0].append(b2int.convert(
                    obs['player_observations'][agent_id]['vectorized']))
                raw_data[game_num][1].append(one_hot_action_vector)

                # Step Through
                obs, reward, game_done, info = env.step(action)

                # Check Game status
                if game_done:
                    break

    return raw_data
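The FIXME about the hard-coded one-hot length in Example #4 can be resolved the same way Examples #1 and #2 size their vectors: rl_env.HanabiEnv exposes num_moves(), so the action vector can be sized from the environment instead of the literal 20. A small sketch of that substitution:

                # Size the one-hot vector from the environment rather than
                # hard-coding the action-space length.
                one_hot_action_vector = [0] * env.num_moves()
                one_hot_action_vector[action_idx] = 1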