def get_game_step_info(self, obs: List[str], infos: Dict[str, List[Any]]):
    """
    Gather all available information and concatenate it into a single tensor for a neural model.
    Post padding is used; all information is tokenized here.

    Arguments:
        obs: Previous command's feedback for each game.
        infos: Additional information for each game.
    """
    inventory_token_list = [preproc(item, tokenizer=self.nlp) for item in infos["inventory"]]
    inventory_id_list = [_words_to_ids(tokens, self.word2id) for tokens in inventory_token_list]
    feedback_token_list = [preproc(item, str_type='feedback', tokenizer=self.nlp) for item in obs]
    feedback_id_list = [_words_to_ids(tokens, self.word2id) for tokens in feedback_token_list]
    quest_token_list = [preproc(item, tokenizer=self.nlp) for item in infos["extra.recipe"]]
    quest_id_list = [_words_to_ids(tokens, self.word2id) for tokens in quest_token_list]
    prev_action_token_list = [preproc(item, tokenizer=self.nlp) for item in self.prev_actions]
    prev_action_id_list = [_words_to_ids(tokens, self.word2id) for tokens in prev_action_token_list]
    description_token_list = [preproc(item, tokenizer=self.nlp) for item in infos["description"]]
    for i, d in enumerate(description_token_list):
        if len(d) == 0:
            description_token_list[i] = ["end"]  # if the description is empty, insert the word "end"
    description_id_list = [_words_to_ids(tokens, self.word2id) for tokens in description_token_list]
    # Concatenate description + inventory + quest + feedback + previous action ids per game.
    description_id_list = [_d + _i + _q + _f + _pa
                           for (_d, _i, _q, _f, _pa) in zip(description_id_list, inventory_id_list,
                                                            quest_id_list, feedback_id_list,
                                                            prev_action_id_list)]
    input_description = pad_sequences(description_id_list,
                                      maxlen=max_len(description_id_list)).astype('int32')
    input_description = to_pt(input_description, self.use_cuda)
    return input_description, description_id_list
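For reference, a minimal sketch of the padding utilities this method relies on, assuming `max_len`, `pad_sequences`, and `to_pt` follow the usual TextWorld-baseline conventions (post padding with zeros, conversion to a PyTorch LongTensor). The bodies below are illustrative, not the project's actual implementation.

import numpy as np
import torch

def max_len(list_of_id_lists):
    # Length of the longest concatenated id sequence in the batch.
    return max(len(ids) for ids in list_of_id_lists)

def pad_sequences(sequences, maxlen, value=0):
    # Post padding: shorter sequences are padded with `value` on the right.
    padded = np.full((len(sequences), maxlen), value, dtype='int32')
    for i, seq in enumerate(sequences):
        trunc = seq[:maxlen]
        padded[i, :len(trunc)] = trunc
    return padded

def to_pt(np_array, use_cuda=False):
    # Wrap the padded id matrix as a LongTensor, optionally moved to GPU.
    tensor = torch.from_numpy(np_array).long()
    return tensor.cuda() if use_cuda else tensor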
def get_game_info_at_certain_step(self, obs, infos):
    """
    Get all the information needed from the game engine for training.

    Arguments:
        obs: Previous command's feedback for each game.
        infos: Additional information for each game.
    """
    batch_size = len(obs)
    feedback_strings = [preproc(item, tokenizer=self.nlp) for item in obs]
    description_strings = [preproc(item, tokenizer=self.nlp) for item in infos["description"]]
    observation_strings = [d + " <|> " + fb if fb != d else d + " <|> hello"
                           for fb, d in zip(feedback_strings, description_strings)]
    inventory_strings = [preproc(item, tokenizer=self.nlp) for item in infos["inventory"]]
    local_word_list = [obs.split() + inv.split()
                       for obs, inv in zip(observation_strings, inventory_strings)]

    directions = ["east", "west", "north", "south"]
    if self.question_type in ["location", "existence"]:
        # The agent only observes the environment; it does not change it.
        possible_verbs = [["go", "inventory", "wait", "open", "examine"]
                          for _ in range(batch_size)]
    else:
        possible_verbs = [list(set(item) - set(["", "look"])) for item in infos["verbs"]]

    possible_adjs, possible_nouns = [], []
    for i in range(batch_size):
        object_nouns = [item.split()[-1] for item in infos["object_nouns"][i]]
        object_adjs = [w for item in infos["object_adjs"][i] for w in item.split()]
        possible_nouns.append(list(set(object_nouns) & set(local_word_list[i]) - set([""])) + directions)
        possible_adjs.append(list(set(object_adjs) & set(local_word_list[i]) - set([""])) + ["</s>"])

    return observation_strings, [possible_verbs, possible_adjs, possible_nouns]
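A hypothetical calling sketch showing how the returned candidate lists could be consumed; `env` and `agent` are placeholder names, and the verb–adjective–noun enumeration below is one illustrative way to build command strings, not necessarily how the original agent composes them.

# Placeholder driver (env/agent are assumptions, not from the source):
obs, infos = env.reset()
observation_strings, (verbs, adjs, nouns) = agent.get_game_info_at_certain_step(obs, infos)

# Enumerate candidate commands for game 0 from the returned word lists.
candidates = []
for verb in verbs[0]:
    for adj in adjs[0]:
        for noun in nouns[0]:
            # Here "</s>" is treated as the "no adjective" marker (an assumption).
            adj_text = "" if adj == "</s>" else adj + " "
            candidates.append(f"{verb} {adj_text}{noun}")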
def collect_data_from_game(gamefile, seed, branching_depth):
    tokenizer = spacy.load('en', disable=['ner', 'parser', 'tagger'])
    rng = np.random.RandomState(seed)

    # Ignore the following commands.
    commands_to_ignore = ["look", "examine", "inventory"]

    env_infos = textworld.EnvInfos(description=True, location=True, facts=True,
                                   last_action=True, admissible_commands=True,
                                   game=True, extras=["walkthrough"])
    env = textworld.start(gamefile, env_infos)
    env = textworld.envs.wrappers.Filter(env)
    obs, infos = env.reset()

    walkthrough = infos["extra.walkthrough"]
    # Make sure we start with listing the inventory.
    if walkthrough[0] != "inventory":
        walkthrough = ["inventory"] + walkthrough
    # Add the 'restart' command as a way to indicate the beginning of the game.
    walkthrough = ["restart"] + walkthrough

    dataset = []
    done = False
    facts_seen = set()
    for i, cmd in enumerate(walkthrough):
        last_facts = facts_seen
        if i > 0:  # != "restart"
            obs, _, done, infos = env.step(cmd)

        facts_seen = process_facts(last_facts, infos["game"], infos["facts"],
                                   infos["last_action"], cmd)
        dataset += [{
            "game": os.path.basename(gamefile),
            "step": (i, 0),
            "observation": preproc(obs, tokenizer=tokenizer),
            "previous_action": cmd.lower(),
            "target_commands": sorted(
                gen_graph_commands(facts_seen - last_facts, cmd="add")
                + gen_graph_commands(last_facts - facts_seen, cmd="delete")),
            "previous_graph_seen": sorted(serialize_facts(last_facts)),
            "graph_seen": sorted(serialize_facts(facts_seen)),
        }]

        if done:
            break  # Stop collecting data if the game is done.

        # Fork the current game & seen facts.
        env_ = env.copy()
        facts_seen_ = facts_seen

        # Then, take N random actions.
        for j in range(1, branching_depth + 1):
            commands = [c for c in infos["admissible_commands"]
                        if ((c == "examine cookbook" or c.split()[0] not in commands_to_ignore)
                            and (i + 1) != len(walkthrough) and c != walkthrough[i + 1])]
            if len(commands) == 0:
                break

            cmd_ = rng.choice(commands)
            obs, _, done, infos = env_.step(cmd_)
            if done:
                break  # Stop collecting data if the game is done.

            last_facts_ = facts_seen_
            facts_seen_ = process_facts(last_facts_, infos["game"], infos["facts"],
                                        infos["last_action"], cmd_)
            dataset += [{
                "game": os.path.basename(gamefile),
                "step": (i, j),
                "observation": preproc(obs, tokenizer=tokenizer),
                "previous_action": cmd_.lower(),
                "target_commands": sorted(
                    gen_graph_commands(facts_seen_ - last_facts_, cmd="add")
                    + gen_graph_commands(last_facts_ - facts_seen_, cmd="delete")),
                "previous_graph_seen": sorted(serialize_facts(last_facts_)),
                "graph_seen": sorted(serialize_facts(facts_seen_)),
            }]

    return gamefile, dataset
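A minimal sketch of a driver that runs collect_data_from_game over a directory of games in parallel and dumps the result to JSON. The function name build_dataset, the ".ulx" file extension, and the default seed and worker counts are assumptions for illustration, not part of the original pipeline.

import glob
import json
from multiprocessing import Pool

def build_dataset(games_dir, out_file, seed=123, branching_depth=5, workers=4):
    # Assumes compiled TextWorld games with a .ulx extension (an assumption).
    gamefiles = sorted(glob.glob(games_dir + "/*.ulx"))
    with Pool(workers) as pool:
        results = pool.starmap(
            collect_data_from_game,
            [(gf, seed + i, branching_depth) for i, gf in enumerate(gamefiles)])

    dataset = []
    for _, examples in results:
        dataset.extend(examples)

    with open(out_file, "w") as f:
        json.dump({"examples": dataset}, f)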