Example #1
    def update(self):
        """
        Update neural model in agent. In this example we follow algorithm
        of updating model in dqn with replay memory.

        """
        if len(self.replay_memory) < self.replay_batch_size:
            return None
        transitions = self.replay_memory.sample(self.replay_batch_size)
        batch = Transition(*zip(*transitions))

        observation_id_list = pad_sequences(
            batch.observation_id_list,
            maxlen=max_len(batch.observation_id_list)).astype('int32')
        input_observation = to_pt(observation_id_list, self.use_cuda)
        next_observation_id_list = pad_sequences(
            batch.next_observation_id_list,
            maxlen=max_len(batch.next_observation_id_list)).astype('int32')
        next_input_observation = to_pt(next_observation_id_list, self.use_cuda)
        chosen_indices = list(zip(*batch.word_indices))
        chosen_indices = [torch.stack(item, 0)
                          for item in chosen_indices]  # list of batch x 1

        word_ranks = self.infer_word_ranks(
            input_observation
        )  # list of batch x vocab, len=5 (one per potential output word)
        word_qvalues = [
            w_rank.gather(1, idx).squeeze(-1)
            for w_rank, idx in zip(word_ranks, chosen_indices)
        ]  # list of batch
        q_value = torch.mean(torch.stack(word_qvalues, -1), -1)  # batch

        next_word_ranks = self.infer_word_ranks(
            next_input_observation
        )  # batch x n_verb, batch x n_noun, batch x n_second_noun
        next_word_masks = list(zip(*batch.next_word_masks))
        next_word_masks = [np.stack(item, 0) for item in next_word_masks]
        next_word_qvalues, _ = _choose_maxQ_command(next_word_ranks,
                                                    next_word_masks,
                                                    self.use_cuda)
        next_q_value = torch.mean(torch.stack(next_word_qvalues, -1),
                                  -1)  # batch
        next_q_value = next_q_value.detach()

        rewards = torch.stack(batch.reward)  # batch
        not_done = 1.0 - np.array(batch.done, dtype='float32')  # batch
        not_done = to_pt(not_done, self.use_cuda, type='float')
        rewards = rewards + not_done * next_q_value * self.discount_gamma  # batch
        mask = torch.stack(batch.mask)  # batch
        loss = F.smooth_l1_loss(q_value * mask, rewards * mask)
        return loss
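
For reference, the masked smooth-L1 target computation at the end of update() can be reproduced in isolation. The sketch below is a toy stand-alone version; the batch size and all tensor values are invented purely for illustration.

    import torch
    import torch.nn.functional as F

    # Toy batch of 4 transitions; all values are made up.
    q_value = torch.tensor([0.2, 0.5, -0.1, 0.3])      # Q(s, a), batch
    next_q_value = torch.tensor([0.4, 0.1, 0.0, 0.6])  # max_a' Q(s', a'), batch
    rewards = torch.tensor([1.0, 0.0, 0.0, 1.0])       # immediate rewards
    not_done = torch.tensor([1.0, 1.0, 1.0, 0.0])      # 0 where the episode ended
    mask = torch.tensor([1.0, 1.0, 0.0, 1.0])          # ignore padded transitions
    discount_gamma = 0.9

    # Bellman target: r + gamma * (1 - done) * max_a' Q(s', a')
    targets = rewards + not_done * next_q_value * discount_gamma

    # Masked Huber (smooth L1) loss, as in the last lines of update() above.
    loss = F.smooth_l1_loss(q_value * mask, targets * mask)
    print(loss.item())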
Example #2
    def get_game_step_info(self, obs: List[str], infos: Dict[str, List[Any]]):
        """
        Get all the available information, and concat them together to be tensor for
        a neural model. we use post padding here, all information are tokenized here.

        Arguments:
            obs: Previous command's feedback for each game.
            infos: Additional information for each game.
        """
        inventory_token_list = [preproc(item, tokenizer=self.nlp) for item in infos["inventory"]]
        inventory_id_list = [_words_to_ids(tokens, self.word2id) for tokens in inventory_token_list]

        feedback_token_list = [preproc(item, str_type='feedback', tokenizer=self.nlp) for item in obs]
        feedback_id_list = [_words_to_ids(tokens, self.word2id) for tokens in feedback_token_list]

        quest_token_list = [preproc(item, tokenizer=self.nlp) for item in infos["extra.recipe"]]
        quest_id_list = [_words_to_ids(tokens, self.word2id) for tokens in quest_token_list]

        prev_action_token_list = [preproc(item, tokenizer=self.nlp) for item in self.prev_actions]
        prev_action_id_list = [_words_to_ids(tokens, self.word2id) for tokens in prev_action_token_list]

        description_token_list = [preproc(item, tokenizer=self.nlp) for item in infos["description"]]
        for i, d in enumerate(description_token_list):
            if len(d) == 0:
                description_token_list[i] = ["end"]  # if empty description, insert word "end"
        description_id_list = [_words_to_ids(tokens, self.word2id) for tokens in description_token_list]
        description_id_list = [_d + _i + _q + _f + _pa for (_d, _i, _q, _f, _pa) in zip(description_id_list, inventory_id_list, quest_id_list, feedback_id_list, prev_action_id_list)]

        input_description = pad_sequences(description_id_list, maxlen=max_len(description_id_list)).astype('int32')
        input_description = to_pt(input_description, self.use_cuda)

        return input_description, description_id_list
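
The post padding mentioned in the docstring boils down to right-padding every id list to the length of the longest one. The pad_sequences/max_len pair below is a simplified stand-in for the project's helpers, shown only to make the shape of input_description concrete.

    import numpy as np

    def max_len(list_of_id_lists):
        # Length of the longest sequence in the batch (simplified stand-in).
        return max(len(ids) for ids in list_of_id_lists)

    def pad_sequences(list_of_id_lists, maxlen, value=0):
        # Post padding: shorter sequences are padded with `value` on the right.
        out = np.full((len(list_of_id_lists), maxlen), value, dtype='int32')
        for i, ids in enumerate(list_of_id_lists):
            trunc = ids[:maxlen]
            out[i, :len(trunc)] = trunc
        return out

    description_id_list = [[4, 8, 15], [16, 23], [42]]
    print(pad_sequences(description_id_list, maxlen=max_len(description_id_list)))
    # [[ 4  8 15]
    #  [16 23  0]
    #  [42  0  0]]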
Example #3
    def get_qa_loss(self):
        """
        Update neural model in agent. In this example we follow algorithm
        of updating model in dqn with replay memory.
        """
        if len(self.qa_replay_memory) < self.replay_batch_size:
            return None
        transitions = self.qa_replay_memory.sample(self.replay_batch_size)
        batch = qa_Transition(*zip(*transitions))

        answer_distribution, obs_mask = self.answer_question(
            batch.observation_list, batch.quest_list,
            use_model="online")  # answer_distribution is batch x time x 2
        answer_distribution = masked_softmax(answer_distribution,
                                             obs_mask.unsqueeze(-1),
                                             axis=1)

        answer_strings = [item[0] for item in batch.answer_strings]
        groundtruth_answer_positions = get_answer_position(
            batch.observation_list, answer_strings)  # list: batch x 2
        groundtruth = pad_sequences(groundtruth_answer_positions).astype(
            'int32')
        groundtruth = to_pt(groundtruth, self.use_cuda)  # batch x 2
        batch_loss = NegativeLogLoss(
            answer_distribution * obs_mask.unsqueeze(-1), groundtruth)

        return torch.mean(batch_loss)
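
NegativeLogLoss above takes the masked answer distribution (batch x time x 2, a start and an end pointer per example) and the ground-truth positions. A hypothetical minimal version, shown only to illustrate the gather-and-negative-log pattern, could look like this:

    import torch

    def negative_log_loss(distribution, ground_truth, eps=1e-8):
        # Hypothetical minimal version: pick the probability assigned to each
        # ground-truth position and sum the negative log-likelihoods.
        # distribution: batch x time x 2, ground_truth: batch x 2 (start, end)
        picked = distribution.gather(1, ground_truth.long().unsqueeze(1)).squeeze(1)  # batch x 2
        return -torch.log(picked + eps).sum(dim=-1)  # batch

    dist = torch.softmax(torch.randn(3, 7, 2), dim=1)  # toy batch of 3 games, 7 tokens
    gt = torch.tensor([[2, 4], [0, 6], [5, 5]])        # toy start/end positions
    print(negative_log_loss(dist, gt))                 # per-example loss, batch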
Example #4
File: agent.py  Project: yyht/qait_public
 def get_agent_inputs(self, string_list):
     sentence_token_list = [item.split() for item in string_list]
     sentence_id_list = [
         _words_to_ids(tokens, self.word2id)
         for tokens in sentence_token_list
     ]
     input_sentence_char = list_of_token_list_to_char_input(
         sentence_token_list, self.char2id)
     input_sentence = pad_sequences(
         sentence_id_list, maxlen=max_len(sentence_id_list)).astype('int32')
     input_sentence = to_pt(input_sentence, self.use_cuda)
     input_sentence_char = to_pt(input_sentence_char, self.use_cuda)
     return input_sentence, input_sentence_char, sentence_id_list
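
_words_to_ids simply maps tokens to vocabulary ids. A simplified stand-in with an out-of-vocabulary fallback (the real helper may handle unknown words differently) looks like:

    def _words_to_ids(tokens, word2id, unk_token="<unk>"):
        # Simplified stand-in: look up each token, fall back to the <unk> id
        # for out-of-vocabulary words.
        unk_id = word2id.get(unk_token, 0)
        return [word2id.get(token, unk_id) for token in tokens]

    word2id = {"<pad>": 0, "<unk>": 1, "take": 2, "red": 3, "key": 4}
    print(_words_to_ids("take the red key".split(), word2id))  # [2, 1, 3, 4]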
Example #5
    def get_game_step_info(self, obs: List[str], infos: Dict[str, List[Any]]):
        """
        Get all the available information, and concat them together to be tensor for
        a neural model. we use post padding here, all information are tokenized here.

        Arguments:
            obs: Previous command's feedback for each game.
            infos: Additional information for each game.
        """
        word2id = self.vocab.word2id
        inventory_id_list = get_token_ids_for_items(infos["inventory"],
                                                    word2id,
                                                    tokenizer=self.nlp)

        feedback_id_list = get_token_ids_for_items(obs,
                                                   word2id,
                                                   tokenizer=self.nlp)

        quest_id_list = get_token_ids_for_items(infos["extra.recipe"],
                                                word2id,
                                                tokenizer=self.nlp)

        prev_action_id_list = get_token_ids_for_items(self.prev_actions,
                                                      word2id,
                                                      tokenizer=self.nlp)

        description_id_list = get_token_ids_for_items(infos["description"],
                                                      word2id,
                                                      tokenizer=self.nlp,
                                                      subst_if_empty=['end'])

        description_id_list = [
            _d + _i + _q + _f + _pa
            for (_d, _i, _q, _f, _pa
                 ) in zip(description_id_list, inventory_id_list,
                          quest_id_list, feedback_id_list, prev_action_id_list)
        ]

        input_description = pad_sequences(
            description_id_list,
            maxlen=max_len(description_id_list)).astype('int32')
        input_description = to_pt(input_description, self.use_cuda)

        return input_description, description_id_list
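
Compared with Example #2, this version folds the preproc and _words_to_ids steps into a single get_token_ids_for_items helper. A hypothetical sketch of that helper, with a plain str.split() standing in for the project's tokenizer, is:

    def get_token_ids_for_items(items, word2id, tokenizer=None, subst_if_empty=None):
        # Hypothetical sketch: tokenize each string, substitute a placeholder
        # for empty results, then map tokens to vocabulary ids.
        unk_id = word2id.get("<unk>", 0)
        id_lists = []
        for item in items:
            tokens = tokenizer(item) if tokenizer is not None else item.lower().split()
            if subst_if_empty and not tokens:
                tokens = subst_if_empty  # e.g. ["end"] for an empty description
            id_lists.append([word2id.get(t, unk_id) for t in tokens])
        return id_lists

    word2id = {"<pad>": 0, "<unk>": 1, "you": 2, "see": 3, "a": 4, "stove": 5, "end": 6}
    print(get_token_ids_for_items(["You see a stove", ""], word2id, subst_if_empty=["end"]))
    # [[2, 3, 4, 5], [6]]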
Example #6
def get_sufficient_info_reward_attribute(reward_helper_info):
    asked_entities = reward_helper_info["_entities"]
    asked_attributes = reward_helper_info["_attributes"]
    init_game_facts = reward_helper_info["init_game_facts"]
    full_facts = reward_helper_info["full_facts"]
    answers = reward_helper_info["answers"]
    game_facts_per_step = reward_helper_info[
        "game_facts_per_step"]  # batch x game step+1
    commands_per_step = reward_helper_info[
        "commands_per_step"]  # batch x game step+1
    game_finishing_mask = reward_helper_info[
        "game_finishing_mask"]  # game step x batch size
    rewards = []
    coverage_rewards = []
    seen_entity_reward = []
    for i in range(len(asked_entities)):  # Iterate over batch
        reward = check_reasoning_path_reward_sequence(asked_entities[i],
                                                      asked_attributes[i],
                                                      game_facts_per_step[i],
                                                      commands_per_step[i],
                                                      bool(int(answers[i])))
        rewards.append(reward)

        # add coverage
        end_facts = set()  # world discovered so far = union of the game facts observed at every step
        for t in range(len(game_facts_per_step[i])):
            end_facts = end_facts | set(game_facts_per_step[i][t])
        coverage = exploration_coverage(full_facts[i], end_facts,
                                        init_game_facts[i])
        coverage_rewards.append(coverage)

        seen_entities = set(name for f in end_facts for name in f.names)
        seen_entity_reward.append(1.0 if asked_entities[i] in
                                  seen_entities else 0.0)

    res = pad_sequences(rewards, dtype="float32")  # batch x game step
    res = res * game_finishing_mask.T
    coverage_rewards = np.array(coverage_rewards)
    seen_entity_reward = np.array(seen_entity_reward)
    res = res + game_finishing_mask.T * np.expand_dims(
        coverage_rewards + seen_entity_reward, axis=-1) * 0.1
    return res  # batch x game step
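
exploration_coverage above rewards the agent for uncovering facts that were not visible at the start of the game. A hypothetical set-based sketch of such a coverage measure (the project's actual definition may differ) is:

    def exploration_coverage(full_facts, end_facts, init_game_facts):
        # Hypothetical sketch: fraction of the facts that were unknown at the
        # start of the game that the agent has discovered by the end.
        discoverable = set(full_facts) - set(init_game_facts)
        if not discoverable:
            return 0.0
        discovered = (set(end_facts) - set(init_game_facts)) & discoverable
        return len(discovered) / len(discoverable)

    full = {"at(player, kitchen)", "in(apple, fridge)", "open(fridge)"}
    init = {"at(player, kitchen)"}
    seen = {"at(player, kitchen)", "open(fridge)"}
    print(exploration_coverage(full, seen, init))  # 0.5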
Example #7
File: agent.py  Project: yyht/qait_public
    def answer_question(self,
                        input_observation,
                        input_observation_char,
                        observation_id_list,
                        input_quest,
                        input_quest_char,
                        use_model="online"):
        # first pad answerer_input, and get the mask
        model = self.online_net if use_model == "online" else self.target_net
        batch_size = len(observation_id_list)
        max_length = input_observation.size(1)
        mask = compute_mask(input_observation)  # batch x obs_len

        # noun mask for location question
        if self.question_type in ["location"]:
            location_mask = []
            for i in range(batch_size):
                m = [1 for item in observation_id_list[i]]
                location_mask.append(m)
            location_mask = pad_sequences(location_mask,
                                          maxlen=max_length,
                                          dtype="float32")
            location_mask = to_pt(location_mask,
                                  enable_cuda=self.use_cuda,
                                  type='float')
            assert mask.size() == location_mask.size()
            mask = mask * location_mask

        match_representation_sequence = self.get_match_representations(
            input_observation,
            input_observation_char,
            input_quest,
            input_quest_char,
            use_model=use_model)
        pred = model.answer_question(match_representation_sequence,
                                     mask)  # batch x vocab or batch x 2

        # attention sum:
        # sometimes a word appears multiple times in the observation, so we
        # merge its occurrences before doing further computations.
        # However, if the answer type is not pointing, we instead use a
        # pre-defined mapping that maps 0/1 to their positions in the vocab.
        if self.answer_type == "2 way":
            observation_id_list = []
            max_length = 2
            for i in range(batch_size):
                observation_id_list.append(
                    [self.word2id["0"], self.word2id["1"]])

        observation = to_pt(
            pad_sequences(observation_id_list,
                          maxlen=max_length).astype('int32'), self.use_cuda)
        vocab_distribution = np.zeros(
            (batch_size, len(self.word_vocab)))  # batch x vocab
        vocab_distribution = to_pt(vocab_distribution,
                                   self.use_cuda,
                                   type='float')
        vocab_distribution = vocab_distribution.scatter_add_(
            1, observation, pred)  # batch x vocab
        non_zero_words = []
        for i in range(batch_size):
            non_zero_words.append(list(set(observation_id_list[i])))
        vocab_mask = torch.ne(vocab_distribution, 0).float()
        return vocab_distribution, non_zero_words, vocab_mask
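
The attention-sum step is the core trick here: per-position pointer probabilities are folded into a single vocabulary-level distribution with scatter_add_, so repeated words accumulate their probability mass. A toy stand-alone illustration (all values invented):

    import torch

    vocab_size = 6
    observation = torch.tensor([[3, 1, 3, 2]])   # token ids; word 3 appears twice
    pred = torch.tensor([[0.1, 0.2, 0.4, 0.3]])  # per-position pointer probabilities

    vocab_distribution = torch.zeros(1, vocab_size)
    vocab_distribution = vocab_distribution.scatter_add_(1, observation, pred)
    print(vocab_distribution)
    # tensor([[0.0000, 0.2000, 0.3000, 0.5000, 0.0000, 0.0000]])
    # -> the two occurrences of word 3 contribute 0.1 + 0.4 = 0.5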