示例#1
0
 def _build_vocab(self):
     """Build the shared text vocabulary and the entity/relation vocabularies.

     A single vocabulary is built over ``source_text + target_text`` and
     assigned to both the source and the target attributes. Entity and
     relation vocabularies are then built with no special tokens, using the
     already-updated ``source_vocab_size`` as the size argument.

     ``build_vocab`` is defined outside this block; it is unpacked here as
     ``(idx2token, token2idx, size)`` -- presumably the realised vocabulary
     size, confirm against its definition.
     """
     idx2token, token2idx, vocab_size = build_vocab(
         self.source_text + self.target_text,
         self.source_vocab_size,
         self.special_token_list,
     )
     self.source_idx2token = idx2token
     self.source_token2idx = token2idx
     self.source_vocab_size = vocab_size
     # Source and target sides share the very same mapping objects.
     self.target_idx2token = self.source_idx2token
     self.target_token2idx = self.source_token2idx
     self.target_vocab_size = self.source_vocab_size

     # Whitespace-split every entity string, keeping the group/doc nesting.
     entity_data = []
     for group in self.source_entity:
         entity_data.append(
             [[mention.split() for mention in doc] for doc in group])

     # The returned size is deliberately discarded for these two vocabularies.
     entity_idx2token, entity_token2idx, _ = build_vocab(
         entity_data, self.source_vocab_size, [])
     self.source_entity_idx2token = entity_idx2token
     self.source_entity_token2idx = entity_token2idx

     relation_idx2token, relation_token2idx, _ = build_vocab(
         self.relation, self.source_vocab_size, [])
     self.source_relation_idx2token = relation_idx2token
     self.source_relation_token2idx = relation_token2idx
示例#2
0
 def _build_vocab(self):
     """Build source and target vocabularies, either shared or separate.

     If ``self.share_vocab`` is falsy, each side gets its own ``build_vocab``
     call over its own text and size argument. Otherwise the two configured
     sizes are asserted equal, one vocabulary is built over
     ``source_text + target_text``, and the target attributes are pointed at
     the source results.

     Raises:
         AssertionError: in shared mode when ``source_vocab_size`` differs
             from ``target_vocab_size``.
     """
     if not self.share_vocab:
         # Independent vocabularies; source is built and stored first.
         src_idx2token, src_token2idx, src_size = build_vocab(
             self.source_text, self.source_vocab_size,
             self.special_token_list)
         self.source_idx2token = src_idx2token
         self.source_token2idx = src_token2idx
         self.source_vocab_size = src_size

         tgt_idx2token, tgt_token2idx, tgt_size = build_vocab(
             self.target_text, self.target_vocab_size,
             self.special_token_list)
         self.target_idx2token = tgt_idx2token
         self.target_token2idx = tgt_token2idx
         self.target_vocab_size = tgt_size
         return

     assert self.source_vocab_size == self.target_vocab_size
     combined_text = self.source_text + self.target_text
     idx2token, token2idx, vocab_size = build_vocab(
         combined_text, self.source_vocab_size, self.special_token_list)
     self.source_idx2token = idx2token
     self.source_token2idx = token2idx
     self.source_vocab_size = vocab_size
     # Target side aliases the shared source vocabulary.
     self.target_idx2token = self.source_idx2token
     self.target_token2idx = self.source_token2idx
     self.target_vocab_size = self.source_vocab_size
 def _build_vocab(self):
     """Build the key vocabulary, then a vocabulary shared by values and targets.

     The key vocabulary is built first from ``source_key_text`` because it
     reads ``source_vocab_size`` before that attribute is overwritten by the
     second ``build_vocab`` call. The second call covers
     ``source_value_text + target_text`` and its results are assigned to both
     the source and the target attributes.
     """
     key_idx2token, key_token2idx, key_size = build_vocab(
         self.source_key_text, self.source_vocab_size,
         self.special_token_list)
     self.source_key_idx2token = key_idx2token
     self.source_key_token2idx = key_token2idx
     self.source_key_vocab_size = key_size

     idx2token, token2idx, vocab_size = build_vocab(
         self.source_value_text + self.target_text,
         self.source_vocab_size,
         self.special_token_list,
     )
     self.source_idx2token = idx2token
     self.source_token2idx = token2idx
     self.source_vocab_size = vocab_size
     # Target side reuses the same mapping objects as the source side.
     self.target_idx2token = self.source_idx2token
     self.target_token2idx = self.source_token2idx
     self.target_vocab_size = self.source_vocab_size
 def _build_vocab(self):
     """Build source and target vocabularies, either shared or separate.

     In shared mode (``self.share_vocab`` truthy) the source and target
     languages are asserted equal, one vocabulary is built over
     ``source_text_data + target_text_data``, and the target mappings alias
     the source mappings. In separate mode each side is built from its own
     text with its own size argument.

     In both modes ``max_source_vocab_size`` and ``max_target_vocab_size``
     are overwritten with the third value returned by ``build_vocab``
     (presumably the realised vocabulary size -- confirm against its
     definition), so the two attributes always describe the vocabularies
     actually stored on ``self``.

     Raises:
         AssertionError: in shared mode when ``source_language`` differs
             from ``target_language``.
     """
     if self.share_vocab:
         assert self.source_language == self.target_language
         text_data = self.source_text_data + self.target_text_data
         self.source_idx2token, self.source_token2idx, self.max_source_vocab_size = build_vocab(
             text_data, self.max_source_vocab_size, self.special_token_list)
         self.target_idx2token, self.target_token2idx = self.source_idx2token, self.source_token2idx
         # The target side uses the shared vocabulary, so its size must follow
         # it; otherwise max_target_vocab_size would keep the configured value
         # while the separate-vocabulary branch replaces it.
         self.max_target_vocab_size = self.max_source_vocab_size
     else:
         self.source_idx2token, self.source_token2idx, self.max_source_vocab_size = build_vocab(
             self.source_text_data, self.max_source_vocab_size,
             self.special_token_list)
         self.target_idx2token, self.target_token2idx, self.max_target_vocab_size = build_vocab(
             self.target_text_data, self.max_target_vocab_size,
             self.special_token_list)
 def _build_vocab(self):
     """Build the single vocabulary for ``self.text_data``.

     Stores the two mappings returned by ``build_vocab`` and replaces
     ``max_vocab_size`` with the third returned value.
     """
     idx2token, token2idx, vocab_size = build_vocab(
         self.text_data,
         self.max_vocab_size,
         self.special_token_list,
     )
     self.idx2token = idx2token
     self.token2idx = token2idx
     self.max_vocab_size = vocab_size
示例#6
0
 def _build_vocab(self):
     """Build an attribute vocabulary for the source and a text vocabulary for the target.

     The source mappings come from ``build_attribute_vocab(self.source_text)``,
     which is unpacked as two values and takes no size or special-token
     arguments. The target mappings and ``target_vocab_size`` come from
     ``build_vocab``.
     """
     attr_idx2token, attr_token2idx = build_attribute_vocab(self.source_text)
     self.source_idx2token = attr_idx2token
     self.source_token2idx = attr_token2idx

     tgt_idx2token, tgt_token2idx, tgt_size = build_vocab(
         self.target_text,
         self.target_vocab_size,
         self.special_token_list,
     )
     self.target_idx2token = tgt_idx2token
     self.target_token2idx = tgt_token2idx
     self.target_vocab_size = tgt_size
示例#7
0
 def _build_vocab(self):
     """Build one vocabulary over the first three entries of ``group_text_data``.

     Entries 0, 1 and 2 are concatenated with ``+`` in that order and passed
     to ``build_vocab``; the results are stored on ``idx2token``,
     ``token2idx`` and ``max_vocab_size``.
     """
     groups = self.group_text_data
     merged_text = groups[0] + groups[1] + groups[2]
     idx2token, token2idx, vocab_size = build_vocab(
         merged_text, self.max_vocab_size, self.special_token_list)
     self.idx2token = idx2token
     self.token2idx = token2idx
     self.max_vocab_size = vocab_size
示例#8
0
 def _build_vocab(self):
     """Build the target-side vocabulary only.

     Stores the two mappings returned by ``build_vocab`` for
     ``self.target_text`` and replaces ``target_vocab_size`` with the third
     returned value. No source-side vocabulary is touched here.
     """
     tgt_idx2token, tgt_token2idx, tgt_size = build_vocab(
         self.target_text,
         self.target_vocab_size,
         self.special_token_list,
     )
     self.target_idx2token = tgt_idx2token
     self.target_token2idx = tgt_token2idx
     self.target_vocab_size = tgt_size