def prepare_text_vocab(self, all_texts):
    """Make sure the text vocab file is in place.

    When a file already exists at ``self.text_vocab_file_path`` and
    ``self.use_custom_vocab`` is truthy, that file is left untouched.
    Otherwise ``prepare_vocab`` is called with the path, ``all_texts`` and
    ``min_frequency=self.vocab_min_frequency``.

    Args:
        all_texts: text data handed to ``prepare_vocab`` unchanged.
    """
    vocab_path = self.text_vocab_file_path

    # Guard clause: an existing file is only kept in custom-vocab mode.
    if os.path.exists(vocab_path) and self.use_custom_vocab:
        logging.info("Reuse text vocab file: {}".format(vocab_path))
        return

    prepare_vocab(vocab_path,
                  all_texts,
                  min_frequency=self.vocab_min_frequency)
    logging.info("Generate text vocab file: {}".format(vocab_path))
def prepare_label_vocab(self, all_labels):
    """Make sure a label vocab file is in place for every output.

    For each index below ``self.output_num`` the file at
    ``self.label_vocab_file_paths[index]`` is kept when it already exists
    and ``self.use_custom_vocab`` is truthy. Otherwise ``prepare_vocab`` is
    called with that path, ``all_labels[index]``,
    ``min_frequency=self.vocab_min_frequency`` and ``use_default_dict=True``.

    Args:
        all_labels: indexable collection with one entry per output; an entry
            is only read when its vocab file has to be generated.
    """
    for idx in range(self.output_num):
        vocab_path = self.label_vocab_file_paths[idx]

        # An existing file is only kept in custom-vocab mode.
        if os.path.exists(vocab_path) and self.use_custom_vocab:
            logging.info("Reuse label vocab file: {}".format(vocab_path))
            continue

        prepare_vocab(vocab_path,
                      all_labels[idx],
                      min_frequency=self.vocab_min_frequency,
                      use_default_dict=True)
        logging.info("Generate label vocab file: {}".format(vocab_path))
def _prepare_vocabs(self, all_texts, all_labels):
    """Build the text (x) vocab file and then the label (y) vocab file.

    The text vocab is always generated with ``prepare_vocab`` from
    ``all_texts`` using ``min_frequency=self.vocab_min_frequency``.

    The label vocab comes from ``prepare_vocab_from_config`` when the
    ``"vocab"`` key is present in ``self.config["data"]["task"]["classes"]``.
    Otherwise it is generated from ``all_labels`` with ``min_frequency=1``
    and ``use_default_dict=False``.

    Args:
        all_texts: text data handed to ``prepare_vocab`` unchanged.
        all_labels: label data, used only when the config has no vocab.
    """
    logging.info("Preparing vocab for x ...")
    prepare_vocab(self.text_vocab_file_path,
                  all_texts,
                  min_frequency=self.vocab_min_frequency)

    logging.info("Preparing vocab for y ...")
    classes_conf = self.config["data"]["task"]["classes"]
    if "vocab" not in classes_conf:
        prepare_vocab(self.label_vocab_file_path,
                      all_labels,
                      min_frequency=1,
                      use_default_dict=False)
    else:
        prepare_vocab_from_config(self.label_vocab_file_path, self.config)
def prepare_label_vocab(self, all_labels):
    """Make sure a label vocab file is in place for every output.

    For each index below ``self.output_num`` the file at
    ``self.label_vocab_file_paths[index]`` is kept when it already exists
    and ``self.use_custom_vocab`` is truthy. Otherwise the file is generated:

    * via ``prepare_vocab_from_config`` when the ``"vocab"`` key is present
      in ``self.config["data"]["task"]["classes"]``; ``output_index`` is the
      current index if ``self.multi_output`` is truthy, else ``None``;
    * via ``prepare_vocab`` from ``all_labels[index]`` with
      ``min_frequency=1`` and ``use_default_dict=False`` in all other cases.

    Args:
        all_labels: indexable collection with one entry per output; an entry
            is only read when its vocab is generated from label data.
    """
    for idx in range(self.output_num):
        vocab_path = self.label_vocab_file_paths[idx]

        # An existing file is only kept in custom-vocab mode.
        if os.path.exists(vocab_path) and self.use_custom_vocab:
            logging.info("Reuse label vocab file: {}".format(vocab_path))
            continue

        if "vocab" in self.config["data"]["task"]["classes"]:
            if self.multi_output:
                output_index = idx
            else:
                output_index = None
            prepare_vocab_from_config(vocab_path,
                                      self.config,
                                      output_index=output_index)
        else:
            prepare_vocab(vocab_path,
                          all_labels[idx],
                          min_frequency=1,
                          use_default_dict=False)
        logging.info("Generate label vocab file: {}".format(vocab_path))