def generate_data(self):
  """Generate data for offline training.

  Loads pre-processed text lines (and, unless running label-free
  inference, the intent and slots label columns), runs the text input
  pipeline, zips in per-sentence lengths, records vocab/data sizes into
  ``self.config``, and returns the assembled ``tf.data`` dataset.
  """
  # One column (text only) for label-free inference, three otherwise.
  if self.infer_without_label:
    text_ds = load_textline_dataset(self.paths_after_pre_process, 1)
  else:
    intent_label_ds, slots_label_ds, text_ds = load_textline_dataset(
        self.paths_after_pre_process, 3)

  logging.info("Loading text dataset...")
  pipeline_fn = self.get_input_pipeline(for_export=False)
  text_ds = text_ds.map(
      pipeline_fn, num_parallel_calls=self.num_parallel_calls)
  # Sentence lengths are computed from the tokenized output
  # (padding_token=0 marks padding positions).
  len_ds = text_ds.map(
      lambda x: compute_sen_lens(x, padding_token=0),
      num_parallel_calls=self.num_parallel_calls)
  text_ds = tf.data.Dataset.zip((text_ds, len_ds))

  if self.infer_without_label:
    data_set = text_ds
  else:
    intent_label_ds = process_one_label_dataset(
        intent_label_ds, self.config, output_index=0)
    slots_label_ds = process_multi_label_dataset(
        slots_label_ds, self.config, output_index=1)
    data_set = tf.data.Dataset.zip((text_ds, intent_label_ds, slots_label_ds))

  # Book-keeping consumed later by the model/solver.
  self.config['data']['vocab_size'] = get_vocab_size(
      self.text_vocab_file_path)
  self.config['data']['{}_data_size'.format(self.mode)] = get_file_len(
      self.paths_after_pre_process)

  return data_set
def generate_data(self):
  """Generate data for offline training.

  Loads raw text plus (intent, slots) labels, registers string
  placeholders (fed at initialization time via ``self.init_feed_dict``),
  builds the text/label datasets from those placeholders, records
  vocab/data sizes into ``self.config``, and returns the zipped dataset.
  """
  text, (intent_label, slots_label) = load_nlu_joint_raw_data(
      paths=self.paths_after_pre_process, mode=self.mode)

  # Placeholders keep the (potentially large) raw data out of the graph;
  # the actual values are fed once through init_feed_dict.
  text_placeholder = tf.placeholder(tf.string, name="text")
  intent_label_placeholder = tf.placeholder(tf.string, name="intent_label")
  slots_label_placeholder = tf.placeholder(tf.string, name="slots_label")
  self.init_feed_dict[text_placeholder] = text
  self.init_feed_dict[intent_label_placeholder] = intent_label
  self.init_feed_dict[slots_label_placeholder] = slots_label

  text_ds = self.load_text_dataset(text_placeholder)
  if self.infer_without_label:
    data_set = text_ds
  else:
    # FIX: build the label datasets from the placeholders rather than the
    # raw Python data. The original passed `intent_label`/`slots_label`
    # directly, which left the placeholders dead (registered in
    # init_feed_dict but never used) and embedded the full label arrays
    # as constants in the graph — inconsistent with the sibling
    # seq-label task, which feeds its label placeholder.
    intent_label_ds = load_one_label_dataset(
        intent_label_placeholder, self.config, output_index=0)
    slots_label_ds = load_multi_label_dataset(
        slots_label_placeholder, self.config, output_index=1)
    data_set = tf.data.Dataset.zip(
        (text_ds, intent_label_ds, slots_label_ds))

  self.config['data']['vocab_size'] = get_vocab_size(
      self.text_vocab_file_path)
  self.config['data']['{}_data_size'.format(self.mode)] = len(text)

  return data_set
def generate_data(self):
  """Generate data for offline training.

  Loads text (and label, when available) columns from text-line files,
  applies the text input pipeline with sentence lengths, records
  vocab/data sizes into ``self.config``, and returns the dataset.
  """
  data_paths = self.paths
  # NOTE: column_num is stored on the instance, so keep the attribute
  # assignment — other code may read it after this call.
  if self.infer_without_label:
    self.column_num = 1
    text_ds = load_textline_dataset(data_paths, self.column_num)
  else:
    self.column_num = 2
    label_ds, text_ds = load_textline_dataset(data_paths, self.column_num)

  logging.info("process text ds...")
  pipeline_fn = self.get_input_pipeline(for_export=False)
  text_ds = text_ds.map(
      pipeline_fn, num_parallel_calls=self.num_parallel_calls)
  len_ds = text_ds.map(
      lambda x: compute_sen_lens(x, padding_token=0),
      num_parallel_calls=self.num_parallel_calls)
  text_ds = tf.data.Dataset.zip((text_ds, len_ds))

  logging.info("process label ds...")
  if self.infer_without_label:
    data_set = text_ds
  else:
    label_ds = process_multi_label_dataset(label_ds, self.config)
    data_set = tf.data.Dataset.zip((text_ds, label_ds))

  self.config['data']['vocab_size'] = get_vocab_size(
      self.text_vocab_file_path)
  self.config['data']['{}_data_size'.format(self.mode)] = get_file_len(
      self.paths)

  return data_set
def generate_data(self):
  """Generate data for offline training.

  Loads raw text/label pairs, registers string placeholders fed through
  ``self.init_feed_dict``, builds the datasets from the placeholders,
  records vocab/data sizes into ``self.config``, and returns the dataset.
  """
  text, label = load_seq_label_raw_data(
      paths=self.paths, mode=self.mode, infer_no_label=self.infer_no_label)

  # Placeholders keep the raw data out of the graph; values are supplied
  # once via init_feed_dict at iterator-initialization time.
  text_placeholder = tf.placeholder(tf.string, name="text")
  label_placeholder = tf.placeholder(tf.string, name="label")
  self.init_feed_dict[text_placeholder] = text
  self.init_feed_dict[label_placeholder] = label

  data_set = self.load_text_dataset(text_placeholder)
  if not self.infer_without_label:
    label_ds = load_multi_label_dataset(label_placeholder, self.config)
    data_set = tf.data.Dataset.zip((data_set, label_ds))

  self.config['data']['vocab_size'] = get_vocab_size(
      self.text_vocab_file_path)
  self.config['data']['{}_data_size'.format(self.mode)] = len(text)

  return data_set
def generate_data(self):
  """Generate data for offline training.

  Loads raw text/label pairs, builds the text dataset (zipping in the
  label dataset unless running label-free inference), records
  vocab/data sizes into ``self.config``, and returns the dataset.
  """
  text, label = load_seq_label_raw_data(
      paths=self.paths, mode=self.mode, infer_no_label=self.infer_no_label)

  data_set = self.load_text_dataset(text)
  if not self.infer_without_label:
    label_ds = load_multi_label_dataset(label, self.config)
    data_set = tf.data.Dataset.zip((data_set, label_ds))

  self.config['data']['vocab_size'] = get_vocab_size(
      self.text_vocab_file_path)
  self.config['data']['{}_data_size'.format(self.mode)] = len(text)

  return data_set
def export_inputs(self):
  """Inputs for exported model.

  Builds the serving-time graph inputs: a string placeholder for raw
  sentences, the export input pipeline applied to it, and per-sentence
  lengths. Returns a dict with "export_inputs" (what the client feeds)
  and "model_inputs" (what the model consumes).
  """
  self.config['data']['vocab_size'] = get_vocab_size(
      self.text_vocab_file_path)

  sentence_ph = tf.placeholder(
      shape=(None,), dtype=tf.string, name="input_sentence")
  token_ids = self.get_input_pipeline(for_export=True)(sentence_ph)
  # Lengths are derived per example; padding_token=0 marks padding.
  token_ids_len = tf.map_fn(
      lambda x: compute_sen_lens(x, padding_token=0), token_ids)

  return {
      "export_inputs": {
          "input_sentence": sentence_ph
      },
      "model_inputs": {
          "input_x": token_ids,
          "input_x_len": token_ids_len
      }
  }