def export_inputs(self): """Inputs for exported model.""" vocab_dict = load_vocab_dict(self.text_vocab_file_path) vocab_size = len(vocab_dict) self.config['data']['vocab_size'] = vocab_size input_sent_left = tf.placeholder( shape=(None,), dtype=tf.string, name="input_sent_left") input_sent_right = tf.placeholder( shape=(None,), dtype=tf.string, name="input_sent_right") input_pipeline_func = self.get_input_pipeline(for_export=True) token_ids_left = input_pipeline_func(input_sent_left) token_ids_right = input_pipeline_func(input_sent_right) token_ids_len_left = tf.map_fn( lambda x: compute_sen_lens(x, padding_token=0), token_ids_left) token_ids_len_right = tf.map_fn( lambda x: compute_sen_lens(x, padding_token=0), token_ids_right) export_data = { "export_inputs": { "input_sent_left": input_sent_left, "input_sent_right": input_sent_right, }, "model_inputs": { "input_x_left": token_ids_left, "input_x_right": token_ids_right, "input_x_left_len": token_ids_len_left, "input_x_right_len": token_ids_len_right, "input_x_len": [token_ids_len_left, token_ids_len_right] } } return export_data
def export_inputs(self): """Inputs for exported model.""" vocab_dict = load_vocab_dict(self.text_vocab_file_path) vocab_size = len(vocab_dict) label_vocab_dict = load_vocab_dict(self.label_vocab_file_paths[0]) label_vocab_size = len(label_vocab_dict) self.config['data']['vocab_size'] = vocab_size self.config['data']['label_vocab_size'] = label_vocab_size input_sentence = tf.placeholder(shape=(None, ), dtype=tf.string, name="input_sentence") input_pipeline_func = self.get_input_pipeline(for_export=True) token_ids = input_pipeline_func(input_sentence) token_ids_len = tf.map_fn( lambda x: compute_sen_lens(x, padding_token=0), token_ids) export_data = { "export_inputs": { "input_sentence": input_sentence }, "model_inputs": { "input_enc_x": token_ids, "input_x_len": token_ids_len } } return export_data
def _dynamic_pooling_index(self, length_left, length_right,
                           fixed_length_left: int, fixed_length_right: int,
                           compress_ratio_left: float,
                           compress_ratio_right: float) -> tf.Tensor:
  """Compute the dynamic-pooling index map for each (left, right) pair."""

  def _dpool_index(one_length_left, one_length_right, fixed_length_left,
                   fixed_length_right):
    logging.info("fixed_length_left: {}".format(fixed_length_left))
    logging.info("fixed_length_right: {}".format(fixed_length_right))

    # Stride that stretches/compresses the true length onto the fixed length.
    if one_length_left == 0:
      stride_left = fixed_length_left
    else:
      stride_left = 1.0 * fixed_length_left / tf.cast(
          one_length_left, dtype=tf.float32)
    if one_length_right == 0:
      stride_right = fixed_length_right
    else:
      stride_right = 1.0 * fixed_length_right / tf.cast(
          one_length_right, dtype=tf.float32)

    # For every position on the fixed output grid, the source position in the
    # variable-length input to pool from.
    one_idx_left = [
        tf.cast(i / stride_left, dtype=tf.int32)
        for i in range(fixed_length_left)
    ]
    one_idx_right = [
        tf.cast(i / stride_right, dtype=tf.int32)
        for i in range(fixed_length_right)
    ]
    mesh1, mesh2 = tf.meshgrid(one_idx_left, one_idx_right)
    index_one = tf.transpose(tf.stack([mesh1, mesh2]), (2, 1, 0))
    return index_one

  # Account for fixed lengths that are not divisible by the compress ratio.
  dpool_bias_left = dpool_bias_right = 0
  if fixed_length_left % compress_ratio_left != 0:
    dpool_bias_left = 1
  if fixed_length_right % compress_ratio_right != 0:
    dpool_bias_right = 1
  cur_fixed_length_left = int(
      fixed_length_left // compress_ratio_left) + dpool_bias_left
  cur_fixed_length_right = int(
      fixed_length_right // compress_ratio_right) + dpool_bias_right

  logging.info("length_left: {}".format(length_left))
  logging.info("length_right: {}".format(length_right))
  logging.info("cur_fixed_length_left: {}".format(cur_fixed_length_left))
  logging.info("cur_fixed_length_right: {}".format(cur_fixed_length_right))

  index = tf.map_fn(
      lambda x: _dpool_index(x[0], x[1], cur_fixed_length_left,
                             cur_fixed_length_right),
      (length_left, length_right),
      dtype=tf.int32)
  logging.info("index: {}".format(index))
  return index
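# Illustrative sketch (not part of the repo's code; `toy_dpool_index` and the
# example lengths are invented): a NumPy analogue of _dpool_index for a single
# (left, right) pair, showing how each cell of the fixed-size pooling grid is
# mapped back to a source position in the variable-length match matrix.
import numpy as np

def toy_dpool_index(len_left, len_right, fixed_left, fixed_right):
  stride_left = fixed_left / max(len_left, 1)
  stride_right = fixed_right / max(len_right, 1)
  idx_left = (np.arange(fixed_left) / stride_left).astype(np.int32)
  idx_right = (np.arange(fixed_right) / stride_right).astype(np.int32)
  mesh1, mesh2 = np.meshgrid(idx_left, idx_right)
  # Shape [fixed_left, fixed_right, 2]: the (left, right) source coordinates
  # to pool from for every output cell, matching the tf.transpose above.
  return np.stack([mesh1, mesh2]).transpose(2, 1, 0)

print(toy_dpool_index(3, 5, fixed_left=6, fixed_right=6).shape)  # (6, 6, 2)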
def pad_to_hier_input_true_len(inputs,
                               max_doc_len,
                               max_sen_len,
                               split_token,
                               padding_token=0):
  """Split each flat document into sentences and pad to a fixed shape.

  Input shape: [batch_size, max_len]
  Output shape: [batch_size, max_doc_len, max_sen_len]
  """
  new_input = tf.map_fn(
      lambda x: split_one_doc_to_true_len_sens(
          x, split_token, padding_token, max_doc_len, max_sen_len), inputs)
  return new_input
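# Illustrative sketch (plain Python; `toy_split_doc` and the sample values are
# invented, and the real split_one_doc_to_true_len_sens may differ in detail):
# the per-document transform behind pad_to_hier_input_true_len. A flat token
# sequence is cut on `split_token`, then truncated/padded to `max_doc_len`
# sentences of `max_sen_len` tokens each.
def toy_split_doc(tokens, split_token, padding_token, max_doc_len, max_sen_len):
  sens, cur = [], []
  for t in tokens:
    if t == split_token:
      if cur:
        sens.append(cur)
        cur = []
    elif t != padding_token:
      cur.append(t)
  if cur:
    sens.append(cur)
  sens = sens[:max_doc_len]
  sens = [(s + [padding_token] * max_sen_len)[:max_sen_len] for s in sens]
  sens += [[padding_token] * max_sen_len] * (max_doc_len - len(sens))
  return sens

# [max_len] -> [max_doc_len, max_sen_len]
doc = [2, 3, 9, 4, 5, 9, 6, 0, 0]  # 9 = split token, 0 = padding
print(toy_split_doc(doc, split_token=9, padding_token=0,
                    max_doc_len=3, max_sen_len=4))
# [[2, 3, 0, 0], [4, 5, 0, 0], [6, 0, 0, 0]]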
def export_inputs(self): """Inputs for exported model.""" vocab_dict = load_vocab_dict(self.text_vocab_file_path) vocab_size = len(vocab_dict) if self.use_true_length and self.split_token != "": if self.split_token not in vocab_dict: raise ValueError( "The Model uses split token: {}, not in corpus.".format( self.split_token)) self.config['data']['split_token'] = int( vocab_dict[self.split_token]) self.config['data']['vocab_size'] = vocab_size input_sentence = tf.placeholder(shape=(None, ), dtype=tf.string, name="input_sentence") input_pipeline_func = self.get_input_pipeline(for_export=True) token_ids = input_pipeline_func(input_sentence) token_ids_len = tf.map_fn( lambda x: compute_sen_lens(x, padding_token=0), token_ids) export_data = { "export_inputs": { "input_sentence": input_sentence }, "model_inputs": { "input_x": token_ids, "input_x_len": token_ids_len } } if self.use_dense: input_dense = tf.placeholder(shape=(None, ), dtype=tf.float32, name="input_dense") export_data["export_inputs"]["input_dense"] = input_dense return export_data