Example #1
  def export_inputs(self):
    """Inputs for exported model."""
    vocab_dict = load_vocab_dict(self.text_vocab_file_path)
    vocab_size = len(vocab_dict)
    self.config['data']['vocab_size'] = vocab_size

    input_sent_left = tf.placeholder(
        shape=(None,), dtype=tf.string, name="input_sent_left")
    input_sent_right = tf.placeholder(
        shape=(None,), dtype=tf.string, name="input_sent_right")
    input_pipeline_func = self.get_input_pipeline(for_export=True)

    token_ids_left = input_pipeline_func(input_sent_left)
    token_ids_right = input_pipeline_func(input_sent_right)
    token_ids_len_left = tf.map_fn(
        lambda x: compute_sen_lens(x, padding_token=0), token_ids_left)
    token_ids_len_right = tf.map_fn(
        lambda x: compute_sen_lens(x, padding_token=0), token_ids_right)

    export_data = {
        "export_inputs": {
            "input_sent_left": input_sent_left,
            "input_sent_right": input_sent_right,
        },
        "model_inputs": {
            "input_x_left": token_ids_left,
            "input_x_right": token_ids_right,
            "input_x_left_len": token_ids_len_left,
            "input_x_right_len": token_ids_len_right,
            "input_x_len": [token_ids_len_left, token_ids_len_right]
        }
    }
    return export_data
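Every example on this page calls a compute_sen_lens helper whose definition is not shown. A minimal sketch of what it is assumed to do, counting the non-padding tokens of one row of ids (the name compute_sen_lens_sketch is ours, not the library's):

import tensorflow as tf

def compute_sen_lens_sketch(token_ids, padding_token=0):
  """Hypothetical stand-in for compute_sen_lens: returns the number of
  entries in a 1-D id tensor that differ from padding_token."""
  mask = tf.cast(tf.not_equal(token_ids, padding_token), tf.int32)
  return tf.reduce_sum(mask)

Applied per row via tf.map_fn, as above, this yields one scalar length per sentence in the batch.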
Example #2
    def export_inputs(self):
        """Inputs for exported model."""
        vocab_dict = load_vocab_dict(self.text_vocab_file_path)
        vocab_size = len(vocab_dict)
        label_vocab_dict = load_vocab_dict(self.label_vocab_file_paths[0])
        label_vocab_size = len(label_vocab_dict)
        self.config['data']['vocab_size'] = vocab_size
        self.config['data']['label_vocab_size'] = label_vocab_size

        input_sentence = tf.placeholder(shape=(None, ),
                                        dtype=tf.string,
                                        name="input_sentence")

        input_pipeline_func = self.get_input_pipeline(for_export=True)

        token_ids = input_pipeline_func(input_sentence)
        token_ids_len = tf.map_fn(
            lambda x: compute_sen_lens(x, padding_token=0), token_ids)

        export_data = {
            "export_inputs": {
                "input_sentence": input_sentence
            },
            "model_inputs": {
                "input_enc_x": token_ids,
                "input_x_len": token_ids_len
            }
        }

        return export_data
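The "export_inputs" dict returned here maps naturally onto a TF 1.x serving signature. A minimal sketch, assuming the surrounding exporter wires it into a SavedModel; the model_outputs dict is hypothetical and would come from the model graph:

import tensorflow as tf

def build_serving_signature(export_inputs, model_outputs):
  """Builds a PREDICT SignatureDef from the export_inputs dict returned
  by export_inputs() and a dict of output tensors."""
  inputs = {
      name: tf.saved_model.utils.build_tensor_info(tensor)
      for name, tensor in export_inputs.items()
  }
  outputs = {
      name: tf.saved_model.utils.build_tensor_info(tensor)
      for name, tensor in model_outputs.items()
  }
  return tf.saved_model.signature_def_utils.build_signature_def(
      inputs=inputs,
      outputs=outputs,
      method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)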
Example #3
  def _dynamic_pooling_index(self, length_left, length_right,
                             fixed_length_left: int, fixed_length_right: int,
                             compress_ratio_left: float,
                             compress_ratio_right: float) -> tf.Tensor:
    """Builds per-sample index maps for dynamic pooling: for each
    (length_left, length_right) pair, an int32 tensor of shape
    [fixed_length_left, fixed_length_right, 2] that stretches a
    variable-size match matrix onto a fixed grid."""
    def _dpool_index(one_length_left, one_length_right, fixed_length_left,
                     fixed_length_right):

      logging.info("fixed_length_left: {}".format(fixed_length_left))
      logging.info("fixed_length_right: {}".format(fixed_length_right))

      # `one_length_left` / `one_length_right` are scalar tensors inside
      # tf.map_fn, so a Python `if` cannot branch on them. Clamping the
      # length to at least 1 gives the same fallback stride for empty
      # sequences (fixed_length / 1 == fixed_length) without a branch.
      stride_left = fixed_length_left / tf.cast(
          tf.maximum(one_length_left, 1), dtype=tf.float32)
      stride_right = fixed_length_right / tf.cast(
          tf.maximum(one_length_right, 1), dtype=tf.float32)

      one_idx_left = [
          tf.cast(i / stride_left, dtype=tf.int32)
          for i in range(fixed_length_left)
      ]
      one_idx_right = [
          tf.cast(i / stride_right, dtype=tf.int32)
          for i in range(fixed_length_right)
      ]
      mesh1, mesh2 = tf.meshgrid(one_idx_left, one_idx_right)
      index_one = tf.transpose(tf.stack([mesh1, mesh2]), (2, 1, 0))
      return index_one

    dpool_bias_left = dpool_bias_right = 0
    if fixed_length_left % compress_ratio_left != 0:
      dpool_bias_left = 1
    if fixed_length_right % compress_ratio_right != 0:
      dpool_bias_right = 1
    cur_fixed_length_left = int(
        fixed_length_left // compress_ratio_left) + dpool_bias_left
    cur_fixed_length_right = int(
        fixed_length_right // compress_ratio_right) + dpool_bias_right
    logging.info("length_left: {}".format(length_left))
    logging.info("length_right: {}".format(length_right))
    logging.info("cur_fixed_length_left: {}".format(cur_fixed_length_left))
    logging.info("cur_fixed_length_right: {}".format(cur_fixed_length_right))

    index = tf.map_fn(
        lambda x: _dpool_index(x[0], x[1], cur_fixed_length_left,
                               cur_fixed_length_right),
        (length_left, length_right),
        dtype=tf.int32)

    logging.info("index: {}".format(index))

    return index
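For context, an index of shape [batch, fixed_length_left, fixed_length_right, 2] like the one built above is typically consumed by gathering from a word-level match matrix before fixed-size pooling (MatchPyramid-style dynamic pooling). A sketch under that assumption, where match is a hypothetical [batch, len_left, len_right] similarity tensor:

import tensorflow as tf

def apply_dpool_index(match, index):
  """Stretches a variable-size match matrix onto a fixed grid using the
  per-sample (left, right) index pairs: prepend a batch index, then
  gather. match: [B, L, R]; index: [B, FL, FR, 2] -> [B, FL, FR]."""
  batch_size = tf.shape(index)[0]
  fixed_left = tf.shape(index)[1]
  fixed_right = tf.shape(index)[2]
  batch_idx = tf.tile(
      tf.reshape(tf.range(batch_size), (-1, 1, 1, 1)),
      tf.stack([1, fixed_left, fixed_right, 1]))
  full_index = tf.concat([batch_idx, index], axis=-1)  # [B, FL, FR, 3]
  return tf.gather_nd(match, full_index)

After this gather, an ordinary fixed-window max pool can be applied, since every sample now has the same spatial size.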
Example #4
def pad_to_hier_input_true_len(inputs,
                               max_doc_len,
                               max_sen_len,
                               split_token,
                               padding_token=0):
  """Reshapes flat documents into a hierarchical layout.

  Input shape: [batch_size, max_len]
  Output shape: [batch_size, max_doc_len, max_sen_len]

  Each document is split into sentences at `split_token`, and the
  sentences are padded or truncated with `padding_token` to fit the
  fixed [max_doc_len, max_sen_len] grid.
  """
  new_input = tf.map_fn(
      lambda x: split_one_doc_to_true_len_sens(
          x, split_token, padding_token, max_doc_len, max_sen_len), inputs)
  return new_input
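split_one_doc_to_true_len_sens itself is not shown on this page. A NumPy reference of what it is assumed to do per document, for illustration only (the function name and exact truncation behavior here are our assumptions, not the library's):

import numpy as np

def split_one_doc_reference(doc, split_token, padding_token,
                            max_doc_len, max_sen_len):
  """Hypothetical NumPy mirror of split_one_doc_to_true_len_sens: cuts a
  flat id sequence into sentences at split_token, then pads/truncates
  to shape [max_doc_len, max_sen_len]."""
  sens, cur = [], []
  for tok in doc:
    if tok == padding_token:   # skip padding ids in the flat sequence
      continue
    if tok == split_token:     # sentence boundary
      if cur:
        sens.append(cur)
      cur = []
    else:
      cur.append(tok)
  if cur:
    sens.append(cur)
  out = np.full((max_doc_len, max_sen_len), padding_token, dtype=np.int64)
  for i, sen in enumerate(sens[:max_doc_len]):
    sen = sen[:max_sen_len]
    out[i, :len(sen)] = sen
  return out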
Example #5
    def export_inputs(self):
        """Inputs for exported model."""
        vocab_dict = load_vocab_dict(self.text_vocab_file_path)
        vocab_size = len(vocab_dict)
        if self.use_true_length and self.split_token != "":
            if self.split_token not in vocab_dict:
                raise ValueError(
                    "The model uses split token {}, which is not in the "
                    "vocabulary.".format(self.split_token))
            self.config['data']['split_token'] = int(
                vocab_dict[self.split_token])
        self.config['data']['vocab_size'] = vocab_size

        input_sentence = tf.placeholder(shape=(None, ),
                                        dtype=tf.string,
                                        name="input_sentence")

        input_pipeline_func = self.get_input_pipeline(for_export=True)

        token_ids = input_pipeline_func(input_sentence)
        token_ids_len = tf.map_fn(
            lambda x: compute_sen_lens(x, padding_token=0), token_ids)

        export_data = {
            "export_inputs": {
                "input_sentence": input_sentence
            },
            "model_inputs": {
                "input_x": token_ids,
                "input_x_len": token_ids_len
            }
        }

        if self.use_dense:
            input_dense = tf.placeholder(shape=(None, ),
                                         dtype=tf.float32,
                                         name="input_dense")
            export_data["export_inputs"]["input_dense"] = input_dense

        return export_data
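A quick way to sanity-check an exported graph like this one is to drive the placeholders directly in a session. A minimal sketch, assuming `task` is an instance of the class above (the feed strings are arbitrary):

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
  # `task` is a hypothetical instance of the task class defined above.
  export_data = task.export_inputs()
  with tf.Session(graph=graph) as sess:
    # Vocabulary lookup tables built by the input pipeline need
    # explicit initialization in TF 1.x.
    sess.run(tf.tables_initializer())
    feed = {
        export_data["export_inputs"]["input_sentence"]:
            ["hello world", "a second longer sentence"],
    }
    ids, lens = sess.run(
        [export_data["model_inputs"]["input_x"],
         export_data["model_inputs"]["input_x_len"]],
        feed_dict=feed)
    print(ids.shape, lens)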