Exemplo n.º 1
0
def create_test_iterator(hparams, mode):
    """Build a small vocab-backed iterator for tests.

    Returns (iterator, src_vocab_table, tgt_vocab_table) for train/eval
    modes; in INFER mode a reverse_tgt_vocab_table is appended as a
    fourth element.
    """
    is_infer = mode == tf.contrib.learn.ModeKeys.INFER

    # Tiny fixed vocabularies: index 0 is eos on the source side,
    # sos/eos lead the target side.
    src_vocab_table = lookup_ops.index_table_from_tensor(
        tf.constant([hparams.eos, "a", "b", "c", "d"]))
    tgt_vocab_mapping = tf.constant([hparams.sos, hparams.eos, "a", "b", "c"])
    tgt_vocab_table = lookup_ops.index_table_from_tensor(tgt_vocab_mapping)
    if is_infer:
        # Only inference needs to map ids back to strings.
        reverse_tgt_vocab_table = lookup_ops.index_to_string_table_from_tensor(
            tgt_vocab_mapping)

    src_dataset = tf.data.Dataset.from_tensor_slices(
        tf.constant(["a a b b c", "a b b"]))

    if is_infer:
        infer_iterator = iterator_utils.get_infer_iterator(
            src_dataset=src_dataset,
            src_vocab_table=src_vocab_table,
            eos=hparams.eos,
            source_reverse=hparams.source_reverse,
            batch_size=hparams.batch_size)
        return (infer_iterator, src_vocab_table, tgt_vocab_table,
                reverse_tgt_vocab_table)

    tgt_dataset = tf.data.Dataset.from_tensor_slices(
        tf.constant(["a b c b c", "a b c b"]))
    train_iterator = iterator_utils.get_iterator(
        src_dataset=src_dataset,
        tgt_dataset=tgt_dataset,
        src_vocab_table=src_vocab_table,
        tgt_vocab_table=tgt_vocab_table,
        batch_size=hparams.batch_size,
        sos=hparams.sos,
        eos=hparams.eos,
        source_reverse=hparams.source_reverse,
        random_seed=hparams.random_seed,
        num_buckets=hparams.num_buckets)
    return (train_iterator, src_vocab_table, tgt_vocab_table)
    def testGetInferIterator(self):
        """Checks batching, eos padding, truncation and UNK mapping of the
        inference iterator."""
        hparams = tf.contrib.training.HParams(
            random_seed=3, source_reverse=False, eos="eos", sos="sos")
        vocab_table = lookup_ops.index_table_from_tensor(
            tf.constant(["a", "b", "c", "eos", "sos"]))
        # "d", "f", "e", "g" are out of vocabulary; "f e a g" exceeds
        # src_max_len and should be truncated.
        dataset = tf.data.Dataset.from_tensor_slices(
            tf.constant(["c c a", "c a", "d", "f e a g"]))
        iterator = iterator_utils.get_infer_iterator(
            src_dataset=dataset,
            src_vocab_table=vocab_table,
            batch_size=2,
            eos=hparams.eos,
            source_reverse=hparams.source_reverse,
            src_max_len=3)

        src = iterator.source
        src_len = iterator.source_sequence_length
        # Static shapes stay unknown until runtime.
        self.assertEqual([None, None], src.shape.as_list())
        self.assertEqual([None], src_len.shape.as_list())

        with self.test_session() as sess:
            sess.run(tf.tables_initializer())
            sess.run(iterator.initializer)

            # First batch: "c c a" and "c a" (padded with eos id 3).
            src_v, len_v = sess.run((src, src_len))
            self.assertAllEqual(
                [
                    [2, 2, 0],  # c c a
                    [2, 0, 3]
                ],  # c a eos
                src_v)
            self.assertAllEqual([3, 2], len_v)

            # Second batch: OOV tokens map to -1; "f e a g" truncated to 3.
            src_v, len_v = sess.run((src, src_len))
            self.assertAllEqual(
                [
                    [-1, 3, 3],  # "d" == unknown, eos eos
                    [-1, -1, 0]
                ],  # "f" == unknown, "e" == unknown, a
                src_v)
            self.assertAllEqual([1, 3], len_v)

            # The dataset is exhausted after two batches.
            with self.assertRaisesOpError("End of sequence"):
                sess.run((src, src_len))
Exemplo n.º 3
0
def create_infer_model(model_creator, hparams, scope=None, extra_args=None):
    """Build the inference graph and wrap it in an InferModel tuple.

    Creates vocab lookup tables (source, target, label plus reverse
    target/label id-to-string tables), feedable placeholders for the
    source strings and batch size, the inference iterator, and finally
    the model itself, all inside a fresh tf.Graph.
    """
    graph = tf.Graph()

    with graph.as_default(), tf.container(scope or "infer"):
        # Forward lookup tables: token -> id.
        src_vocab_table, tgt_vocab_table, lbl_vocab_table = \
          vocab_utils.create_vocab_tables(hparams.src_vocab_file,
                                          hparams.tgt_vocab_file,
                                          hparams.share_vocab)
        # Reverse tables: id -> token, used to decode predictions.
        reverse_tgt_vocab_table = lookup_ops.index_to_string_table_from_file(
            hparams.tgt_vocab_file, default_value=vocab_utils.UNK)
        reverse_lbl_vocab_table = lookup_ops.index_to_string_table_from_file(
            hparams.lbl_vocab_file, default_value=vocab_utils.UNK)

        # Inputs are fed at inference time rather than read from files.
        src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        batch_size_placeholder = tf.placeholder(shape=[], dtype=tf.int64)

        iterator = iterator_utils.get_infer_iterator(
            tf.data.Dataset.from_tensor_slices(src_placeholder),
            src_vocab_table,
            batch_size=batch_size_placeholder,
            eos=hparams.eos,
            src_max_len=hparams.src_max_len_infer)

        model = model_creator(
            hparams,
            iterator=iterator,
            mode=tf.contrib.learn.ModeKeys.INFER,
            source_vocab_table=src_vocab_table,
            target_vocab_table=tgt_vocab_table,
            label_vocab_table=lbl_vocab_table,
            reverse_target_vocab_table=reverse_tgt_vocab_table,
            reverse_target_intent_vocab_table=reverse_lbl_vocab_table,
            scope=scope,
            extra_args=extra_args)

    return InferModel(graph=graph,
                      model=model,
                      src_placeholder=src_placeholder,
                      batch_size_placeholder=batch_size_placeholder,
                      iterator=iterator)
Exemplo n.º 4
0
def create_infer_model(model_creator, hparams):
    """Build the inference graph (two input streams) wrapped in InferModel.

    Creates word/POS/role vocab tables, feedable placeholders for both
    source streams and the batch size, the paired inference iterator,
    and the model, all inside a fresh tf.Graph.
    """
    src1_vocab_file = "%s/%s" % (hparams.data_dir, hparams.word_vocab)
    src2_vocab_file = "%s/%s" % (hparams.data_dir, hparams.pos_vocab)
    tgt_vocab_file = "%s/%s" % (hparams.data_dir, hparams.role_vocab)

    graph = tf.Graph()
    with graph.as_default(), tf.container("infer"):
        src1_vocab_table, src2_vocab_table, tgt_vocab_table = create_vocab_tables(
            src1_vocab_file, src2_vocab_file, tgt_vocab_file)
        # id -> token table for decoding predicted role ids back to strings.
        reverse_tgt_vocab_table = lookup_ops.index_to_string_table_from_file(
            tgt_vocab_file, default_value=data_utils._UNK)
        src1_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        src2_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        batch_size_placeholder = tf.placeholder(shape=[], dtype=tf.int64)

        # tf.data.Dataset replaces the deprecated tf.contrib.data.Dataset
        # (removed after TF 1.3); matches the tf.data usage elsewhere in
        # this file.
        src1_dataset = tf.data.Dataset.from_tensor_slices(src1_placeholder)
        src2_dataset = tf.data.Dataset.from_tensor_slices(src2_placeholder)
        iterator = iterator_utils.get_infer_iterator(
            src1_dataset,
            src2_dataset,
            src1_vocab_table,
            src2_vocab_table,
            batch_size=batch_size_placeholder)
        model = model_creator(
            hparams,
            iterator=iterator,
            mode=tf.contrib.learn.ModeKeys.INFER,
            src1_vocab_table=src1_vocab_table,
            src2_vocab_table=src2_vocab_table,
            tgt_vocab_table=tgt_vocab_table,
        )
    return InferModel(graph=graph,
                      model=model,
                      src1_placeholder=src1_placeholder,
                      src2_placeholder=src2_placeholder,
                      batch_size_placeholder=batch_size_placeholder,
                      iterator=iterator)