Example #1
def chatbot_lstm_attn():
    hparams = lstm.lstm_attention()
    hparams.max_length = 256
    hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
    hparams.optimizer_adam_epsilon = 1e-9
    hparams.learning_rate_decay_scheme = "noam"
    hparams.learning_rate = 0.1
    hparams.learning_rate_warmup_steps = 4000
    hparams.initializer_gain = 1.0
    hparams.initializer = "uniform_unit_scaling"
    hparams.weight_decay = 0.0
    hparams.optimizer_adam_beta1 = 0.9
    hparams.optimizer_adam_beta2 = 0.98
    hparams.num_sampled_classes = 0
    hparams.label_smoothing = 0.1
    hparams.learning_rate_warmup_steps = 8000  # overrides the 4000 set above
    hparams.learning_rate = 0.2  # overrides the 0.1 set above
    hparams.layer_preprocess_sequence = "n"
    hparams.layer_postprocess_sequence = "da"
    hparams.layer_prepostprocess_dropout = 0.1

    hparams.hidden_size = 1024
    hparams.num_hidden_layers = 2
    hparams.attn_vec_size = 128
    hparams.batch_size = 4096
    return hparams
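An hparams function like the one above is only picked up by the tensor2tensor trainer once it is registered. The snippet below is a minimal sketch, not part of the original project, of the usual registration pattern; the user module my_usr_dir and the shortened hparams body are illustrative assumptions.

# Sketch of a hypothetical registration module, e.g. my_usr_dir/hparams.py.
from tensor2tensor.models import lstm
from tensor2tensor.utils import registry

@registry.register_hparams
def chatbot_lstm_attn():
    # Same pattern as above: start from the stock attention hparams and override.
    hparams = lstm.lstm_attention()
    hparams.hidden_size = 1024
    hparams.num_hidden_layers = 2
    hparams.attn_vec_size = 128
    hparams.batch_size = 4096
    return hparams

# The registered name can then be selected on the command line, e.g.:
# t2t-trainer --t2t_usr_dir=my_usr_dir --model=lstm_seq2seq_attention \
#     --hparams_set=chatbot_lstm_attn --problem=... --output_dir=...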
Example #2
    def testLSTMSeq2SeqAttention(self):
        vocab_size = 9
        x = np.random.random_integers(1,
                                      high=vocab_size - 1,
                                      size=(3, 5, 1, 1))
        y = np.random.random_integers(1,
                                      high=vocab_size - 1,
                                      size=(3, 6, 1, 1))
        hparams = lstm.lstm_attention()

        p_hparams = problem_hparams.test_problem_hparams(
            vocab_size, vocab_size)
        x = tf.constant(x, dtype=tf.int32)
        x = tf.placeholder_with_default(x, shape=[None, None, 1, 1])  # make batch/length dims dynamic

        with self.test_session() as session:
            features = {
                "inputs": x,
                "targets": tf.constant(y, dtype=tf.int32),
            }
            model = lstm.LSTMSeq2seqAttention(hparams,
                                              tf.estimator.ModeKeys.TRAIN,
                                              p_hparams)
            logits, _ = model(features)
            session.run(tf.global_variables_initializer())
            res = session.run(logits)
        self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))
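The asserted shape (3, 6, 1, 1, vocab_size) follows tensor2tensor's convention for text problems: feature tensors are 4-D with shape [batch, length, 1, 1], and the model's logits append a final vocabulary dimension.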
Example #3
    def testLSTMSeq2SeqAttention(self):
        vocab_size = 9
        x = np.random.random_integers(1,
                                      high=vocab_size - 1,
                                      size=(3, 5, 1, 1))
        y = np.random.random_integers(1,
                                      high=vocab_size - 1,
                                      size=(3, 6, 1, 1))
        hparams = lstm.lstm_attention()

        p_hparams = problem_hparams.test_problem_hparams(
            hparams, vocab_size, vocab_size)
        x = tf.constant(x, dtype=tf.int32)
        x._shape = tf.TensorShape([None, None, 1, 1])  # legacy hack to relax the static shape

        with self.test_session() as session:
            features = {
                "inputs": x,
                "targets": tf.constant(y, dtype=tf.int32),
            }
            model = lstm.LSTMSeq2SeqAttention(hparams,
                                              tf.contrib.learn.ModeKeys.TRAIN,
                                              p_hparams)
            sharded_logits, _, _ = model.model_fn(features)
            logits = tf.concat(sharded_logits, 0)
            session.run(tf.global_variables_initializer())
            res = session.run(logits)
        self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))
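Examples #2 and #3 are the same test written against different tensor2tensor APIs: the older version (Example #3) passes the model hparams to test_problem_hparams, uses tf.contrib.learn.ModeKeys, and calls model.model_fn, which returns per-shard logits that must be concatenated; the newer version (Example #2) calls the model object directly on the feature dict, uses tf.estimator.ModeKeys, and gets a single logits tensor back.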
Example #4
def chatbot_lstm_attn():
    hparams = lstm.lstm_attention()
    hparams.hidden_size = 1024
    hparams.num_hidden_layers = 2
    hparams.attn_vec_size = 128
    hparams.batch_size = 4096

    return hparams
Example #5
def lstm_attention_think_lr3():
    hparams = lstm_attention()
    hparams.learning_rate_constant = 0.001
    hparams.learning_rate_schedule = "constant"
    hparams.num_hidden_layers = 1
    hparams.hidden_size = 512
    hparams.add_hparam("eval_throttle_seconds", 100)
    return hparams
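Setting learning_rate_schedule to "constant" makes the trainer use learning_rate_constant (here 0.001) as a fixed learning rate instead of a warmup/decay schedule, and eval_throttle_seconds, added via add_hparam, is typically passed through as the minimum number of seconds between evaluation runs.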
Example #6
def gru_attention_scan():
    hparams = lstm_attention()
    hparams.learning_rate_constant = 0.001
    hparams.learning_rate_schedule = "constant"
    hparams.add_hparam("stack_size", 10)
    hparams.add_hparam("num_stacks", 10)
    hparams.add_hparam("decoder_type", DECODER_TYPE)
    hparams.num_hidden_layers = 1
    hparams.hidden_size = 50
    hparams.dropout = 0.5
    hparams.add_hparam("eval_throttle_seconds", 100)
    return hparams
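stack_size, num_stacks, and decoder_type are project-specific hyperparameters (DECODER_TYPE is a constant defined elsewhere in the originating code); they are introduced with add_hparam because they are not part of the base lstm_attention set, and registering them this way lets them be overridden and serialized like the built-in ones.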
Example #7
  def testLSTMSeq2seqAttentionBidirectionalEncoder(self):
    vocab_size = 9
    x = np.random.random_integers(1, high=vocab_size - 1, size=(3, 5, 1, 1))
    y = np.random.random_integers(1, high=vocab_size - 1, size=(3, 6, 1, 1))
    hparams = lstm.lstm_attention()

    p_hparams = problem_hparams.test_problem_hparams(vocab_size, vocab_size)
    x = tf.constant(x, dtype=tf.int32)
    x = tf.placeholder_with_default(x, shape=[None, None, 1, 1])

    with self.test_session() as session:
      features = {
          "inputs": x,
          "targets": tf.constant(y, dtype=tf.int32),
      }
      model = lstm.LSTMSeq2seqAttentionBidirectionalEncoder(
          hparams, tf.estimator.ModeKeys.TRAIN, p_hparams)
      logits, _ = model(features)
      session.run(tf.global_variables_initializer())
      res = session.run(logits)
    self.assertEqual(res.shape, (3, 6, 1, 1, vocab_size))
Example #8
def semeval_lstm():
    hparams = lstm_attention()
    hparams.batch_size = 512
    hparams.num_heads = 1
    return hparams
Example #9
def dstc_bilstm_attention_hparams_v4():
    hparams = lstm_attention()
    hparams.num_hidden_layers = 4
    hparams.batch_size = 4096
    hparams.dropout = 0.7
    return hparams
Example #10
def dstc_lstm_attention_hparams_v1():
    hparams = lstm_attention()
    hparams.num_hidden_layers = 2
    hparams.batch_size = 4096
    hparams.dropout = 0.9
    return hparams