Example #1
        def inner_loop(i, hit_eos, decoded_ids):

            tgt_embed = self.tgt_embedding.encode(decoded_ids)
            T = get_shape_as_list(tgt_embed)[1]
            tgt_mask = subsequent_mask(T)
            scope = 'TransformerDecoder'
            h = transformer_decoder_stack(tgt_embed, src_enc, src_mask,
                                          tgt_mask, num_heads, pdrop, scale,
                                          layers, activation_type, scope, d_ff)

            vsz = self.tgt_embedding.vsz
            do_weight_tying = bool(kwargs.get('tie_weights', True))  # False
            hsz = get_shape_as_list(h)[-1]
            h = tf.reshape(h, [-1, hsz])
            if do_weight_tying and hsz == self.tgt_embedding.get_dsz():
                with tf.variable_scope(self.tgt_embedding.scope, reuse=True):
                    W = tf.get_variable("W")
                    outputs = tf.matmul(h, W, transpose_b=True, name="logits")
            else:
                vocab_w = tf.get_variable("vocab_w", [hsz, vsz],
                                          dtype=tf.float32)
                vocab_b = tf.get_variable("vocab_b", [vsz], dtype=tf.float32)
                outputs = tf.nn.xw_plus_b(h, vocab_w, vocab_b, name="logits")

            preds = tf.reshape(outputs, [B, T, vsz])
            next_id = tf.argmax(preds, axis=-1)[:, -1]
            hit_eos |= tf.equal(next_id, Offsets.EOS)
            next_id = tf.reshape(next_id, [B, 1])

            decoded_ids = tf.concat([decoded_ids, next_id], axis=1)
            return i + 1, hit_eos, decoded_ids
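
Usage note (not from the original source): inner_loop above is shaped as the body of a greedy decoding loop. A minimal sketch of how it could be driven with tf.while_loop in TF 1.x, assuming inner_loop, B, and Offsets (with GO and EOS ids) are in scope as in the snippet, and that max_T is a caller-chosen maximum decode length:

        # Hedged sketch (not part of the original example): greedy decoding
        # driver for inner_loop.  Stops when every sequence has emitted EOS
        # or max_T steps have been taken.
        def is_not_finished(i, hit_eos, decoded_ids):
            return tf.logical_and(i < max_T,
                                  tf.logical_not(tf.reduce_all(hit_eos)))

        init_ids = tf.fill([B, 1], tf.constant(Offsets.GO, dtype=tf.int64))
        init_eos = tf.zeros([B], dtype=tf.bool)
        _, _, decoded_ids = tf.while_loop(
            is_not_finished, inner_loop,
            [tf.constant(0), init_eos, init_ids],
            shape_invariants=[tf.TensorShape([]),
                              tf.TensorShape([None]),
                              tf.TensorShape([None, None])])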
Example #2
        def inner_loop(i, hit_eos, decoded_ids):

            tgt_embed = self.tgt_embedding.encode(decoded_ids)
            T = get_shape_as_list(tgt_embed)[1]
            tgt_mask = subsequent_mask(T)
            scope = 'TransformerDecoder'
            h = transformer_decoder_stack(src_enc, tgt_embed, src_mask,
                                          tgt_mask, num_heads, pdrop, scale,
                                          layers, activation_type, scope, d_ff)

            vsz = self.tgt_embedding.vsz
            do_weight_tying = bool(kwargs.get('tie_weights', True))  # False
            hsz = get_shape_as_list(h)[-1]
            h = tf.reshape(h, [-1, hsz])
            if do_weight_tying and hsz == self.tgt_embedding.get_dsz():
                with tf.variable_scope(self.tgt_embedding.scope, reuse=True):
                    W = tf.get_variable("W")
                    outputs = tf.matmul(h, W, transpose_b=True, name="logits")
            else:
                vocab_w = tf.get_variable("vocab_w", [hsz, vsz], dtype=tf.float32)
                vocab_b = tf.get_variable("vocab_b", [vsz], dtype=tf.float32)
                outputs = tf.nn.xw_plus_b(h, vocab_w, vocab_b, name="logits")

            preds = tf.reshape(outputs, [B, T, vsz])
            next_id = tf.argmax(preds, axis=-1)[:, -1]
            hit_eos |= tf.equal(next_id, Offsets.EOS)
            next_id = tf.reshape(next_id, [B, 1])

            decoded_ids = tf.concat([decoded_ids, next_id], axis=1)
            return i + 1, hit_eos, decoded_ids
Example #3
    def decode(self, inputs):
        encoder_outputs, tgt, src_len, tgt_len = inputs
        tgt_embed = self.tgt_embedding(tgt)
        #if not tgt:
        #    tgt = self.tgt_embedding.create_placeholder(self.tgt_embedding.name)
        src_enc = encoder_outputs.output

        #tgt_embed = self.tgt_embedding.encode(tgt)
        shape = get_shape_as_list(tgt_embed)
        B = shape[0]
        T = shape[1]

        if hasattr(encoder_outputs, 'src_mask'):
            src_mask = encoder_outputs.src_mask
        else:
            src_mask = tf.sequence_mask(src_len, T, dtype=tf.float32)

        tgt_mask = subsequent_mask(T)
        h = self.decoder((tgt_embed, src_enc, src_mask, tgt_mask))
        outputs = self.proj(h)

        self.preds = tf.transpose(tf.reshape(outputs, [B, T, -1]), [1, 0, 2])
        best = tf.argmax(self.preds, -1)
        self.output(best)
        return self.best
Example #4
    def decode(self, x, num_heads=4, layers=1, scale=True, activation_type='relu',
               scope='TransformerEncoder', d_ff=None, **kwargs):
        T = get_shape_as_list(x)[1]
        dsz = get_shape_as_list(x)[-1]
        mask = subsequent_mask(T)
        if dsz != self.hsz:
            x = tf.layers.dense(x, self.hsz)
        x = transformer_encoder_stack(x, mask, num_heads, self.pdrop_value,
                                      scale, layers, activation_type, d_ff=d_ff)
        return tf.reshape(x, [-1, self.hsz])
Example #5
def test_attn_value_sub_mask(qkv):
    q, k, v = qkv
    B, H, T, _ = q.get_shape().as_list()
    q = tf.zeros_like(q)
    mask = subsequent_mask(T)
    res = dot_product_attention(q, k, v, mask=mask)
    with tf.Session() as sess:
        res, gold = sess.run([res, v])
    for b in range(B):
        for h in range(H):
            for t in range(T):
                np.testing.assert_allclose(res[b, h, t, :], np.mean(gold[:, :, :t+1, :], axis=2)[b, h, :], atol=1e-5)
Example #6
def test_attn_value_sub_mask(qkv):
    q, k, v = qkv
    with tf.device('/cpu:0'):
        B, H, T, _ = q.get_shape().as_list()
        q = tf.zeros_like(q)
        mask = subsequent_mask(T)
        res = dot_product_attention(q, k, v, mask=mask)
        with tf.Session() as sess:
            res, gold = sess.run([res, v])
        for b in range(B):
            for h in range(H):
                for t in range(T):
                    np.testing.assert_allclose(res[b, h, t, :], np.mean(gold[:, :, :t+1, :], axis=2)[b, h, :], atol=1e-5)
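
Note on what these two tests imply: the queries are zeroed, so the masked attention weights are uniform over the allowed positions and each output row equals the running mean of v up to position t, which is exactly the behaviour of a lower-triangular (causal) subsequent mask. A minimal sketch of such a mask in TF 1.x (an assumed implementation, not copied from the library), shaped [1, 1, T, T] so it broadcasts over batch and head dimensions:

import tensorflow as tf

# Hedged sketch of a causal mask: position t may attend to positions <= t.
def subsequent_mask_sketch(T):
    ones = tf.ones([T, T], dtype=tf.float32)
    lower = tf.linalg.band_part(ones, -1, 0)   # keep the lower triangle
    return tf.reshape(lower, [1, 1, T, T])     # broadcasts over [B, H, T, T]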
Example #7
        def inner_loop(i, hit_eos, decoded_ids):

            tgt_embed = self.tgt_embedding(decoded_ids)
            T = get_shape_as_list(tgt_embed)[1]
            tgt_mask = subsequent_mask(T)
            h = self.decoder((tgt_embed, src_enc, src_mask, tgt_mask))
            # hsz = get_shape_as_list(h)[-1]
            # h = tf.reshape(h, [-1, hsz])
            outputs = self.proj(h)

            preds = tf.reshape(outputs, [B, T, -1])
            next_id = tf.argmax(preds, axis=-1)[:, -1]
            hit_eos |= tf.equal(next_id, Offsets.EOS)
            next_id = tf.reshape(next_id, [B, 1])

            decoded_ids = tf.concat([decoded_ids, next_id], axis=1)
            return i + 1, hit_eos, decoded_ids
Example #8
    def decode(self,
               encoder_outputs,
               src_len,
               tgt_len,
               pdrop,
               layers=1,
               scope='TransformerDecoder',
               num_heads=4,
               scale=True,
               activation_type='relu',
               d_ff=None,
               **kwargs):
        """self.best is [T, B]"""
        src_enc = encoder_outputs.output
        if hasattr(encoder_outputs, 'src_mask'):
            src_mask = encoder_outputs.src_mask
        else:
            T = get_shape_as_list(src_enc)[1]
            src_mask = tf.sequence_mask(src_len, T, dtype=tf.float32)
        tgt_embed = self.tgt_embedding.encode(kwargs.get('tgt'))
        T = get_shape_as_list(tgt_embed)[1]
        tgt_mask = subsequent_mask(T)
        scope = 'TransformerDecoder'
        h = transformer_decoder_stack(tgt_embed, src_enc, src_mask, tgt_mask,
                                      num_heads, pdrop, scale, layers,
                                      activation_type, scope, d_ff)

        vsz = self.tgt_embedding.vsz
        do_weight_tying = bool(kwargs.get('tie_weights', True))  # False
        hsz = get_shape_as_list(h)[-1]
        if do_weight_tying and hsz == self.tgt_embedding.get_dsz():
            h = tf.reshape(h, [-1, hsz])
            with tf.variable_scope(self.tgt_embedding.scope, reuse=True):
                W = tf.get_variable("W")
                outputs = tf.matmul(h, W, transpose_b=True, name="logits")
        else:
            h = tf.reshape(h, [-1, hsz])
            vocab_w = tf.get_variable("vocab_w", [hsz, vsz], dtype=tf.float32)
            vocab_b = tf.get_variable("vocab_b", [vsz], dtype=tf.float32)
            outputs = tf.nn.xw_plus_b(h, vocab_w, vocab_b, name="logits")
        self.preds = tf.transpose(tf.reshape(outputs, [-1, T, vsz]), [1, 0, 2])
        best = tf.argmax(self.preds, -1)
        self.output(best)
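
On the weight-tying branch above: when tie_weights is enabled and the decoder width hsz equals the embedding size, the output projection reuses the target embedding matrix W, so the logits are h @ W^T rather than a separately learned vocab_w. A self-contained sketch of that tied projection in isolation (illustrative shapes and the variable name tied_W are hypothetical):

import tensorflow as tf

# Hedged sketch: tied output projection.  W doubles as the embedding
# matrix ([vsz, hsz]) and the output projection (via transpose_b=True).
batch_x_time, hsz, vsz = 6, 8, 100
h = tf.random_normal([batch_x_time, hsz])       # flattened decoder states
W = tf.get_variable("tied_W", [vsz, hsz])       # shared embedding matrix
logits = tf.matmul(h, W, transpose_b=True)      # [batch_x_time, vsz]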
Example #9
    def decode(self, encoder_outputs,
               src_len,
               tgt_len,
               pdrop,
               layers=1,
               scope='TransformerDecoder',
               num_heads=4,
               scale=True,
               activation_type='relu',
               d_ff=None, **kwargs):
        """self.best is [T, B]"""
        src_enc = encoder_outputs.output
        if hasattr(encoder_outputs, 'src_mask'):
            src_mask = encoder_outputs.src_mask
        else:
            T = get_shape_as_list(src_enc)[1]
            src_mask = tf.sequence_mask(src_len, T, dtype=tf.float32)
        tgt_embed = self.tgt_embedding.encode(kwargs.get('tgt'))
        T = get_shape_as_list(tgt_embed)[1]
        tgt_mask = subsequent_mask(T)
        scope = 'TransformerDecoder'
        h = transformer_decoder_stack(src_enc, tgt_embed, src_mask, tgt_mask,
                                      num_heads, pdrop, scale, layers,
                                      activation_type, scope, d_ff)

        vsz = self.tgt_embedding.vsz
        do_weight_tying = bool(kwargs.get('tie_weights', True))  # False
        hsz = get_shape_as_list(h)[-1]
        if do_weight_tying and hsz == self.tgt_embedding.get_dsz():
            h = tf.reshape(h, [-1, hsz])
            with tf.variable_scope(self.tgt_embedding.scope, reuse=True):
                W = tf.get_variable("W")
                outputs = tf.matmul(h, W, transpose_b=True, name="logits")
        else:
            h = tf.reshape(h, [-1, hsz])
            vocab_w = tf.get_variable("vocab_w", [hsz, vsz], dtype=tf.float32)
            vocab_b = tf.get_variable("vocab_b", [vsz], dtype=tf.float32)
            outputs = tf.nn.xw_plus_b(h, vocab_w, vocab_b, name="logits")
        self.preds = tf.transpose(tf.reshape(outputs, [-1, T, vsz]), [1, 0, 2])
        best = tf.argmax(self.preds, -1)
        self.output(best)
Example #10
    def decode(self, x, num_heads=4, layers=1, scale=True, activation_type='relu',
               scope='TransformerEncoder', d_ff=None, **kwargs):
        T = get_shape_as_list(x)[1]
        mask = subsequent_mask(T)
        x = transformer_encoder_stack(x, mask, num_heads, self.pdrop_value,
                                      scale, layers, activation_type)
        return tf.reshape(x, [-1, self.hsz])
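
This decode returns the transformer states flattened to [B*T, hsz]; downstream code would typically project them to vocabulary logits and reshape back. A self-contained sketch with illustrative shapes (the dense projection and the sizes B, T, hsz, vsz are assumptions, not part of the original code):

import tensorflow as tf

# Hedged sketch: consuming the flattened [B*T, hsz] states.
B, T, hsz, vsz = 2, 5, 8, 100                   # illustrative sizes
flat = tf.random_normal([B * T, hsz])           # stand-in for decode(x)
logits = tf.layers.dense(flat, vsz, name="logits")
logits = tf.reshape(logits, [B, T, vsz])        # back to [B, T, vsz]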