Пример #1
0
def tensor2vector(tensor,
                  hidden_size,
                  mask=None,
                  init=None,
                  use_ln=False,
                  dropout=0.1,
                  scope="vecatt"):
    with tf.variable_scope(scope):
        if util.valid_dropout(dropout):
            tensor = tf.nn.dropout(tensor, 1. - dropout)

        if init is None:
            m = tf.nn.tanh(
                func.linear(tensor, hidden_size, ln=use_ln, scope="m_tensor"))
        else:
            init = util.expand_tile_dims(init, tf.shape(tensor)[1], 1)
            if util.valid_dropout(dropout):
                init = tf.nn.dropout(init, 1. - dropout)

            m = tf.nn.tanh(
                func.linear(tensor, hidden_size, ln=use_ln, scope="m_tensor") +
                func.linear(init, hidden_size, scope="m_init"))
        s = func.linear(m, 1, bias=False, scope="sore")

        if mask is None:
            mask = tf.ones(
                [tf.shape(tensor)[0], tf.shape(tensor)[1]], tf.float32)
        s = tf.squeeze(s, -1) + (1. - mask) * (-1e9)
        w = tf.nn.softmax(s)

        return tf.reduce_sum(tf.expand_dims(w, 2) * tensor, axis=1), s
Пример #2
0
def encoder(source, params):
    mask = tf.to_float(tf.cast(source, tf.bool))
    hidden_size = params.hidden_size

    source, mask = util.remove_invalid_seq(source, mask)

    embed_name = "embedding" if params.shared_source_target_embedding \
        else "src_embedding"
    src_emb = tf.get_variable(embed_name,
                              [params.src_vocab.size(), params.embed_size])
    src_bias = tf.get_variable("bias", [params.embed_size])

    inputs = tf.gather(src_emb, source)
    inputs = tf.nn.bias_add(inputs, src_bias)

    if util.valid_dropout(params.dropout):
        inputs = tf.nn.dropout(inputs, 1. - params.dropout)

    with tf.variable_scope("encoder"):
        # forward rnn
        with tf.variable_scope('forward'):
            outputs = rnn.rnn(params.cell, inputs, hidden_size, mask=mask,
                              ln=params.layer_norm, sm=params.swap_memory)
            output_fw, state_fw = outputs[1]
        # backward rnn
        with tf.variable_scope('backward'):
            if not params.caencoder:
                outputs = rnn.rnn(params.cell, tf.reverse(inputs, [1]),
                                  hidden_size, mask=tf.reverse(mask, [1]),
                                  ln=params.layer_norm, sm=params.swap_memory)
                output_bw, state_bw = outputs[1]
            else:
                outputs = rnn.cond_rnn(params.cell, tf.reverse(inputs, [1]),
                                       tf.reverse(output_fw, [1]), hidden_size,
                                       mask=tf.reverse(mask, [1]),
                                       ln=params.layer_norm,
                                       sm=params.swap_memory,
                                       one2one=True)
                output_bw, state_bw = outputs[1]

            output_bw = tf.reverse(output_bw, [1])

    if not params.caencoder:
        source_encodes = tf.concat([output_fw, output_bw], -1)
        source_feature = tf.concat([state_fw, state_bw], -1)
    else:
        source_encodes = output_bw
        source_feature = state_bw

    with tf.variable_scope("decoder_initializer"):
        decoder_init = rnn.get_cell(
            params.cell, hidden_size, ln=params.layer_norm
        ).get_init_state(x=source_feature)
    decoder_init = tf.tanh(decoder_init)

    return {
        "encodes": source_encodes,
        "decoder_initializer": decoder_init,
        "mask": mask
    }
Пример #3
0
def embedding_layer(features, params):
    p = features['p']
    h = features['h']

    p_mask = tf.to_float(tf.cast(p, tf.bool))
    h_mask = tf.to_float(tf.cast(h, tf.bool))

    with tf.device('/cpu:0'):
        symbol_embeddings = tf.get_variable('special_symbol_embeddings',
                                            shape=(3, params.embed_size),
                                            trainable=True)
        embedding_initializer = tf.glorot_uniform_initializer()
        if tf.gfile.Exists(params.pretrain_word_embedding_file):
            pretrain_embedding = np.load(params.pretrain_word_embedding_file)['data']
            embedding_initializer = tf.constant_initializer(pretrain_embedding)
        general_embeddings = tf.get_variable('general_symbol_embeddings',
                                             shape=(params.word_vocab.size() - 3, params.embed_size),
                                             initializer=embedding_initializer,
                                             trainable=False)
        word_embeddings = tf.concat([symbol_embeddings, general_embeddings], 0)

        p_emb = tf.nn.embedding_lookup(word_embeddings, p)
        h_emb = tf.nn.embedding_lookup(word_embeddings, h)

    p_features = [p_emb]
    h_features = [h_emb]

    if params.enable_bert:
        p_features.append(features['bert_p_enc'])
        h_features.append(features['bert_h_enc'])

    if params.use_char:
        pc = features['pc']
        hc = features['hc']

        pc_mask = tf.to_float(tf.cast(pc, tf.bool))
        hc_mask = tf.to_float(tf.cast(hc, tf.bool))

        pc = tf.reshape(pc, [-1, tf.shape(pc)[-1]])
        hc = tf.reshape(hc, [-1, tf.shape(hc)[-1]])
        pc_mask = tf.reshape(pc_mask, [-1, tf.shape(pc_mask)[-1]])
        hc_mask = tf.reshape(hc_mask, [-1, tf.shape(hc_mask)[-1]])
        with tf.device('/cpu:0'):
            char_embeddings = tf.get_variable('char_embeddings',
                                              shape=(params.char_vocab.size(), params.char_embed_size),
                                              initializer=tf.glorot_uniform_initializer(),
                                              trainable=True)
            with tf.variable_scope('char_embedding'):
                pc_emb = tf.nn.embedding_lookup(char_embeddings, pc)
                hc_emb = tf.nn.embedding_lookup(char_embeddings, hc)
                if util.valid_dropout(params.dropout):
                    pc_emb = tf.nn.dropout(pc_emb, 1. - 0.5 * params.dropout)
                    hc_emb = tf.nn.dropout(hc_emb, 1. - 0.5 * params.dropout)

        with tf.variable_scope("char_encoding", reuse=tf.AUTO_REUSE):
            pc_emb = pc_emb * tf.expand_dims(pc_mask, -1)
            hc_emb = hc_emb * tf.expand_dims(hc_mask, -1)

            pc_shp = util.shape_list(features['pc'])
            pc_emb = tf.reshape(pc_emb, [pc_shp[0], pc_shp[1], pc_shp[2], params.char_embed_size])
            hc_shp = util.shape_list(features['hc'])
            hc_emb = tf.reshape(hc_emb, [hc_shp[0], hc_shp[1], hc_shp[2], params.char_embed_size])

            pc_state = func.linear(tf.reduce_max(pc_emb, 2), params.char_embed_size, scope="cmap")
            hc_state = func.linear(tf.reduce_max(hc_emb, 2), params.char_embed_size, scope="cmap")

        p_features.append(pc_state)
        h_features.append(hc_state)

    '''
    p_emb = func.highway(tf.concat(p_features, axis=2),
                         size=params.hidden_size, dropout=params.dropout, num_layers=2, scope='highway')
    h_emb = func.highway(tf.concat(h_features, axis=2),
                         size=params.hidden_size, dropout=params.dropout, num_layers=2, scope='highway')
    '''
    p_emb = tf.concat(p_features, axis=2)
    h_emb = tf.concat(h_features, axis=2)

    p_emb = p_emb * tf.expand_dims(p_mask, -1)
    h_emb = h_emb * tf.expand_dims(h_mask, -1)

    features.update({'p_emb': p_emb,
                     'h_emb': h_emb,
                     'p_mask': p_mask,
                     'h_mask': h_mask,
                     })
    return features
Пример #4
0
def encoder(source, params):
    mask = tf.to_float(tf.cast(source, tf.bool))
    hidden_size = params.hidden_size

    source, mask = util.remove_invalid_seq(source, mask)

    embed_name = "embedding" if params.shared_source_target_embedding \
        else "src_embedding"
    src_emb = tf.get_variable(embed_name,
                              [params.src_vocab.size(), params.embed_size])
    src_bias = tf.get_variable("bias", [params.embed_size])

    inputs = tf.gather(src_emb, source)
    inputs = tf.nn.bias_add(inputs, src_bias)

    if util.valid_dropout(params.dropout):
        inputs = tf.nn.dropout(inputs, 1. - params.dropout)

    with tf.variable_scope("encoder"):
        x = inputs

        for layer in range(params.num_encoder_layer):
            with tf.variable_scope("layer_{}".format(layer)):
                # forward rnn
                with tf.variable_scope('forward'):
                    outputs = rnn.rnn(params.cell,
                                      x,
                                      hidden_size,
                                      mask=mask,
                                      ln=params.layer_norm,
                                      sm=params.swap_memory,
                                      dp=params.dropout)
                    output_fw, state_fw = outputs[1]
                if layer == 0:
                    # backward rnn
                    with tf.variable_scope('backward'):
                        if not params.caencoder:
                            outputs = rnn.rnn(params.cell,
                                              tf.reverse(x, [1]),
                                              hidden_size,
                                              mask=tf.reverse(mask, [1]),
                                              ln=params.layer_norm,
                                              sm=params.swap_memory,
                                              dp=params.dropout)
                            output_bw, state_bw = outputs[1]
                        else:
                            outputs = rnn.cond_rnn(params.cell,
                                                   tf.reverse(x, [1]),
                                                   tf.reverse(output_fw, [1]),
                                                   hidden_size,
                                                   mask=tf.reverse(mask, [1]),
                                                   ln=params.layer_norm,
                                                   sm=params.swap_memory,
                                                   num_heads=params.num_heads,
                                                   one2one=True)
                            output_bw, state_bw = outputs[1]

                        output_bw = tf.reverse(output_bw, [1])

                    if not params.caencoder:
                        y = tf.concat([output_fw, output_bw], -1)
                        z = tf.concat([state_fw, state_bw], -1)
                    else:
                        y = output_bw
                        z = state_bw
                else:
                    y = output_fw
                    z = state_fw

                y = func.linear(y, hidden_size, ln=False, scope="ff")

                # short cut via residual connection
                if x.get_shape()[-1].value == y.get_shape()[-1].value:
                    x = func.residual_fn(x, y, dropout=params.dropout)
                else:
                    x = y
                if params.layer_norm:
                    x = func.layer_norm(x, scope="ln")

    with tf.variable_scope("decoder_initializer"):
        decoder_cell = rnn.get_cell(params.cell,
                                    hidden_size,
                                    ln=params.layer_norm)

    return {
        "encodes": x,
        "decoder_initializer": {
            "layer_{}".format(l):
            decoder_cell.get_init_state(x=z, scope="layer_{}".format(l))
            for l in range(params.num_decoder_layer)
        },
        "mask": mask
    }
Пример #5
0
def decoder(target, state, params):
    mask = tf.to_float(tf.cast(target, tf.bool))
    hidden_size = params.hidden_size

    if 'decoder' not in state:
        target, mask = util.remove_invalid_seq(target, mask)

    embed_name = "embedding" if params.shared_source_target_embedding \
        else "tgt_embedding"
    tgt_emb = tf.get_variable(embed_name,
                              [params.tgt_vocab.size(), params.embed_size])
    tgt_bias = tf.get_variable("bias", [params.embed_size])

    inputs = tf.gather(tgt_emb, target)
    inputs = tf.nn.bias_add(inputs, tgt_bias)

    # shift
    if 'decoder' not in state:
        inputs = tf.pad(inputs, [[0, 0], [1, 0], [0, 0]])
        inputs = inputs[:, :-1, :]
    else:
        inputs = tf.cond(
            tf.reduce_all(tf.equal(target, params.tgt_vocab.pad())),
            lambda: tf.zeros_like(inputs), lambda: inputs)
        mask = tf.ones_like(mask)

    if util.valid_dropout(params.dropout):
        inputs = tf.nn.dropout(inputs, 1. - params.dropout)

    with tf.variable_scope("decoder"):
        x = inputs
        for layer in range(params.num_decoder_layer):
            with tf.variable_scope("layer_{}".format(layer)):
                init_state = state["decoder_initializer"]["layer_{}".format(
                    layer)]
                if 'decoder' in state:
                    init_state = state["decoder"]["state"]["layer_{}".format(
                        layer)]
                if layer == 0 or params.use_deep_att:
                    returns = rnn.cond_rnn(params.cell,
                                           x,
                                           state["encodes"],
                                           hidden_size,
                                           init_state=init_state,
                                           mask=mask,
                                           num_heads=params.num_heads,
                                           mem_mask=state["mask"],
                                           ln=params.layer_norm,
                                           sm=params.swap_memory,
                                           one2one=False,
                                           dp=params.dropout)
                    (_, hidden_state), (outputs,
                                        _), contexts, attentions = returns
                    c = contexts
                else:
                    if params.caencoder:
                        returns = rnn.cond_rnn(params.cell,
                                               x,
                                               c,
                                               hidden_size,
                                               init_state=init_state,
                                               mask=mask,
                                               mem_mask=mask,
                                               ln=params.layer_norm,
                                               sm=params.swap_memory,
                                               num_heads=params.num_heads,
                                               one2one=True,
                                               dp=params.dropout)
                        (_, hidden_state), (outputs,
                                            _), contexts, attentions = returns
                    else:
                        outputs = rnn.rnn(params.cell,
                                          tf.concat([x, c], -1),
                                          hidden_size,
                                          mask=mask,
                                          init_state=init_state,
                                          ln=params.layer_norm,
                                          sm=params.swap_memory,
                                          dp=params.dropout)
                        outputs, hidden_state = outputs[1]
                if 'decoder' in state:
                    state['decoder']['state']['layer_{}'.format(
                        layer)] = hidden_state

                y = func.linear(outputs, hidden_size, ln=False, scope="ff")

                # short cut via residual connection
                if x.get_shape()[-1].value == y.get_shape()[-1].value:
                    x = func.residual_fn(x, y, dropout=params.dropout)
                else:
                    x = y
                if params.layer_norm:
                    x = func.layer_norm(x, scope="ln")

    feature = func.linear(tf.concat([x, c], -1),
                          params.embed_size,
                          ln=params.layer_norm,
                          scope="ff")
    feature = tf.nn.tanh(feature)

    if util.valid_dropout(params.dropout):
        feature = tf.nn.dropout(feature, 1. - params.dropout)

    if 'dev_decode' in state:
        feature = x[:, -1, :]

    embed_name = "tgt_embedding" if params.shared_target_softmax_embedding \
        else "softmax_embedding"
    embed_name = "embedding" if params.shared_source_target_embedding \
        else embed_name
    softmax_emb = tf.get_variable(embed_name,
                                  [params.tgt_vocab.size(), params.embed_size])
    feature = tf.reshape(feature, [-1, params.embed_size])
    logits = tf.matmul(feature, softmax_emb, False, True)

    soft_label, normalizer = util.label_smooth(target,
                                               util.shape_list(logits)[-1],
                                               factor=params.label_smooth)
    centropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                          labels=soft_label)
    centropy -= normalizer
    centropy = tf.reshape(centropy, tf.shape(target))

    loss = tf.reduce_sum(centropy * mask, -1) / tf.reduce_sum(mask, -1)
    loss = tf.reduce_mean(loss)

    # these mask tricks mainly used to deal with zero shapes, such as [0, 1]
    loss = tf.cond(tf.equal(tf.shape(target)[0], 0),
                   lambda: tf.constant(0, dtype=tf.float32), lambda: loss)

    return loss, logits, state
Пример #6
0
def decoder(target, state, params):
    mask = tf.to_float(tf.cast(target, tf.bool))
    hidden_size = params.hidden_size

    if 'decoder' not in state:
        target, mask = util.remove_invalid_seq(target, mask)

    embed_name = "embedding" if params.shared_source_target_embedding \
        else "tgt_embedding"
    tgt_emb = tf.get_variable(embed_name,
                              [params.tgt_vocab.size(), params.embed_size])
    tgt_bias = tf.get_variable("bias", [params.embed_size])

    inputs = tf.gather(tgt_emb, target)
    inputs = tf.nn.bias_add(inputs, tgt_bias)

    # shift
    if 'decoder' not in state:
        inputs = tf.pad(inputs, [[0, 0], [1, 0], [0, 0]])
        inputs = inputs[:, :-1, :]
    else:
        inputs = tf.cond(tf.reduce_all(tf.equal(target, params.tgt_vocab.pad())),
                         lambda: tf.zeros_like(inputs),
                         lambda: inputs)
        mask = tf.ones_like(mask)

    if util.valid_dropout(params.dropout):
        inputs = tf.nn.dropout(inputs, 1. - params.dropout)

    with tf.variable_scope("decoder"):
        init_state = state["decoder_initializer"]
        if 'decoder' in state:
            init_state = state["decoder"]["state"]
        returns = rnn.cond_rnn(params.cell, inputs, state["encodes"], hidden_size,
                               init_state=init_state, mask=mask,
                               mem_mask=state["mask"], ln=params.layer_norm,
                               sm=params.swap_memory, one2one=False)
        (hidden_states, _), (outputs, _), contexts, attentions = returns

    feature = linear([outputs, contexts, inputs], params.embed_size,
                     ln=params.layer_norm, scope="pre_logits")
    feature = tf.tanh(feature)
    if util.valid_dropout(params.dropout):
        feature = tf.nn.dropout(feature, 1. - params.dropout)

    embed_name = "tgt_embedding" if params.shared_target_softmax_embedding \
        else "softmax_embedding"
    embed_name = "embedding" if params.shared_source_target_embedding \
        else embed_name
    softmax_emb = tf.get_variable(embed_name,
                                  [params.tgt_vocab.size(), params.embed_size])
    feature = tf.reshape(feature, [-1, params.embed_size])
    logits = tf.matmul(feature, softmax_emb, False, True)

    centropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits,
        labels=util.label_smooth(target,
                                 util.shape_list(logits)[-1],
                                 factor=params.label_smooth)
    )
    centropy = tf.reshape(centropy, tf.shape(target))

    loss = tf.reduce_sum(centropy * mask, -1) / tf.reduce_sum(mask, -1)
    loss = tf.reduce_mean(loss)

    # these mask tricks mainly used to deal with zero shapes, such as [0, 1]
    loss = tf.cond(tf.equal(tf.shape(target)[0], 0),
                   lambda: tf.constant(0, dtype=tf.float32),
                   lambda: loss)

    if 'decoder' in state:
        state['decoder']['state'] = hidden_states

    return loss, logits, state