Example #1
# assumes TensorFlow 1.x; helper layers such as rnn_layer and att_pool_layer are defined elsewhere
import tensorflow as tf

def build_graph(config):

    input_x = tf.placeholder(tf.int32, [None, None], name='input_x')
    input_y = tf.placeholder(tf.int64, [None], name='input_y')

    with tf.device('/cpu:0'):
        emb_mat = tf.get_variable('embedding',
                                  [config.vocab.size(), config.vocab.emb_dim],
                                  initializer=tf.constant_initializer(config.vocab.embeddings),
                                  trainable=config.emb_tune)
        seq_emb = tf.nn.embedding_lookup(emb_mat, input_x)
        
        # mask out zero (padding) token ids; seq_len counts the real tokens
        seq_mask = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
        seq_len = tf.reduce_sum(seq_mask, 1)

    with tf.name_scope("rnn"):
        
        seq_e = rnn_layer(seq_emb, seq_len, 128, config.keep_prob,
                          activation=tf.nn.relu, concat=True, scope='bi-lstm-1')
               
        B = tf.shape(seq_e)[0]
        query = tf.get_variable("query", [config.att_dim],
                                initializer = tf.ones_initializer())
        query = tf.tile(tf.expand_dims(query, 0), [B, 1])

        feat = att_pool_layer(seq_e, query, seq_mask, config.att_dim,
                              config.keep_prob, is_train=None, scope="att_pooling")
        
        # feat = seq_e[:, -1, :]  # alternative: last time step instead of attention pooling

    with tf.name_scope("score"):
        #
        fc = tf.contrib.layers.dropout(feat, config.keep_prob)
        fc = tf.layers.dense(fc, 128, name='fc1')            
        fc = tf.nn.relu(fc)
        
        fc = tf.contrib.layers.dropout(fc, config.keep_prob)
        logits = tf.layers.dense(fc, config.num_classes, name='fc2')
        # logits = tf.nn.sigmoid(fc)
        
        normed_logits = tf.nn.softmax(logits, name='logits')          
        y_pred_cls = tf.argmax(logits, 1, name='pred_cls')
        
    with tf.name_scope("loss"):
        #
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                       labels=input_y)
        loss = tf.reduce_mean(cross_entropy, name='loss')

    with tf.name_scope("accuracy"):
        #
        correct_pred = tf.equal(input_y, y_pred_cls)
        acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='metric')
    
    #
    print(normed_logits)
    print(acc)
    print(loss)
    print()
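
For orientation, here is a minimal sketch of driving this graph end to end. Only the tensor names come from build_graph above; the Adam optimizer, learning rate, and batch arguments are assumptions added for illustration.

import tensorflow as tf

def run_train_step_sketch(config, x_batch, y_batch):
    # hypothetical driver: build the graph, look up tensors by the names
    # declared in build_graph, and run a single Adam update
    graph = tf.Graph()
    with graph.as_default():
        build_graph(config)
        loss = graph.get_tensor_by_name('loss/loss:0')
        acc = graph.get_tensor_by_name('accuracy/metric:0')
        train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
        with tf.Session(graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            feed = {'input_x:0': x_batch, 'input_y:0': y_batch}
            loss_v, acc_v, _ = sess.run([loss, acc, train_op], feed_dict=feed)
    return loss_v, acc_v
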
Example #2

    def build_inference(self, input_tensors):
        """Build the inference part of the graph and return its output tensors."""
        settings = self.settings
        input_x = input_tensors["input_x"]
        #
        keep_prob = tf.get_variable("keep_prob",
                                    shape=[],
                                    dtype=tf.float32,
                                    trainable=False)
        #
        with tf.device('/cpu:0'):
            emb_mat = tf.get_variable(
                'embedding', [settings.vocab.size(), settings.vocab.emb_dim],
                initializer=tf.constant_initializer(settings.vocab.embeddings),
                trainable=settings.emb_tune,
                dtype=tf.float32)
            seq_emb = tf.nn.embedding_lookup(emb_mat, input_x)

            seq_mask = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
            seq_len = tf.reduce_sum(seq_mask, 1)

        with tf.name_scope("rnn"):

            seq_e = rnn_layer(seq_emb,
                              seq_len,
                              128,
                              keep_prob,
                              activation=tf.nn.relu,
                              concat=True,
                              scope='bi-lstm-1')

            # attention-pooling: attention-weighted feature aggregation
            #
            B = tf.shape(seq_e)[0]
            query = tf.get_variable("query", [settings.att_dim],
                                    initializer=tf.ones_initializer())
            query = tf.tile(tf.expand_dims(query, 0), [B, 1])

            feat = att_pool_layer(query,
                                  seq_e,
                                  seq_mask,
                                  settings.att_dim,
                                  keep_prob,
                                  scope="att_pooling")

            #feat = seq_e[:,-1,:]

        with tf.name_scope("score"):
            #
            fc = tf.nn.dropout(feat, keep_prob)
            fc = tf.layers.dense(fc, 128, name='fc1')
            fc = tf.nn.relu(fc)

            fc = tf.nn.dropout(fc, keep_prob)
            logits = tf.layers.dense(fc, settings.num_classes, name='fc2')

            normed_logits = tf.nn.softmax(logits, name='logits')

        #
        print(normed_logits)
        #
        output_tensors = {"normed_logits": normed_logits, "logits": logits}
        #
        return output_tensors
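
Unlike Example #1, this variant reads the dropout keep-probability from a non-trainable graph variable instead of a Python float baked into the graph. A hedged sketch of how such a variable might be switched between training and evaluation values (the assign-op pattern and the default values are assumptions, not part of the original):

import tensorflow as tf

def make_keep_prob_switches(train_value=0.7):
    # fetch (or create) the root-level scalar variable and build assign ops for it
    with tf.variable_scope("", reuse=tf.AUTO_REUSE):
        keep_prob = tf.get_variable("keep_prob", shape=[],
                                    dtype=tf.float32, trainable=False)
    set_train = tf.assign(keep_prob, train_value)  # enable dropout
    set_eval = tf.assign(keep_prob, 1.0)           # disable dropout
    return set_train, set_eval
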
Example #3

def build_graph(config):

    input_x = tf.placeholder(tf.int32, [None, None], name='input_x')
    input_y = tf.placeholder(tf.int64, [None], name='input_y')

    with tf.device('/cpu:0'):
        emb_mat = tf.get_variable(
            'embedding', [config.vocab.size(), config.vocab.emb_dim],
            initializer=tf.constant_initializer(config.vocab.embeddings),
            trainable=config.emb_tune)
        seq_emb = tf.nn.embedding_lookup(emb_mat, input_x)

        seq_mask = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
        # seq_len = tf.reduce_sum(seq_mask, 1)

    with tf.name_scope("csm"):

        # parallel 1-D convolutions over the sequence, kernel widths 5, 3 and 2
        conv1_5 = tf.layers.conv1d(seq_emb,
                                   128,
                                   5,
                                   padding='same',
                                   name='conv1_5')
        conv1_3 = tf.layers.conv1d(seq_emb,
                                   128,
                                   3,
                                   padding='same',
                                   name='conv1_3')
        conv1_2 = tf.layers.conv1d(seq_emb,
                                   128,
                                   2,
                                   padding='same',
                                   name='conv1_2')

        emb_d = tf.concat([conv1_5, conv1_3, conv1_2, seq_emb], -1)
        emb_d = tf.layers.dense(emb_d, 256, name='emb_d')

        B = tf.shape(emb_d)[0]
        num_heads = 2
        att_dim = 128

        feat = []
        for idx in range(num_heads):
            trans = dot_att_layer(emb_d,
                                  emb_d,
                                  seq_mask,
                                  256,
                                  keep_prob=config.keep_prob,
                                  gating=False,
                                  scope="dot_attention_" + str(idx))

            query = tf.get_variable("query_" + str(idx), [att_dim],
                                    initializer=tf.ones_initializer())
            query = tf.tile(tf.expand_dims(query, 0), [B, 1])

            feat_c = att_pool_layer(trans,
                                    query,
                                    seq_mask,
                                    att_dim,
                                    config.keep_prob,
                                    is_train=None,
                                    scope="att_pooling_" + str(idx))
            feat.append(feat_c)
        #
        feat = tf.concat(feat, 1)
        #

    with tf.name_scope("score"):
        #
        fc = tf.contrib.layers.dropout(feat, config.keep_prob)
        fc = tf.layers.dense(fc, 128, name='fc1')
        fc = tf.nn.relu(fc)

        fc = tf.contrib.layers.dropout(fc, config.keep_prob)
        logits = tf.layers.dense(fc, config.num_classes, name='fc2')
        # logits = tf.nn.sigmoid(logits)

        normed_logits = tf.nn.softmax(logits, name='logits')
        y_pred_cls = tf.argmax(logits, 1, name='pred_cls')

    with tf.name_scope("loss"):
        #
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=input_y)
        loss = tf.reduce_mean(cross_entropy, name='loss')

    with tf.name_scope("accuracy"):
        #
        correct_pred = tf.equal(input_y, y_pred_cls)
        acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='metric')

    #
    print(normed_logits)
    print(acc)
    print(loss)
    print()
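
The dot_att_layer helper is external to this listing. As a rough stand-in, a minimal masked scaled dot-product self-attention could look like the sketch below; the name, signature, and omission of the gating path are assumptions, not the original implementation.

import tensorflow as tf

def dot_att_sketch(queries, keys, mask, hidden, keep_prob, scope="dot_att"):
    # queries/keys: [B, T, D]; mask: [B, T] with 1 for real tokens, 0 for padding
    with tf.variable_scope(scope):
        q = tf.layers.dense(queries, hidden, use_bias=False, name='q')  # [B, T, H]
        k = tf.layers.dense(keys, hidden, use_bias=False, name='k')     # [B, T, H]
        scores = tf.matmul(q, k, transpose_b=True) / hidden ** 0.5      # [B, T, T]
        # push padded key positions toward -inf before the softmax
        pad = 1.0 - tf.cast(tf.expand_dims(mask, 1), tf.float32)        # [B, 1, T]
        alphas = tf.nn.softmax(scores - 1e30 * pad, axis=-1)
        alphas = tf.nn.dropout(alphas, keep_prob)
        return tf.matmul(alphas, keys)  # attended values, [B, T, D]
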
Example #4
    def build_inference(settings, input_tensors):

        input_x = input_tensors[0]

        #
        keep_prob = tf.get_variable("keep_prob",
                                    shape=[],
                                    dtype=tf.float32,
                                    trainable=False)
        #
        with tf.device('/cpu:0'):
            emb_mat = tf.get_variable(
                'embedding', [settings.vocab.size(), settings.vocab.emb_dim],
                initializer=tf.constant_initializer(settings.vocab.embeddings),
                trainable=settings.emb_tune)

        with tf.variable_scope("emb"):

            emb_dim = settings.vocab.emb_dim

            emb_x = tf.nn.embedding_lookup(emb_mat, input_x)

            mask_t = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
            # seq_len = tf.reduce_sum(mask_t, 1)

        with tf.variable_scope("posi_emb"):

            d_posi_emb = 64
            d_model = 1024

            posi_emb_x = get_posi_emb(input_x, d_posi_emb, d_model)

            emb_x = tf.concat([emb_x, posi_emb_x], -1)

            emb_all_dim = emb_dim + d_posi_emb * 2

            #
            enc_t = emb_x
            enc_dim = emb_all_dim

        #
        # transformers
        #
        num_layers_trans = 2
        #
        for lid in range(num_layers_trans):

            with tf.variable_scope("self_att_%d" % lid):

                num_head = 2
                num_hidden = 128 // num_head  # per-head hidden size

                sat_t = []
                for idx in range(num_head):
                    sat_t_c = att_qkv_layer(enc_t,
                                            enc_t,
                                            enc_t,
                                            mask_t,
                                            num_hidden,
                                            keep_prob=keep_prob,
                                            scope="t_%d" % idx)
                    sat_t.append(sat_t_c)
                #
                sat_t = tf.concat(sat_t, -1)
                #
                # add & norm
                sat_t = dropout(sat_t, keep_prob=keep_prob)
                sat_t = tf.layers.dense(sat_t, enc_dim)
                #
                enc_t = enc_t + sat_t
                enc_t = tf.contrib.layers.layer_norm(enc_t)
                #
                """
                # dense
                ffn_t = dropout(enc_t, keep_prob=keep_prob)
                ffn_t = tf.layers.dense(ffn_t, enc_dim, activation=tf.nn.relu)
                ffn_t = tf.layers.dense(ffn_t, enc_dim)
                #
                # add & norm
                enc_t = enc_t + ffn_t
                enc_t = tf.contrib.layers.layer_norm(enc_t)
                #
                """

        with tf.variable_scope("feat"):
            """ attention-pooling, 注意力加权采提
            """
            B = tf.shape(enc_t)[0]
            query = tf.get_variable("query", [settings.att_dim],
                                    initializer=tf.ones_initializer())
            query = tf.tile(tf.expand_dims(query, 0), [B, 1])

            feat = att_pool_layer(query,
                                  enc_t,
                                  mask_t,
                                  settings.att_dim,
                                  keep_prob,
                                  scope="att_pooling")

        with tf.variable_scope("score"):
            #
            fc = tf.nn.dropout(feat, keep_prob)
            fc = tf.layers.dense(fc, 128, name='fc1')
            fc = tf.nn.relu(fc)

            fc = tf.nn.dropout(fc, keep_prob)
            logits = tf.layers.dense(fc, settings.num_classes, name='fc2')

            normed_logits = tf.nn.softmax(logits, name='logits')

        #
        print(normed_logits)
        #
        output_tensors = normed_logits, logits
        #
        return output_tensors
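
The get_posi_emb helper is not shown either. Given the bookkeeping above (emb_all_dim = emb_dim + d_posi_emb * 2), a plausible sinusoidal version returning 2 * d_posi_emb features per position is sketched below; this is an assumption, not the original helper.

import tensorflow as tf

def get_posi_emb_sketch(input_x, d_posi_emb, d_model):
    # sinusoidal position features of width 2 * d_posi_emb (sin and cos halves)
    T = tf.shape(input_x)[1]
    pos = tf.cast(tf.range(T), tf.float32)                       # [T]
    idx = tf.cast(tf.range(d_posi_emb), tf.float32)              # [d]
    rates = 1.0 / tf.pow(float(d_model), idx / d_posi_emb)       # [d]
    angles = tf.expand_dims(pos, 1) * tf.expand_dims(rates, 0)   # [T, d]
    pe = tf.concat([tf.sin(angles), tf.cos(angles)], -1)         # [T, 2d]
    B = tf.shape(input_x)[0]
    return tf.tile(tf.expand_dims(pe, 0), [B, 1, 1])             # [B, T, 2d]
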
Example #5

    def build_inference(self, input_tensors):
        """Build the inference part of the graph and return its output tensors."""
        settings = self.settings
        input_x = input_tensors["input_x"]
        #
        keep_prob = tf.get_variable("keep_prob",
                                    shape=[],
                                    dtype=tf.float32,
                                    trainable=False)
        #
        with tf.device('/cpu:0'):
            emb_mat = tf.get_variable(
                'embedding', [settings.vocab.size(), settings.vocab.emb_dim],
                initializer=tf.constant_initializer(settings.vocab.embeddings),
                trainable=settings.emb_tune,
                dtype=tf.float32)
            emb_dim = settings.vocab.emb_dim

        with tf.variable_scope("mask"):

            mask_t = tf.cast(tf.cast(input_x, dtype=tf.bool), dtype=tf.int32)
            # seq_len = tf.reduce_sum(mask_t, 1)
            mask_mat = get_tensor_expanded(mask_t, 1, tf.float32)

        with tf.variable_scope("emb"):

            posi_emb_max_len = 512
            posi_emb_dim = emb_dim
            posi_emb_model = 1024

            posi_emb_mat = get_position_emb_mat(posi_emb_max_len, posi_emb_dim,
                                                posi_emb_model)

            #
            emb_x = get_emb_positioned(input_x, emb_mat, posi_emb_mat)
            emb_all_dim = emb_dim

            #
            seq_input = emb_x
            dim_all = emb_all_dim
            #

        #
        # transformers
        #
        num_layers = 2
        num_heads = 2
        #
        dim_middle = emb_dim * 2
        activation_type = "gelu"
        #
        with tf.variable_scope("transformers"):

            seq_input = transformer_encoder(seq_input, mask_mat, num_layers,
                                            num_heads, dim_all, dim_middle,
                                            activation_type, keep_prob)
            #

        with tf.variable_scope("feat"):
            """ attention-pooling, 注意力加权采提
            """
            B = tf.shape(seq_input)[0]
            query = tf.get_variable("query", [settings.att_dim],
                                    initializer=tf.ones_initializer())
            query = tf.tile(tf.expand_dims(query, 0), [B, 1])

            feat = att_pool_layer(query,
                                  seq_input,
                                  mask_t,
                                  settings.att_dim,
                                  keep_prob,
                                  scope="att_pooling")

        with tf.variable_scope("score"):
            #
            fc = tf.nn.dropout(feat, keep_prob)
            fc = tf.layers.dense(fc, 128, name='fc1')
            fc = tf.nn.relu(fc)

            fc = tf.nn.dropout(fc, keep_prob)
            logits = tf.layers.dense(fc, settings.num_classes, name='fc2')

            normed_logits = tf.nn.softmax(logits, name='logits')

        #
        print(normed_logits)
        #
        output_tensors = {"normed_logits": normed_logits, "logits": logits}
        #
        return output_tensors
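
The helpers used here (get_tensor_expanded, get_position_emb_mat, get_emb_positioned, transformer_encoder) are likewise external. Since posi_emb_dim equals emb_dim in this example, get_emb_positioned plausibly adds position embeddings to the token lookup; a hedged sketch under that assumption:

import tensorflow as tf

def get_emb_positioned_sketch(input_x, emb_mat, posi_emb_mat):
    # token lookup plus additive position embeddings (requires matching dims)
    tok_emb = tf.nn.embedding_lookup(emb_mat, input_x)  # [B, T, D]
    T = tf.shape(input_x)[1]
    pos_emb = posi_emb_mat[:T, :]                       # [T, D]
    return tok_emb + tf.expand_dims(pos_emb, 0)         # broadcast over batch
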