Example #1
    def loss_layer(self, project_logits, lengths, name=None):

        with tf.variable_scope("crf_loss" if not name else name):
            small = -1000.0
            start_logits = tf.concat(
                [small * tf.ones(shape=[self.batch_size, 1, self.num_tags]), tf.zeros(shape=[self.batch_size, 1, 1])],
                axis=-1)

            pad_logits = tf.cast(small * tf.ones([self.batch_size, self.num_steps, 1]), tf.float32)
            logits = tf.concat([project_logits, pad_logits], axis=-1)
            logits = tf.concat([start_logits, logits], axis=1)
            targets = tf.concat(
                [tf.cast(self.num_tags * tf.ones([self.batch_size, 1]), tf.int32), self.targets], axis=-1)

            self.trans = tf.get_variable(
                "transitions",
                shape=[self.num_tags + 1, self.num_tags + 1],
                initializer=self.initializer)

            log_likelihood, self.trans = crf_log_likelihood(
                inputs=logits,
                tag_indices=targets,
                transition_params=self.trans,
                sequence_lengths=lengths + 1)

            return tf.reduce_mean(-log_likelihood)
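Note: the loss above pads the emission scores with one extra start tag, so any decode step has to use the same (num_tags + 1)-sized transition matrix and then drop the prepended start position. A minimal sketch of that matching decode step (the method name decode_layer is illustrative, not part of the original example), assuming the same self.trans, lengths, and padded logits built above:

    def decode_layer(self, logits, lengths):
        # Sketch only: decode with the padded transition matrix created in loss_layer.
        decode_tags, _ = tf.contrib.crf.crf_decode(
            potentials=logits,               # [batch_size, num_steps + 1, num_tags + 1]
            transition_params=self.trans,    # [num_tags + 1, num_tags + 1]
            sequence_length=lengths + 1)     # +1 for the prepended start step
        return decode_tags[:, 1:]            # drop the artificial start position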
Example #2
    def __init__(self, config, embeddings, ntags, nchars):
        self.config = config
        self.embeddings = embeddings
        self.nchars = nchars
        self.ntags = ntags
        self.logger = config.logger

        self.word_ids = tf.placeholder(tf.int32,
                                       shape=[None, None],
                                       name="word_ids")

        self.sequence_lengths = tf.placeholder(tf.int32,
                                               shape=[None],
                                               name="sequence_lengths")

        self.char_ids = tf.placeholder(tf.int32,
                                       shape=[None, None, None],
                                       name="char_ids")

        self.word_lengths = tf.placeholder(tf.int32,
                                           shape=[None, None],
                                           name="word_lengths")

        self.labels = tf.placeholder(tf.int32,
                                     shape=[None, None],
                                     name="labels")

        self.dropout = tf.placeholder(dtype=tf.float32,
                                      shape=[],
                                      name="dropout")
        self.lr = tf.placeholder(dtype=tf.float32, shape=[], name="lr")

        with tf.variable_scope("words"):
            _word_embeddings = tf.Variable(
                self.embeddings,
                name="_word_embeddings",
                dtype=tf.float32,
                trainable=self.config.train_embeddings)
            word_embeddings = tf.nn.embedding_lookup(_word_embeddings,
                                                     self.word_ids,
                                                     name="word_embeddings")
            print(word_embeddings)

        with tf.variable_scope("chars"):
            _char_embeddings = tf.get_variable(
                name="_char_embeddings",
                dtype=tf.float32,
                shape=[self.nchars, self.config.dim_char])
            char_embeddings = tf.nn.embedding_lookup(_char_embeddings,
                                                     self.char_ids,
                                                     name="char_embeddings")
            shape = tf.shape(char_embeddings)
            char_embeddings = tf.reshape(
                char_embeddings, shape=[-1, shape[-2], self.config.dim_char])
            word_lengths = tf.reshape(self.word_lengths, shape=[-1])
            cell_fw = tf.contrib.rnn.LSTMCell(self.config.char_hidden_size,
                                              state_is_tuple=True)
            cell_bw = tf.contrib.rnn.LSTMCell(self.config.char_hidden_size,
                                              state_is_tuple=True)

            _, ((_, output_fw), (_,
                                 output_bw)) = tf.nn.bidirectional_dynamic_rnn(
                                     cell_fw,
                                     cell_bw,
                                     inputs=char_embeddings,
                                     sequence_length=word_lengths,
                                     dtype=tf.float32)

            output = tf.concat([output_fw, output_bw], axis=-1)
            output = tf.reshape(
                output, shape=[-1, shape[1], 2 * self.config.char_hidden_size])

            # word_embeddings = tf.concat([word_embeddings, output], axis=-1)

        self.word_embeddings = tf.nn.dropout(output, self.dropout)

        with tf.variable_scope("bi-lstm"):
            cell_fw = tf.contrib.rnn.LSTMCell(self.config.hidden_size)
            cell_bw = tf.contrib.rnn.LSTMCell(self.config.hidden_size)
            # cell_fw = tf.contrib.rnn.MultiRNNCell([cell_fw] * 3, state_is_tuple=True)
            # print(self.word_embeddings)
            # cell_bw = tf.contrib.rnn.MultiRNNCell([cell_bw] * 3, state_is_tuple=True)
            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                self.word_embeddings,
                sequence_length=self.sequence_lengths,
                dtype=tf.float32)
            output = tf.concat([output_fw, output_bw], axis=-1)
            output = tf.nn.dropout(output, self.dropout)

        with tf.variable_scope("proj"):
            W = tf.get_variable(
                "W",
                shape=[2 * self.config.hidden_size, self.ntags],
                dtype=tf.float32)

            b = tf.get_variable("b",
                                shape=[self.ntags],
                                dtype=tf.float32,
                                initializer=tf.zeros_initializer())

            ntime_steps = tf.shape(
                output
            )[1]  # output.shape = [batch size, num tokens (i.e., num timesteps), word representation dim]

            output = tf.reshape(output, [-1, 2 * self.config.hidden_size])
            # Highway Layer
            output = self.highway(output, 2 * self.config.hidden_size,
                                  tf.nn.relu)
            pred = tf.matmul(
                output, W
            ) + b  # each word representation is transformed to a vector of 'ntags' (i.e., number of NER tags) dimensions
            # => 'pred' serves as logits values which be often pushed into a softmax layer.
            self.logits = tf.reshape(pred, [-1, ntime_steps, self.ntags])

        log_likelihood, self.transition_params = crf_log_likelihood(
            self.logits, self.labels, self.sequence_lengths)
        self.loss = tf.reduce_mean(-log_likelihood)

        tf.summary.scalar("loss", self.loss)

        with tf.variable_scope("train_step"):
            optimizer = tf.train.AdamOptimizer(self.lr)
            self.train_op = optimizer.minimize(self.loss)

        self.init = tf.global_variables_initializer()
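Note: the constructor above only builds the graph. A minimal training-step sketch might look like the following; the model variable (an instance of the class this __init__ belongs to) and the already-padded numpy batch are assumptions for illustration, not part of the original example.

import numpy as np
import tensorflow as tf

# Hypothetical padded batch, shaped to match the placeholders defined above.
word_ids = np.zeros((32, 50), dtype=np.int32)        # [batch, max_sentence_len]
char_ids = np.zeros((32, 50, 20), dtype=np.int32)    # [batch, max_sentence_len, max_word_len]
word_lengths = np.full((32, 50), 20, dtype=np.int32)
sequence_lengths = np.full((32,), 50, dtype=np.int32)
labels = np.zeros((32, 50), dtype=np.int32)

with tf.Session() as sess:
    sess.run(model.init)
    _, loss = sess.run(
        [model.train_op, model.loss],
        feed_dict={model.word_ids: word_ids,
                   model.char_ids: char_ids,
                   model.word_lengths: word_lengths,
                   model.sequence_lengths: sequence_lengths,
                   model.labels: labels,
                   model.dropout: 0.5,   # keep probability
                   model.lr: 1e-3})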
Example #3
    def __init__(self, config):
        self.cfg = config
        # Create folders
        if not os.path.exists(self.cfg["checkpoint_path"]):
            os.makedirs(self.cfg["checkpoint_path"])
        if not os.path.exists(self.cfg["summary_path"]):
            os.makedirs(self.cfg["summary_path"])
        #Create logger
        self.logger = get_logger(
            os.path.join(self.cfg["checkpoint_path"], "log.txt"))
        # Load dictionary
        dict_data = load_data(self.cfg["vocab"])
        self.word_dict, self.char_dict = dict_data["word_dict"], dict_data[
            "char_dict"]
        self.label_dict = dict_data["label_dict"]
        del dict_data
        self.word_vocab_size = len(self.word_dict)
        self.char_vocab_size = len(self.char_dict)
        self.label_vocab_size = len(self.label_dict)

        self.max_to_keep = self.cfg["max_to_keep"]
        self.checkpoint_path = self.cfg["checkpoint_path"]
        self.summary_path = self.cfg["summary_path"]
        self.word_embedding = self.cfg["word_embedding"]

        self.sess, self.saver = None, None

        # Add placeholder
        self.words = tf.placeholder(
            tf.int32, shape=[None, None],
            name="words")  # shape = (batch_size, max_time)
        self.labels = tf.placeholder(
            tf.int32, shape=[None, None],
            name="label")  # shape = (batch_size, max_time)
        self.seq_len = tf.placeholder(tf.int32, shape=[None], name="seq_len")
        # shape = (batch_size, max_time, max_word_length)
        self.chars = tf.placeholder(tf.int32,
                                    shape=[None, None, None],
                                    name="chars")
        self.char_seq_len = tf.placeholder(tf.int32,
                                           shape=[None, None],
                                           name="char_seq_len")
        # hyper-parameters
        self.is_train = tf.placeholder(tf.bool, shape=[], name="is_train")
        self.batch_size = tf.placeholder(tf.int32, name="batch_size")
        self.keep_prob = tf.placeholder(tf.float32, name="keep_probability")
        self.drop_rate = tf.placeholder(tf.float32, name="dropout_rate")
        self.lr = tf.placeholder(tf.float32, name="learning_rate")

        # Build embedding layer
        with tf.variable_scope("embeddings"):
            self.word_embeddings = tf.Variable(np.load(
                self.cfg["word_embedding"])["embeddings"],
                                               name="embedding",
                                               dtype=tf.float32,
                                               trainable=False)

            word_emb = tf.nn.embedding_lookup(self.word_embeddings,
                                              self.words,
                                              name="word_emb")
            print("Word embedding shape: {}".format(
                word_emb.get_shape().as_list()))

            self.char_embeddings = tf.get_variable(
                name="char_embedding",
                dtype=tf.float32,
                trainable=True,
                shape=[self.char_vocab_size, self.cfg["char_emb_dim"]])
            char_emb = tf.nn.embedding_lookup(self.char_embeddings,
                                              self.chars,
                                              name="chars_emb")
            char_represent = multi_conv1d(char_emb,
                                          self.cfg["filter_sizes"],
                                          self.cfg["channel_sizes"],
                                          drop_rate=self.drop_rate,
                                          is_train=self.is_train)
            print("Chars representation shape: {}".format(
                char_represent.get_shape().as_list()))
            word_emb = tf.concat([word_emb, char_represent], axis=-1)

            self.word_emb = tf.layers.dropout(word_emb,
                                              rate=self.drop_rate,
                                              training=self.is_train)
            print("Word and chars concatenation shape: {}".format(
                self.word_emb.get_shape().as_list()))

        # Build model ops
        with tf.name_scope("BiLSTM"):
            with tf.variable_scope('forward'):
                lstm_fw_cell = tf.keras.layers.LSTMCell(self.cfg["num_units"])
            with tf.variable_scope('backward'):
                lstm_bw_cell = tf.keras.layers.LSTMCell(self.cfg["num_units"])
            rnn_outs, *_ = bidirectional_dynamic_rnn(
                lstm_fw_cell,
                lstm_bw_cell,
                self.word_emb,
                sequence_length=self.seq_len,
                dtype=tf.float32)

            # As we have a Bi-LSTM, we have two outputs which are not connected, so we need to merge them.
            rnn_outs = tf.concat(rnn_outs, axis=-1)

            #            rnn_outs = tf.layers.dropout(rnn_outs, rate=self.drop_rate, training=self.is_train)
            outputs = rnn_outs
            print("Output shape: {}".format(outputs.get_shape().as_list()))

            self.logits = tf.layers.dense(outputs,
                                          units=self.label_vocab_size,
                                          use_bias=True)
            #            self.logits = tf.nn.softmax(self.logits)
            print("Logits shape: {}".format(self.logits.get_shape().as_list()))
        # Define loss and optimizer
        crf_loss, self.trans_params = crf_log_likelihood(
            self.logits, self.labels, self.seq_len)
        #        losses = focal_loss(self.gamma,self.alpha)
        #        self.loss = losses(self.labels, self.logits)
        self.loss = tf.reduce_mean(-crf_loss)
        tf.summary.scalar("loss", self.loss)

        optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
        self.train_op = optimizer.minimize(self.loss)

        print('Params number: {}'.format(
            np.sum([
                np.prod(v.get_shape().as_list())
                for v in tf.trainable_variables()
            ])))

        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=sess_config)
        self.saver = tf.train.Saver(max_to_keep=self.max_to_keep)
        self.sess.run(tf.global_variables_initializer())
Example #4
    def __init__(self,vocab_size,word_dim,hidden_dim,
                 pad_word,init_embedding=None,
                 num_classes=4,clip=5,
                 lr=0.001,l2_reg_lamda=0.0,num_layers=1,
                 rnn_cell='lstm',bi_direction=False,
                 hidden_dim2=128,hyper_embedding_size=16
                 ):

        self.x=tf.placeholder(dtype=tf.int32,shape=[None,None,9],name='input_x')
        self.y=tf.placeholder(dtype=tf.int32,shape=[None,None],name='input_y')
        self.dict=tf.placeholder(dtype=tf.float32,shape=[None,None,8],name='dict')
        self.dropout_keep_prob=tf.placeholder(dtype=tf.float32,name='dropout_keep_prob')

        # e.g. with batch_size=2, x_batch.shape is (2, 163, 9)
        # actual length of each sentence in the batch, e.g. seq_length=[153 163]
        self.seq_length=tf.reduce_sum(tf.cast(tf.not_equal(self.x[:,:,2], tf.ones_like(self.x[:,:,2])*pad_word), tf.int32), 1)
        # cast bool to 0/1: comparing against pad_word marks which positions hold a character (0 = padding)
        self.weights=tf.cast(tf.not_equal(self.x[:,:,2], tf.ones_like(self.x[:,:,2])*pad_word), tf.float32)
        # x_batch.shape is (2, 163, 9), so the first dimension is the batch size
        self.batch_size = tf.shape(self.x)[0]

        # build the embedding table
        if init_embedding is None:
            self.embedding=tf.get_variable(shape=[vocab_size,word_dim],dtype=tf.float32,name='embedding')
        else:
            self.embedding=tf.Variable(init_embedding,dtype=tf.float32,name='embedding')

        # map x from ids to embeddings
        with tf.variable_scope('embedding'):
            x=tf.nn.embedding_lookup(self.embedding,self.x)  # x embeddings
            # reshape to (batch, n, 9 * word_dim): merge the 9 feature columns of x into one dimension
            x = tf.reshape(x, [self.batch_size, -1, 9 * word_dim])
        def lstm_cell(dim):
            cell=rnn.BasicLSTMCell(dim)
            cell=rnn.DropoutWrapper(cell,output_keep_prob=self.dropout_keep_prob)
            return cell

        def hyperlstm_cell(dim):
            cell=HyperLSTMCell(num_units=hidden_dim,forget_bias=1.0,use_recurrent_dropout=False,
                               dropout_keep_prob=1.0,use_layer_norm=False,hyper_num_units=hidden_dim2,
                               hyper_embedding_size=hyper_embedding_size,hyper_use_recurrent_dropout=False)
            # the wrapper adds dropout to the cell's output
            cell=rnn.DropoutWrapper(cell,output_keep_prob=self.dropout_keep_prob)
            return cell

        # first layer: feed the (9*word_dim + 8)-dim inputx into a Bi-LSTM
        with tf.variable_scope('first_layer'):
            # concatenate along the last axis: (?, ?, 900) and (?, ?, 8) give (?, ?, 908)
            inputx=tf.concat([x,self.dict],axis=2)
            # forward_output shape=(?, ?, 128)
            (forward_output,backword_output),_=tf.nn.bidirectional_dynamic_rnn(
                cell_fw=hyperlstm_cell(hidden_dim),  # RNNCell instance for the forward direction
                cell_bw=hyperlstm_cell(hidden_dim),  # RNNCell instance for the backward direction
                inputs=inputx,                       # RNN inputs
                sequence_length=self.seq_length,     # actual length of each sequence in the batch
                dtype=tf.float32                     # dtype of the initial state and outputs
            )

            output=tf.concat([forward_output,backword_output],axis=2)  # merged output, shape=(?, ?, 256)

        with tf.variable_scope('loss'):

            self.output=layers.fully_connected(  # fully connected projection layer
                inputs=output,
                num_outputs=num_classes,
                activation_fn=None,  # default is relu; None means no activation
                )
            # crf
            # log_likelihood: log likelihood, dtype=float32
            # transition_params: transition matrix, shape=(4, 4), dtype=float32_ref
            log_likelihood, self.transition_params = crf.crf_log_likelihood(
                self.output, self.y, self.seq_length)
            # mean of the negative log likelihood, dtype=float32
            loss = tf.reduce_mean(-log_likelihood)

        with tf.variable_scope('train_op'):
            self.optimizer=tf.train.AdamOptimizer(learning_rate=lr)  # Adam optimizer
            tvars=tf.trainable_variables()  # all trainable variables
            # add_n sums the per-variable tf.nn.l2_loss terms into one L2 penalty
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            self.loss=loss+l2_reg_lamda*l2_loss

            grads,_=tf.clip_by_global_norm(tf.gradients(self.loss,tvars),clip)
            self.train_op=self.optimizer.apply_gradients(zip(grads,tvars))
Example #5
    def __init__(self, settings):
        self.embedding_size = settings.embedding_size
        self.time_step = settings.time_step
        self.hidden_size = settings.hidden_size
        self.seq_dim = settings.seq_dim
        self.layers_num = settings.layers_num
        self.n_classes = settings.n_classes
        self.n_seq = settings.n_seq
        self.vocabulary_size = settings.vocabulary_size
        self._weights_decay = settings.weights_decay
        self._embed_dropout_prob = settings.embed_dropout_prob
        self._cnn_kernel_outdim = settings.cnn_kernel_outdim
        self._cnn_kernel_size = settings.cnn_kernel_size
        self._cnn_stride = settings.cnn_stride
        self._global_steps = tf.Variable(0, trainable=False, name='Global_Step')
        self.initializer = initializers.xavier_initializer()

        self._dropout_prob = tf.placeholder(tf.float32, [])
        # input placeholder
        with tf.name_scope('Inputs'):
            self._sentence_lengths = tf.placeholder(tf.int32, [None], name='sentence_lengths')
            self._x_inputs = tf.placeholder(tf.int32, [None, self.time_step], name='x_input')
            self._y_inputs = tf.placeholder(tf.int32, [None, self.time_step], name='y_input')
            self._seq_inputs = tf.placeholder(tf.int32, [None, self.time_step], name='seq_input')
            self._batch_size = tf.placeholder(tf.int32, [], name='batch_size')

        self._embedding = []
        with tf.variable_scope('embedding'):
            self._char_embedding = tf.get_variable(shape=[self.vocabulary_size+1, self.embedding_size],
                                                   initializer=self.initializer,
                                                   dtype=tf.float32, trainable=True, name='char_embedding')
            self.char_embedding = tf.nn.embedding_lookup(self._char_embedding, self.x_inputs)
            self._embedding.append(self.char_embedding)
            with tf.variable_scope('seq_embedding'):
                self.seq_embedding = tf.get_variable(shape=[self.n_seq, self.seq_dim],
                                                     initializer=self.initializer,
                                                     dtype=tf.float32, trainable=True, name='seq_embedding')
                self._embedding.append(tf.nn.embedding_lookup(self.seq_embedding, self.seq_inputs))
            with tf.variable_scope('cnn_embedding'):
                self.cnn_embedding = self.cnn()
                self._embedding.append(self.cnn_embedding)
            self.embedding = tf.concat(self._embedding, axis=-1)
            self.embedding = tf.nn.dropout(self.embedding, self._embed_dropout_prob)
        with tf.variable_scope('bi_lstm'):
            bi_lstm_output = self.inference(self.embedding)
            bi_lstm_output = tf.nn.dropout(bi_lstm_output, self._dropout_prob)
        with tf.variable_scope('flatten_middle'):
            flatten_input = tf.reshape(bi_lstm_output, [-1, self.hidden_size * 2])
            weights = self._variable_with_weight_decay('weights_middle', [self.hidden_size*2, self.hidden_size],
                                                       self.weights_decay)
            tf.summary.histogram('weights_middle', weights)
            biases = self._variable_on_cpu('biases_middle', [self.hidden_size], tf.zeros_initializer())
            tf.summary.histogram('biases_middle', biases)
            _flatten_middle = tf.matmul(flatten_input, weights)+biases
            flatten_middle = tf.tanh(_flatten_middle)
        with tf.variable_scope('flatten_out'):
            weights = self._variable_with_weight_decay('weights_out', [self.hidden_size, self.n_classes],
                                                       self.weights_decay)
            tf.summary.histogram('weights_out', weights)
            biases = self._variable_on_cpu('biases_out', [self.n_classes], tf.zeros_initializer())
            tf.summary.histogram('biases_out', biases)
            flatten_out = tf.nn.xw_plus_b(flatten_middle, weights, biases)
        with tf.name_scope('crf'):  # name_scope here, not variable_scope
            self.logits = tf.reshape(flatten_out, [-1, self.time_step, self.n_classes])
            self.transition_params = tf.get_variable('transitions',
                                                     shape=[self.n_classes, self.n_classes],
                                                     initializer=self.initializer)

            log_likelihood, self.transition_params = crf_log_likelihood(
                inputs=self.logits, tag_indices=self.y_inputs,
                transition_params=self.transition_params, sequence_lengths=self.sentence_lengths)
            self._crf_loss = -tf.reduce_mean(log_likelihood)
            tf.summary.scalar('crf_lost', self._crf_loss)
            self.lost = self._crf_loss + tf.add_n(tf.get_collection('losses'))
            tf.summary.scalar('lost', self.lost)

        with tf.name_scope('predict'):
            self.predict_sentence, self.best_score = crf_decode(
                self.logits, self.transition_params, self.sentence_lengths)
            self._correct_predict = tf.equal(self.predict_sentence, self.y_inputs)
            self.accuracy = tf.reduce_mean(tf.cast(self._correct_predict, 'float'))
            tf.summary.scalar('accuracy', self.accuracy)
            # self.conf_matrix = tf.confusion_matrix(self.y_inputs, self.predict_sentence, num_classes=self.n_classes)
        self.saver = tf.train.Saver(max_to_keep=2)
Example #6
    def loss(self):
        log_likelihood, self.transition_params = crf_log_likelihood(
            inputs=self.logits,
            tag_indices=self.input_y,
            sequence_lengths=self.sequence_lengths)
        crf_loss = -tf.reduce_mean(log_likelihood)
        return crf_loss
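Note: the snippets on this page all live inside larger model classes. As a point of reference, here is a tiny self-contained sketch of the same tf.contrib.crf.crf_log_likelihood call with made-up shapes (batch of 2, 5 time steps, 4 tags); all names and values are illustrative.

import numpy as np
import tensorflow as tf
from tensorflow.contrib import crf

batch_size, num_steps, num_tags = 2, 5, 4
logits = tf.constant(np.random.randn(batch_size, num_steps, num_tags), dtype=tf.float32)
tags = tf.constant(np.random.randint(num_tags, size=(batch_size, num_steps)), dtype=tf.int32)
lengths = tf.constant([5, 3], dtype=tf.int32)  # true (unpadded) sequence lengths

# When no transition_params are passed in, crf_log_likelihood creates a
# [num_tags, num_tags] "transitions" variable itself and returns it.
log_likelihood, transition_params = crf.crf_log_likelihood(
    inputs=logits, tag_indices=tags, sequence_lengths=lengths)
loss = tf.reduce_mean(-log_likelihood)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(loss))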
Example #7
    def __init__(self,
                 config,
                 features,
                 dropout_keep_prob,
                 init_embeddings=None):

        super(DictHyperModel, self).__init__()
        input_ids = features["input_ids"]
        input_dicts = features["input_dicts"]
        seq_length = features["seq_length"]
        label_ids = features["label_ids"]

        self.label_ids = label_ids
        self.dict = input_dicts
        self.seq_length = seq_length

        x, batch_size, feat_size = model_utils.input_embedding(
            input_ids, config, init_embeddings=init_embeddings)
        x = tf.reshape(x, [batch_size, -1, feat_size * config.embedding_size])
        x = tf.nn.dropout(x, dropout_keep_prob)

        def hyperlstm_cell(dim, input_main_dim, input_hyper_dim):
            cell = HyperLSTMCell(
                num_units=dim,
                input_main_dim=input_main_dim,
                input_hyper_dim=input_hyper_dim,
                forget_bias=1.0,
                use_recurrent_dropout=False,
                dropout_keep_prob=1.0,
                use_layer_norm=False,
                hyper_num_units=config.dict_hidden_size,
                hyper_embedding_size=config.hyper_embedding_size,
                hyper_use_recurrent_dropout=False)
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=dropout_keep_prob)
            return cell

        with tf.variable_scope('hyper'):
            self.dict = tf.cast(self.dict, dtype=tf.float32)
            input_main_dim = model_utils.get_shape_list(x, expected_rank=3)[2]
            input_hyper_dim = model_utils.get_shape_list(self.dict,
                                                         expected_rank=3)[2]
            x = tf.concat([x, self.dict], axis=2)
            (forward_output,
             backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=hyperlstm_cell(config.hidden_size, input_main_dim,
                                        input_hyper_dim),
                 cell_bw=hyperlstm_cell(config.hidden_size, input_main_dim,
                                        input_hyper_dim),
                 inputs=x,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            output = tf.concat([forward_output, backword_output], axis=2)

        with tf.variable_scope('output'):
            scores = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            transition_param = tf.get_variable(
                "transitions", [config.num_classes, config.num_classes])
            self.prediction, _ = crf.crf_decode(scores, transition_param,
                                                self.seq_length)

        with tf.variable_scope('loss'):
            # crf
            if config.multitag:
                self.label_ids = tf.cast(self.label_ids, dtype=tf.bool)
                self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
                    scores, self.label_ids, self.seq_length, transition_param)
            else:
                self.log_likelihood, _ = crf.crf_log_likelihood(
                    scores, self.label_ids, self.seq_length, transition_param)
            self.loss = tf.reduce_mean(-self.log_likelihood)
Example #8
    def CRF_layer(self):
        self.logit = self.bilstm.logit
        with tf.name_scope('crf'):
            log_likelihood_, self.transition = crf.crf_log_likelihood(
                self.logit, self.bilstm.input_y, self.bilstm.seq_lengths)
            self.cost = -tf.reduce_mean(log_likelihood_)
Example #9
    def __init__(self,
                 vocab_size,
                 word_dim,
                 hidden_dim,
                 pad_word,
                 init_embedding=None,
                 num_classes=4,
                 clip=5,
                 lr=0.001,
                 l2_reg_lamda=0.0,
                 num_layers=1,
                 rnn_cell='lstm',
                 bi_direction=False,
                 hidden_dim2=128,
                 hyper_embedding_size=16):
        self.x = tf.placeholder(dtype=tf.int32,
                                shape=[None, None, 9],
                                name='input_x')
        self.y = tf.placeholder(dtype=tf.int32,
                                shape=[None, None],
                                name='input_y')
        self.dict = tf.placeholder(dtype=tf.float32,
                                   shape=[None, None, 8],
                                   name='dict')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        self.seq_length = tf.reduce_sum(
            tf.cast(
                tf.not_equal(self.x[:, :, 2],
                             tf.ones_like(self.x[:, :, 2]) * pad_word),
                tf.int32), 1)
        self.weights = tf.cast(
            tf.not_equal(self.x[:, :, 2],
                         tf.ones_like(self.x[:, :, 2]) * pad_word), tf.float32)
        self.batch_size = tf.shape(self.x)[0]

        if init_embedding is None:
            self.embedding = tf.get_variable(shape=[vocab_size, word_dim],
                                             dtype=tf.float32,
                                             name='embedding')
        else:
            self.embedding = tf.Variable(init_embedding,
                                         dtype=tf.float32,
                                         name='embedding')

        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(self.embedding, self.x)
            x = tf.reshape(x, [self.batch_size, -1, 9 * word_dim])

        x = tf.nn.dropout(x, self.dropout_keep_prob)

        def lstm_cell(dim):
            cell = rnn.BasicLSTMCell(dim)
            cell = rnn.DropoutWrapper(cell,
                                      output_keep_prob=self.dropout_keep_prob)
            return cell

        with tf.variable_scope('character'):
            (forward_output,
             backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=lstm_cell(hidden_dim),
                 cell_bw=lstm_cell(hidden_dim),
                 inputs=x,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            output = tf.concat([forward_output, backword_output], axis=2)

        with tf.variable_scope('dict'):
            (forward_output,
             backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=lstm_cell(hidden_dim2),
                 cell_bw=lstm_cell(hidden_dim2),
                 inputs=self.dict,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            dict_output = tf.concat([forward_output, backword_output], axis=2)

        output = tf.concat([dict_output, output], axis=2)

        with tf.variable_scope('loss'):

            self.output = layers.fully_connected(inputs=output,
                                                 num_outputs=num_classes,
                                                 activation_fn=None)

            #crf
            log_likelihood, self.transition_params = crf.crf_log_likelihood(
                self.output, self.y, self.seq_length)

            loss = tf.reduce_mean(-log_likelihood)

        with tf.variable_scope('train_op'):
            self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)
            tvars = tf.trainable_variables()
            l2_loss = tf.add_n(
                [tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            self.loss = loss + l2_loss * l2_reg_lamda
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clip)
            self.train_op = self.optimizer.apply_gradients(zip(grads, tvars))
Example #10
    def bilstm_crf(self):
        with tf.device('/cpu:0'):
            _word_embeddings = tf.Variable(self.config.embeddings,
                                           dtype=tf.float32,
                                           trainable=self.config.update_embedding)
            word_embeddings = tf.nn.embedding_lookup(params=_word_embeddings,
                                                     ids=self.word_ids)
            self.word_embeddings = word_embeddings

        with tf.variable_scope("bi-lstm"):
            cell_fw = LSTMCell(self.config.hidden_dim)
            cell_bw = LSTMCell(self.config.hidden_dim)
            (output_fw_seq, output_bw_seq), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=self.word_embeddings,
                sequence_length=self.sequence_lengths,
                dtype=tf.float32)
            output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)
            output = tf.nn.dropout(output, self.dropout_pl)

        with tf.variable_scope("proj"):
            W = tf.get_variable(name="W",
                                shape=[2 * self.config.hidden_dim, self.config.num_tags],
                                initializer=tf.contrib.layers.xavier_initializer(),
                                dtype=tf.float32)

            b = tf.get_variable(name="b",
                                shape=[self.config.num_tags],
                                initializer=tf.zeros_initializer(),
                                dtype=tf.float32)

            s = tf.shape(output)
            output = tf.reshape(output, [-1, 2 * self.config.hidden_dim])
            pred = tf.matmul(output, W) + b

            self.logits = tf.reshape(pred, [-1, s[1], self.config.num_tags])

            if not self.config.CRF:
                self.labels_softmax_ = tf.argmax(self.logits, axis=-1)
                self.labels_softmax_ = tf.cast(self.labels_softmax_, tf.int32)

        with tf.variable_scope("loss"):
            if self.config.CRF:
                log_likelihood, self.transition_params = crf_log_likelihood(inputs=self.logits,
                                                                            tag_indices=self.labels,
                                                                            sequence_lengths=self.sequence_lengths)
                self.loss = -tf.reduce_mean(log_likelihood)

            else:
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                                        labels=self.labels)
                mask = tf.sequence_mask(self.sequence_lengths)
                losses = tf.boolean_mask(losses, mask)
                self.loss = tf.reduce_mean(losses)

        with tf.variable_scope("optimizer"):
            if self.config.optimizer == 'Adam':
                optim = tf.train.AdamOptimizer(learning_rate=self.lr_pl)
            elif self.config.optimizer == 'Adadelta':
                optim = tf.train.AdadeltaOptimizer(learning_rate=self.lr_pl)
            elif self.config.optimizer == 'Adagrad':
                optim = tf.train.AdagradOptimizer(learning_rate=self.lr_pl)
            elif self.config.optimizer == 'RMSProp':
                optim = tf.train.RMSPropOptimizer(learning_rate=self.lr_pl)
            elif self.config.optimizer == 'Momentum':
                optim = tf.train.MomentumOptimizer(learning_rate=self.lr_pl, momentum=0.9)
            elif self.config.optimizer == 'SGD':
                optim = tf.train.GradientDescentOptimizer(learning_rate=self.lr_pl)
            else:
                optim = tf.train.GradientDescentOptimizer(learning_rate=self.lr_pl)

            grads_and_vars = optim.compute_gradients(self.loss)
            grads_and_vars_clip = [[tf.clip_by_value(g, -self.config.clip_grad, self.config.clip_grad), v] for g, v in grads_and_vars]
            self.train_op = optim.apply_gradients(grads_and_vars_clip, global_step=self.global_step)
Example #11
    def __init__(self,
                 config: DictConcatConfig,
                 is_training,
                 features,
                 init_embedding=None):

        super(DictConcatModel, self).__init__()
        input_ids = features["input_ids"]
        input_dicts = features["input_dicts"]
        seq_length = features["seq_length"]
        label_ids = features["label_ids"]

        self.input_ids = input_ids
        self.label_ids = label_ids
        self.dict = input_dicts
        self.seq_length = seq_length
        self.is_training = is_training
        input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
        self.batch_size = input_shape[0]
        self.max_length = input_shape[1]
        self.window_size = input_shape[2]

        if not is_training:
            config.embedding_dropout_prob = 0.0
            config.hidden_dropout_prob = 0.0

        if init_embedding is None:
            self.embedding = tf.get_variable(
                shape=[config.vocab_size, config.embedding_size],
                dtype=tf.float32,
                name='embedding',
                initializer=tf.truncated_normal_initializer(stddev=0.02))
        else:
            self.embedding = tf.Variable(init_embedding,
                                         dtype=tf.float32,
                                         name='embedding')

        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(self.embedding, self.input_ids)
            feat_size = self.window_size
            x = tf.reshape(
                x, [self.batch_size, -1, feat_size * config.embedding_size])

        x = model_utils.dropout(x, config.embedding_dropout_prob)

        def lstm_cell(dim):
            cell = tf.nn.rnn_cell.LSTMCell(dim, name='basic_lstm_cell')
            cell = rnn.DropoutWrapper(cell,
                                      output_keep_prob=1.0 -
                                      config.hidden_dropout_prob)
            return cell

        with tf.variable_scope('character'):
            (forward_output,
             backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=lstm_cell(config.hidden_size),
                 cell_bw=lstm_cell(config.hidden_size),
                 inputs=x,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            output = tf.concat([forward_output, backword_output], axis=2)

        with tf.variable_scope('dict'):
            self.dict = tf.cast(self.dict, dtype=tf.float32)
            (forward_output,
             backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=lstm_cell(config.dict_hidden_size),
                 cell_bw=lstm_cell(config.dict_hidden_size),
                 inputs=self.dict,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            dict_output = tf.concat([forward_output, backword_output], axis=2)

        with tf.variable_scope('output'):
            output = tf.concat([dict_output, output], axis=2)
            scores = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            transition_param = tf.get_variable(
                "transitions", [config.num_classes, config.num_classes])
            self.prediction, _ = crf.crf_decode(scores, transition_param,
                                                self.seq_length)

        with tf.variable_scope('loss'):
            # crf
            self.log_likelihood, _ = crf.crf_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)
            self.loss = tf.reduce_mean(-self.log_likelihood)
Example #12
    def Model(self):
        # tf.device pins the ops to a specific device; tf.name_scope groups them under a named scope
        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            # with tf.device(None), tf.name_scope('embedding'):
            # embedding_ = tf.Variable(tf.truncated_normal([pm.vocab_size, pm.embedding_size], -0.25, 0.25), name='w')
            # use pre-trained word vectors instead of random initialization
            embedding_ = get_bert_vec(pm.word_vec_path)
            # look up the input ids in the embedding tensor
            embedding = tf.nn.embedding_lookup(embedding_, self.input_x)
            # apply dropout to the embeddings to reduce overfitting
            self.embedding = tf.nn.dropout(embedding, pm.keep_pro)

        with tf.name_scope('biLSTM'):
            # define the bidirectional LSTM; tf.nn.rnn_cell.LSTMCell and tf.contrib.rnn.LSTMCell are the same
            # cell_fw = tf.nn.rnn_cell.LSTMCell(pm.hidden_dim)
            # cell_bw = tf.nn.rnn_cell.LSTMCell(pm.hidden_dim)
            cell_fw = tf.contrib.rnn.LSTMCell(pm.hidden_dim)
            cell_bw = tf.contrib.rnn.LSTMCell(pm.hidden_dim)

            # dynamic version of a bidirectional recurrent network
            outputs, outstats = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=self.embedding,
                sequence_length=self.seq_length,
                dtype=tf.float32)
            # concatenate the forward and backward outputs
            outputs = tf.concat(outputs, 2)

        with tf.name_scope('output'):
            s = tf.shape(outputs)
            # output = tf.reshape(outputs, [-1, 2 * pm.hidden_dim])
            # dense1 = tf.layers.dense(inputs=output, units=512, activation=tf.nn.relu,
            #                          kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
            # dense2 = tf.layers.dense(inputs=dense1, units=256, activation=tf.nn.relu)
            # output = tf.layers.dense(inputs=dense2, units=pm.num_tags, activation=None)

            # dense layer: project to pm.num_tags output dimensions
            output = tf.reshape(outputs, [-1, 2 * pm.hidden_dim])
            output = tf.layers.dense(output, pm.num_tags)
            # dropout to reduce overfitting; keep_pro is the keep probability
            output = tf.contrib.layers.dropout(output, pm.keep_pro)
            self.logits = tf.reshape(output, [-1, s[1], pm.num_tags])

        with tf.name_scope('crf'):
            # log_likelihood is the log likelihood; transition_params is the transition probability matrix
            log_likelihood, self.transition_params = crf_log_likelihood(
                inputs=self.logits,
                tag_indices=self.input_y,
                sequence_lengths=self.seq_length)

        with tf.name_scope('loss'):
            # tf.reduce_mean averages over the tensor; keepdims=False drops the reduced dimension
            self.loss = tf.reduce_mean(-log_likelihood,
                                       keepdims=False)  # negate the log likelihood so gradient descent minimizes it

        with tf.name_scope('optimizer'):
            # tf.train.AdamOptimizer: adaptive optimizer with second-moment (squared-gradient) correction
            optimizer = tf.train.AdamOptimizer(
                pm.learning_rate)  # Adam optimizer
            # gradient clipping
            gradients, variable = zip(*optimizer.compute_gradients(self.loss))
            gradients, _ = tf.clip_by_global_norm(gradients, pm.clip)
            self.optimizer = optimizer.apply_gradients(
                zip(gradients, variable), global_step=self.global_step)
Example #13
V1 = tf.Variable(tf.truncated_normal(stddev=0.01, shape=[hidden_num,
                                                         num_tags]))
V2 = tf.Variable(tf.truncated_normal(stddev=0.01, shape=[hidden_num,
                                                         num_tags]))

# build the bi-lstm network
pred_p, y_label = lstm(x, y, A, Wc, bc, V1, V2)
# CRF log-likelihood loss
# print the crf_log_likelihood inputs
print("#" * 40)
print(pred_p)
print(y_label)
print(seq_lengths)
print(A)
cost, A = crf.crf_log_likelihood(inputs=pred_p,
                                 tag_indices=y_label,
                                 sequence_lengths=seq_lengths)
cost = tf.reduce_mean(-cost)
train = tf.train.AdamOptimizer(train_rate).minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())  # tf.initialize_all_variables() is deprecated
step = 1
while step < train_step:
    batch_x, batch_y, batch_seq_lengths = dataGenerator.next_train_batch(
        batch_size)
    #   batch_x=tf.reshape(batch_x,shape=[batch_size,sequence_length,frame_size])
    _loss, __ = sess.run([cost, train],
                         feed_dict={
                             x: batch_x,
                             y: batch_y,
Example #14
    def __init__(self):
        self.config = Config()  # configuration parameters
        self.input_x = tf.placeholder(shape=[None, self.config.seq_length],
                                      dtype=tf.int32,
                                      name='input-x')  # input token ids
        self.input_y = tf.placeholder(shape=[None, self.config.seq_length],
                                      dtype=tf.int32,
                                      name='input-y')  # true labels for the input text
        self.input_length = tf.placeholder(shape=[None],
                                           dtype=tf.int32,
                                           name='input-length')  # lengths of the input sequences
        self.input_keep_prob = tf.placeholder(
            dtype=tf.float32, name='input-keep-prob')  # keep-prob

        # Embedding layer
        embedding = tf.get_variable(
            shape=[self.config.vocab_size, self.config.embedding_dim],
            dtype=tf.float32,
            name='embedding')
        embedding_x = tf.nn.embedding_lookup(
            params=embedding,
            ids=self.input_x)  # dim:(batch_size, max_length, 300)
        embedding_x = tf.expand_dims(
            embedding_x, axis=1
        )  # expand dims to (batch_size, 1, max_length, 300); seq_length is unchanged by the convolution
        # convolution layer
        conv = tf.layers.conv2d(
            inputs=embedding_x,
            filters=self.config.hidden_dim,
            kernel_size=[1, self.config.kernel_size],
            strides=1,
            padding='SAME',
            activation=tf.nn.relu,  # pass a callable activation
            use_bias=True,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            bias_initializer=tf.contrib.layers.xavier_initializer())

        final_output_layer_list = []  # collect the outputs of the individual blocks
        for block_i in range(self.config.block_num):
            for dilation in self.config.dilation_size:
                with tf.variable_scope(name_or_scope='atrous-conv-layer-%d' %
                                       dilation,
                                       reuse=tf.AUTO_REUSE):
                    # weight = tf.get_variable(shape=[self.config.kernel_size, self.config.embedding_dim, self.config.hidden_dim, self.config.hidden_dim],
                    #                          dtype=tf.float32,
                    #                          name='dilation-weight',
                    #                          initializer=tf.contrib.layers.xavier_initializer())
                    # bias = tf.get_variable(shape=[self.config.hidden_dim],
                    #                        dtype=tf.float32,
                    #                        name='dilation-bias',
                    #                        initializer=tf.contrib.layers.xavier_initializer())
                    # conv = tf.nn.atrous_conv2d(value=conv, filters=weight, rate=dilation, padding='SAME')
                    # conv = conv + bias
                    # conv = tf.nn.relu(conv)

                    # equivalent to the commented-out statements above
                    conv = tf.layers.conv2d(inputs=conv,
                                            filters=self.config.hidden_dim,
                                            kernel_size=[
                                                self.config.kernel_size,
                                                self.config.embedding_dim
                                            ],
                                            strides=1,
                                            dilation_rate=dilation,
                                            padding='SAME',
                                            activation=tf.nn.relu,
                                            use_bias=True,
                                            kernel_initializer=tf.contrib.
                                            layers.xavier_initializer(),
                                            bias_initializer=tf.contrib.layers.
                                            xavier_initializer())
            # store the output of the current block
            final_output_layer_list.append(conv)
        # concatenate the outputs of all blocks
        final_output = tf.concat(final_output_layer_list, axis=-1)
        # drop out
        final_output = tf.nn.dropout(final_output,
                                     keep_prob=self.input_keep_prob)
        # squeeze out the size-1 dimension; dim: (batch_size, max_length, 3*hidden_dim)
        final_output = tf.squeeze(input=final_output, axis=1)
        # output layer; dim: (batch_size, max_length, num_classes)
        self.logits = tf.layers.dense(inputs=final_output,
                                      units=self.config.num_classes,
                                      name='logits')

        # optionally use a CRF layer on top of the logits
        if self.config.crf:
            log_likelihood, self.transition_params = crf.crf_log_likelihood(
                inputs=self.logits,
                tag_indices=self.input_y,
                sequence_lengths=self.input_length)
            self.loss = -tf.reduce_mean(log_likelihood)
            # decoded predictions
            self.predict, self.viterbi_score = crf.crf_decode(
                potentials=self.logits,
                transition_params=self.transition_params,
                sequence_length=self.input_length)
        else:
            # cross-entropy loss; input_y holds integer tag ids, so the sparse variant is used
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
            mask = tf.sequence_mask(lengths=self.input_length)
            losses = tf.boolean_mask(cross_entropy, mask=mask)
            self.loss = tf.reduce_mean(losses)
            # predicted tag per token: argmax over the class dimension
            self.predict = tf.argmax(tf.nn.softmax(self.logits),
                                     axis=-1,
                                     name='predict')

        # optimizer
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.config.learning_rate).minimize(loss=self.loss)
Example #15
def association(hidden,
                pool_idx,
                targets,
                n_targets,
                config,
                train=False,
                reuse=None,
                **kwargs):
    """
    An Attention based sequence labeler model with association.

    :param hidden: The output of the featurizer. [batch_size, sequence_length, embed_dim]
    :param pool_idx: the index of the classify tokens along the sequence dimension. [batch_size]
    :param targets: A dict containing:
     'labels': The sequence labeling targets. [batch_size, sequence_length],
     'associations': A matrix of class ids for the associations [batch_size, sequence_length, sequence_length]
    :param n_targets: A python int containing the number of classes that the model should be learning to predict over.
    :param config: A config object, containing all parameters for the featurizer.
    :param train: If this flag is true, dropout and losses are added to the graph.
    :param reuse: Should reuse be set within this scope.
    :param kwargs: Spare arguments.
    :return: dict containing:
        "logits": The un-normalised log probabilities of each class being in each location. For usable predictions,
            sampling from this distrobution is not sufficiant and a viterbi decoding method should be used.
        "losses": The negative log likelihood for the sequence targets.
        "predict_params": A dictionary of params to be fed to the viterbi decode function.
    """
    with tf.variable_scope("sequence-labeler", reuse=reuse):
        nx = config.n_embed
        length = config.max_length
        num_associations = len(config.association_types) + 1

        def seq_lab_internal(hidden):
            attn_fn = functools.partial(
                attn,
                scope="seq_label_attn",
                n_state=nx,
                n_head=config.seq_num_heads,
                resid_pdrop=config.resid_p_drop,
                attn_pdrop=config.attn_p_drop,
                train=train,
                scale=False,
                mask=False,
            )
            n = norm(attn_fn(hidden) + hidden, "seq_label_residual")
            flat_logits = tf.layers.dense(n, n_targets)
            logits = tf.reshape(
                flat_logits, tf.concat([tf.shape(hidden)[:2], [n_targets]], 0))

            association_head = tf.layers.dense(n, nx)
            association_head = tf.reshape(
                association_head, tf.concat([tf.shape(hidden)[:2], [nx]], 0))

            a = tf.expand_dims(association_head, 1)
            b = tf.expand_dims(association_head, 2)

            features = tf.concat(
                [
                    a - b,
                    a * b,
                    tf.tile(a, [1, length, 1, 1]),
                    tf.tile(b, [1, 1, length, 1]),
                    # TODO: Think about using prediction as a feature for associations.
                ],
                axis=-1,
            )
            associations_flat = tf.layers.dense(
                tf.reshape(features, shape=[-1, nx * 4]), num_associations)
            associations = tf.reshape(associations_flat,
                                      [-1, length, length, num_associations])

            return logits, associations_flat, associations

        with tf.variable_scope("seq_lab_attn"):
            if config.low_memory_mode and train:
                seq_lab_internal = recompute_grad(seq_lab_internal,
                                                  use_entire_scope=True)

            logits, associations_flat, associations = seq_lab_internal(hidden)

        log_likelihood = 0.0
        association_loss = 0.0
        class_weights = kwargs.get("class_weights")
        if class_weights is not None:
            logits = class_reweighting(class_weights)(logits)

        transition_params = tf.get_variable("Transition_matrix",
                                            shape=[n_targets, n_targets])
        if targets is not None:
            log_likelihood, _ = crf_log_likelihood(
                logits,
                targets["labels"],
                kwargs.get("max_length") *
                tf.ones(tf.shape(targets["labels"])[0]),
                transition_params=transition_params,
            )
            sequence_mask = tf.sequence_mask(pool_idx + 1,
                                             maxlen=length,
                                             dtype=tf.float32)
            mask = tf.expand_dims(sequence_mask, 1) * tf.expand_dims(
                sequence_mask, 2)

            association_loss = tf.losses.sparse_softmax_cross_entropy(
                logits=associations_flat,
                labels=tf.reshape(targets["associations"], shape=[-1]),
                weights=tf.reshape(mask, shape=[-1]),
            )

        return {
            "logits": {
                "sequence": logits,
                "association": associations
            },
            "losses": -log_likelihood + config.assocation_loss_weight *
            association_loss,  # TODO: think about weighting.
            "predict_params": {
                "transition_matrix": transition_params
            },
        }
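Note: the docstring above points out that usable predictions require a Viterbi decoding step with the returned transition matrix. A minimal offline sketch of that step, assuming logits_np ([sequence_length, n_targets]) and trans_np ([n_targets, n_targets]) have already been fetched from the graph (the random arrays below simply stand in for those fetched values):

import numpy as np
from tensorflow.contrib.crf import viterbi_decode

logits_np = np.random.randn(16, 5)  # stands in for the fetched "sequence" logits of one example
trans_np = np.random.randn(5, 5)    # stands in for the fetched "transition_matrix" predict param

viterbi_seq, viterbi_score = viterbi_decode(logits_np, trans_np)
print(viterbi_seq)  # best tag index per position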
Example #16
    def loss_layer(self, project_logits):
        """
        calculate crf loss
        :param project_logits: [1, num_steps, num_tags]
        :return: scalar loss
        """

        with tf.variable_scope("crf_loss"):
            small = -1000.0
            # pad logits for crf loss
            start_logits = tf.concat([
                tf.constant(small, shape=[1, self.num_tags]),
                tf.zeros([1, 1]),
                tf.constant(small, shape=[1, 1])
            ], -1)
            start_logits = tf.expand_dims(start_logits, 0)
            start_logits = tf.tile(
                start_logits,
                tf.concat(
                    [tf.expand_dims(self.batch_size, 0),
                     tf.constant([1, 1])], 0))
            end_logits = tf.concat([
                tf.constant(small, shape=[1, self.num_tags + 1]),
                tf.zeros([1, 1])
            ], -1)
            end_logits = tf.expand_dims(end_logits, 0)
            end_logits = tf.tile(
                end_logits,
                tf.concat(
                    [tf.expand_dims(self.batch_size, 0),
                     tf.constant([1, 1])], 0))
            pad_logits = tf.cast(
                small * tf.ones([self.batch_size, self.num_steps, 2]),
                tf.float32)

            logits = tf.concat([project_logits, pad_logits], axis=-1)
            logits = tf.concat([start_logits, logits, end_logits], axis=1)
            #targets = tf.expand_dims(self.targets, axis=0)
            targets = tf.concat([
                tf.ones([tf.shape(self.targets)[0], 1], tf.int32) *
                self.num_tags, self.targets,
                tf.ones([tf.shape(self.targets)[0], 1], tf.int32) *
                tf.add(self.num_tags, 1)
            ],
                                axis=-1)

            self.trans = tf.get_variable(
                "transitions",
                shape=[self.num_tags + 2, self.num_tags + 2],
                initializer=self.initializer)
            log_likelihood, self.trans = crf_log_likelihood(
                inputs=logits,
                tag_indices=targets,
                transition_params=self.trans,
                sequence_lengths=tf.reduce_sum(
                    tf.concat([
                        tf.expand_dims(self.sequence_length, 0),
                        tf.expand_dims(
                            tf.ones([self.batch_size], tf.int32) * 2, 0)
                    ], 0), 0))
            return tf.reduce_mean(-log_likelihood)
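
A minimal NumPy sketch of the padding performed above (toy sizes only): project_logits of shape [1, num_steps, num_tags] grows to [1, num_steps + 2, num_tags + 2], and the tag sequence gains a synthetic start tag (num_tags) and end tag (num_tags + 1), which is why the transition matrix is [num_tags + 2, num_tags + 2] and the sequence lengths are increased by 2.

import numpy as np

num_steps, num_tags, small = 5, 4, -1000.0   # toy sizes
project_logits = np.random.randn(1, num_steps, num_tags).astype(np.float32)
targets = np.random.randint(0, num_tags, size=(1, num_steps))

# Two extra score columns per real step, both effectively impossible.
pad = np.full((1, num_steps, 2), small, dtype=np.float32)
logits = np.concatenate([project_logits, pad], axis=-1)

# Synthetic start/end steps where only the start/end tag has a non-small score.
start = np.full((1, 1, num_tags + 2), small, dtype=np.float32)
start[:, :, num_tags] = 0.0
end = np.full((1, 1, num_tags + 2), small, dtype=np.float32)
end[:, :, num_tags + 1] = 0.0
logits = np.concatenate([start, logits, end], axis=1)

targets = np.concatenate(
    [np.full((1, 1), num_tags), targets, np.full((1, 1), num_tags + 1)], axis=-1)

print(logits.shape, targets.shape)  # (1, 7, 6) (1, 7)
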
    def __init__(self,
                 config: BaselineConfig,
                 is_training,
                 features,
                 init_embedding=None):
        """Constructor for BertModel.

        Args:
          config: `BaselineConfig` instance.
          is_training: bool. True for training model, false for eval model. Controls
            whether dropout will be applied.
          input_ids: int64 Tensor of shape [batch_size, seq_length, feat_size].
          label_ids: (optional) int64 Tensor of shape [batch_size, seq_length].
          seq_length: (optional) int64 Tensor of shape [batch_size].
          init_embedding: (optional)

        Raises:
          ValueError: The config is invalid or one of the input tensor shapes
            is invalid.
        """

        super(BaselineModel, self).__init__()
        input_ids = features["input_ids"]
        seq_length = features["seq_length"]
        label_ids = features["label_ids"]

        self.input_ids = input_ids
        self.label_ids = label_ids
        self.seq_length = seq_length
        self.is_training = is_training
        input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
        self.batch_size = input_shape[0]
        self.max_length = input_shape[1]
        self.window_size = input_shape[2]

        if not is_training:
            config.embedding_dropout_prob = 0.0
            config.hidden_dropout_prob = 0.0

        if init_embedding is None:
            self.embedding = tf.get_variable(
                shape=[config.vocab_size, config.embedding_size],
                dtype=tf.float32,
                name='embedding',
                initializer=tf.truncated_normal_initializer(stddev=0.02))
        else:
            self.embedding = tf.Variable(init_embedding,
                                         dtype=tf.float32,
                                         name='embedding')

        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(self.embedding, self.input_ids)
            feat_size = self.window_size
            x = tf.reshape(
                x, [self.batch_size, -1, feat_size * config.embedding_size])

        x = model_utils.dropout(x, config.embedding_dropout_prob)

        def lstm_cell(dim):
            cell = tf.nn.rnn_cell.LSTMCell(dim, name='basic_lstm_cell')
            cell = rnn.DropoutWrapper(cell,
                                      output_keep_prob=1.0 -
                                      config.hidden_dropout_prob)
            cell = tf.nn.rnn_cell.MultiRNNCell([cell] *
                                               config.num_hidden_layers)
            return cell

        with tf.variable_scope('rnn'):
            (forward_output,
             backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=lstm_cell(config.hidden_size),
                 cell_bw=lstm_cell(config.hidden_size),
                 inputs=x,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            output = tf.concat([forward_output, backword_output], axis=2)

        with tf.variable_scope('output'):
            scores = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            transition_param = tf.get_variable(
                "transitions", [config.num_classes, config.num_classes])
            self.prediction, _ = crf.crf_decode(scores, transition_param,
                                                self.seq_length)

        with tf.variable_scope('loss'):
            # crf
            self.log_likelihood, _ = crf.crf_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)

            self.loss = tf.reduce_mean(-self.log_likelihood)
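
One caveat about the lstm_cell helper above: `tf.nn.rnn_cell.MultiRNNCell([cell] * config.num_hidden_layers)` stacks the same cell object for every layer, which newer TF 1.x releases can reject and older ones silently turn into weight sharing across layers. A minimal sketch, assuming the intent is independent layers (the helper name here is illustrative, not from the snippet):

import tensorflow as tf

def stacked_lstm_cell(dim, num_layers, output_keep_prob):
    """Stack num_layers independent LSTM cells, each wrapped in dropout."""
    cells = []
    for _ in range(num_layers):
        cell = tf.nn.rnn_cell.LSTMCell(dim)
        cell = tf.nn.rnn_cell.DropoutWrapper(cell,
                                             output_keep_prob=output_keep_prob)
        cells.append(cell)
    return tf.nn.rnn_cell.MultiRNNCell(cells)
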
    def __init__(self):
        self.config = Config()  # configuration parameters
        self.input_x = tf.placeholder(shape=[None, self.config.seq_length],
                                      dtype=tf.int32,
                                      name='input-x')  # input text
        self.input_y = tf.placeholder(shape=[None, self.config.seq_length],
                                      dtype=tf.int32,
                                      name='input-y')  # true labels for the input text
        self.input_length = tf.placeholder(shape=[None],
                                           dtype=tf.int32,
                                           name='input-length')  # lengths of the input sequences
        self.input_keep_prob = tf.placeholder(
            dtype=tf.float32, name='input-keep-prob')  # keep-prob

        # Embedding layer
        embedding = tf.get_variable(
            shape=[self.config.vocab_size, self.config.embedding_dim],
            dtype=tf.float32,
            name='embedding')
        embedding_x = tf.nn.embedding_lookup(
            params=embedding,
            ids=self.input_x)  # dim:(batch_size, max_length, 300)

        # Bi-LSTM/Bi-GRU
        cell_fw = self.get_rnn(self.config.rnn_type)  # forward cell
        cell_bw = self.get_rnn(self.config.rnn_type)  # backward cell
        outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
                                                          cell_bw=cell_bw,
                                                          inputs=embedding_x,
                                                          dtype=tf.float32)
        outputs = tf.concat(
            values=outputs, axis=2
        )  # concatenate the forward and backward outputs   dim:(batch_size, max_length, 2*hidden_dim)
        outputs = tf.layers.dropout(inputs=outputs,
                                    rate=1.0 - self.input_keep_prob)  # rate is the drop probability

        # output layer   dim:(batch_size, max_length, num_classes)
        self.logits = tf.layers.dense(inputs=outputs,
                                      units=self.config.num_classes,
                                      name='logits')

        # whether to use a CRF layer
        if self.config.crf:
            log_likelihood, self.transition_params = crf.crf_log_likelihood(
                inputs=self.logits,
                tag_indices=self.input_y,
                sequence_lengths=self.input_length)
            self.loss = -tf.reduce_mean(log_likelihood)
            # prediction output
            self.predict, self.viterbi_score = crf.crf_decode(
                potentials=self.logits,
                transition_params=self.transition_params,
                sequence_length=self.input_length)
        else:
            # loss: per-token cross-entropy (the sparse variant, since input_y holds class indices)
            cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
            mask = tf.sequence_mask(lengths=self.input_length)
            losses = tf.boolean_mask(cross_entropy, mask=mask)
            self.loss = tf.reduce_mean(losses)
            # prediction output: argmax over the class dimension
            self.predict = tf.argmax(tf.nn.softmax(self.logits),
                                     axis=-1,
                                     name='predict')

        # optimizer
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.config.learning_rate).minimize(loss=self.loss)
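
A toy NumPy sketch (values are illustrative) of what the sequence_mask / boolean_mask pair in the non-CRF branch keeps: only the per-token losses at real positions contribute to the mean.

import numpy as np

lengths = np.array([3, 1])
per_token_loss = np.arange(8, dtype=np.float32).reshape(2, 4)   # [batch, seq_length]
mask = np.arange(4)[None, :] < lengths[:, None]                 # same shape as tf.sequence_mask
kept = per_token_loss[mask]                                     # analogous to tf.boolean_mask
print(kept, kept.mean())  # [0. 1. 2. 4.] 1.75
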
    def __init__(self,
                 batch_size,
                 tag_nums,
                 hidden_nums,
                 sentence_len,
                 word_embeddings,
                 device='/gpu:1'):
        self.batch_size = batch_size
        self.tag_nums = tag_nums
        self.hidden_nums = hidden_nums
        self.sentence_len = sentence_len
        self.word_embeddings = word_embeddings
        self.device = device

        with tf.device(device):
            # network variables
            word_embeddings = tf.Variable(initial_value=word_embeddings,
                                          trainable=True)  # trainable
            # input placeholders
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.sentence_len],
                                          name='input_word_id')  # input word ids
            self.input_y = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.sentence_len],
                                          name='input_labels')
            self.sequence_lengths = tf.placeholder(
                dtype=tf.int32, shape=[None], name='sequence_lengths_vector')
            self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                    name="dropout_keep_prob")

            with tf.name_scope('projection'):
                # projection layer: map each input word id to its word vector
                word_id = self.input_x
                word_vectors = tf.nn.embedding_lookup(word_embeddings,
                                                      ids=word_id,
                                                      name='word_vectors')
                #word_vectors = tf.nn.dropout(word_vectors,0.8)
            with tf.name_scope('bi-lstm'):

                #labels = tf.reshape(input_y,shape=[-1,self.sentence_len],name='labels')
                #labels = tf.reshape(input_y,shape=[-1,self.tag_nums],name='labels')
                labels = tf.reshape(self.input_y,
                                    shape=[self.batch_size, self.sentence_len],
                                    name='labels')
                fw_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_nums)
                bw_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_nums)
                # bidirectional pass
                output, _state = tf.nn.bidirectional_dynamic_rnn(
                    fw_lstm_cell,
                    bw_lstm_cell,
                    inputs=word_vectors,
                    sequence_length=self.sequence_lengths,
                    dtype=tf.float32)
                fw_output = output[
                    0]  #[batch_size,self.sentence_len,self.hidden_nums]
                bw_output = output[
                    1]  #[batch_size,self.sentence_len,self.hidden_nums]
                V1 = tf.get_variable(
                    'V1',
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    shape=[self.hidden_nums, self.hidden_nums])
                V2 = tf.get_variable(
                    'V2',
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    shape=[self.hidden_nums, self.hidden_nums])
                fw_output = tf.reshape(tf.matmul(
                    tf.reshape(fw_output, [-1, self.hidden_nums], name='Lai'),
                    V1),
                                       shape=tf.shape(output[0]))
                bw_output = tf.reshape(tf.matmul(
                    tf.reshape(bw_output, [-1, self.hidden_nums], name='Rai'),
                    V2),
                                       shape=tf.shape(output[1]))
                contact = tf.concat(
                    [fw_output, bw_output], -1, name='bi_lstm_concat'
                )  #[batch_size,self.sentence_len,2*self.hidden_nums]
                contact = tf.nn.dropout(contact, self.dropout_keep_prob)
                s = tf.shape(contact)
                contact_reshape = tf.reshape(contact,
                                             shape=[-1, 2 * self.hidden_nums],
                                             name='contact')
                W_lstm = tf.get_variable(
                    'W_lstm',
                    dtype=tf.float32,
                    initializer=tf.contrib.layers.xavier_initializer(),
                    shape=[2 * self.hidden_nums, self.tag_nums],
                    trainable=True)
                b_lstm = tf.get_variable(
                    'b_lstm', initializer=tf.zeros(shape=[self.tag_nums]))
                p = tf.nn.relu(tf.matmul(contact_reshape, W_lstm) + b_lstm)
                #logit= tf.reshape(p,shape=[-1,s[1],self.tag_nums],name='omit_matrix')
                #logit= tf.reshape(p,shape=[-1,s[1],self.sentence_len],name='omit_matrix')
                self.logit = tf.reshape(
                    p,
                    shape=[-1, self.sentence_len, self.tag_nums],
                    name='omit_matrix')

            with tf.name_scope("crf"):
                log_likelihood, transition_matrix = crf.crf_log_likelihood(
                    self.logit, labels, sequence_lengths=self.sequence_lengths)
                self.cost = -tf.reduce_mean(log_likelihood)
                self.crf_labels, _ = crf.crf_decode(
                    self.logit,
                    transition_matrix,
                    sequence_length=self.sequence_lengths
                )  # first return value: decode_tags, a [batch_size, max_seq_len] matrix of predicted tags
示例#20
0
    def bils_crf(self):

        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            embedding = tf.Variable(tf.truncated_normal(
                [pm.vacab_size, pm.embedding_size], -0.25, 0.25),
                                    name='embedding')
            embeding_input = tf.nn.embedding_lookup(embedding, self.input_x)
            self.embedding = tf.nn.dropout(embeding_input,
                                           keep_prob=self.keep_pro)

        with tf.name_scope('Cell'):
            cell_fw = tf.nn.rnn_cell.LSTMCell(pm.hidden_dim,
                                              state_is_tuple=True)
            Cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, self.keep_pro)

            cell_bw = tf.nn.rnn_cell.LSTMCell(pm.hidden_dim,
                                              state_is_tuple=True)
            Cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, self.keep_pro)

        with tf.name_scope('biLSTM'):
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=Cell_fw,
                cell_bw=Cell_bw,
                inputs=self.embedding,
                sequence_length=self.seq_length,
                dtype=tf.float32)
            outputs = tf.concat(outputs, 2)

        # with tf.name_scope('GCN'):

        with tf.name_scope('output'):
            s = tf.shape(outputs)
            output = tf.reshape(outputs, [-1, 2 * pm.hidden_dim])
            output = tf.layers.dense(output, pm.num_tags)
            output = tf.contrib.layers.dropout(output, self.keep_pro)
            self.logits = tf.reshape(output, [-1, s[1], pm.num_tags])

        with tf.name_scope('crf'):
            self.log_likelihood, self.transition_params = crf_log_likelihood(
                inputs=self.logits,
                tag_indices=self.input_y,
                sequence_lengths=self.seq_length)
            # log_likelihood is the log-likelihood; transition_params is the transition matrix
            # crf_log_likelihood(inputs: [batch_size, max_seq_length, num_tags],
            #                    tag_indices: [batch_size, max_seq_length],
            #                    sequence_lengths: [batch_size] real sequence lengths)
            # transition_params: a [num_tags, num_tags] transition matrix
            # log_likelihood: a [batch_size] tensor with the log-likelihood of each tag sequence
            # (see the standalone sketch after this example)

        with tf.name_scope('loss'):
            self.loss = tf.reduce_mean(-self.log_likelihood)  # negate the log-likelihood and minimize it by gradient descent

        with tf.name_scope('optimizer'):
            # Decayed learning rate: learning_rate = lr * (0.9 ** (global_step / 10)); staircase=True decays in discrete steps every decay_steps
            # learning_rate = tf.train.exponential_decay(self.config.lr, global_step=self.global_step,
            # decay_steps=10, decay_rate=self.config.lr_decay, staircase=True)
            # optimizer = tf.train.AdamOptimizer(learning_rate)
            # self.optimizer = optimizer.minimize(self.loss, global_step=self.global_step)  # global_step is incremented automatically
            # option 2
            optimizer = tf.train.AdamOptimizer(pm.learning_rate)
            gradients, variables = zip(*optimizer.compute_gradients(
                self.loss))  # compute gradients, yielding (gradient, variable) pairs
            gradients, _ = tf.clip_by_global_norm(gradients, pm.clip)
            # clip by global L2 norm: if the global norm exceeds clip, every gradient is scaled by clip / global_norm
            self.optimizer = optimizer.apply_gradients(
                zip(gradients, variables), global_step=self.global_step)
            # global_step is incremented automatically

        def build_gcn(self, input_shape):
            features_dim = self.fea_dim
            self.wei = self.add_variable(name='wei',
                                         shape=[features_dim, self.out_dim],
                                         initializer=tf.zeros_initializer())

        def call_gcn(self, inputs, support):
            # inputs = np.array(inputs, dtype=float)
            # support = np.array(support, dtype=float)
            inputs = tf.cast(inputs, dtype=tf.float32)
            support = tf.cast(support, dtype=tf.float32)
            H_t = tf.matmul(support, inputs)

            output = tf.matmul(H_t, self.wei)
            return tf.sigmoid(output)
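
A small, self-contained sketch exercising the shapes documented in the comment in the crf scope above (TF 1.x with tf.contrib.crf assumed; the toy sizes and random inputs are illustrative only):

import numpy as np
import tensorflow as tf
from tensorflow.contrib.crf import crf_log_likelihood, crf_decode

batch_size, max_seq_length, num_tags = 2, 6, 4
logits = tf.placeholder(tf.float32, [None, max_seq_length, num_tags])
tags = tf.placeholder(tf.int32, [None, max_seq_length])
lengths = tf.placeholder(tf.int32, [None])

# crf_log_likelihood creates the [num_tags, num_tags] transition matrix when none is passed in.
log_likelihood, transition_params = crf_log_likelihood(
    inputs=logits, tag_indices=tags, sequence_lengths=lengths)
loss = tf.reduce_mean(-log_likelihood)
decode_tags, best_score = crf_decode(logits, transition_params, lengths)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        logits: np.random.randn(batch_size, max_seq_length, num_tags),
        tags: np.random.randint(0, num_tags, (batch_size, max_seq_length)),
        lengths: np.array([6, 4]),
    }
    print(sess.run([loss, decode_tags], feed))  # scalar loss and [batch, max_seq_length] tag ids
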
示例#21
0
 def loss_op(self):
     log_likelihood, self.transition_params = crf_log_likelihood(
         inputs=self.logits,
         tag_indices=self.labels,
         sequence_lengths=self.sequence_lengths)
     self.loss = -tf.reduce_mean(log_likelihood)
示例#23
0
    def build_tagging_graph(self, inputs, hidden_layers, channels, num_tags,
                            use_crf, lamd, dropout_emb, dropout_hidden,
                            kernel_size, use_bn, use_wn, active_type):
        """
        Build a deep neural model for sequence tagging.
        """
        stag_ids = tf.placeholder(dtype=INT_TYPE,
                                  shape=[None, None],
                                  name='stag_ids')
        seq_lengths = tf.placeholder(dtype=INT_TYPE,
                                     shape=[None],
                                     name='seq_lengths')

        # Default is not train.
        is_train = tf.placeholder(dtype=tf.bool, shape=[], name='is_train')

        masks = tf.cast(tf.sequence_mask(seq_lengths), FLOAT_TYPE)

        # Dropout on embedding output.
        if dropout_emb:
            inputs = tf.cond(is_train,
                             lambda: tf.nn.dropout(inputs, 1 - dropout_emb),
                             lambda: inputs)

        hidden_output = inputs
        pre_channels = inputs.get_shape()[-1].value
        for i in range(hidden_layers):

            k = kernel_size
            cur_channels = channels[i]
            filter_w = tf.get_variable('filter_w_%d' % i,
                                       shape=[k, pre_channels, cur_channels],
                                       dtype=FLOAT_TYPE)
            filter_v = tf.get_variable('filter_v_%d' % i,
                                       shape=[k, pre_channels, cur_channels],
                                       dtype=FLOAT_TYPE)
            bias_b = tf.get_variable(
                'bias_b_%d' % i,
                shape=[cur_channels],
                initializer=tf.zeros_initializer(dtype=FLOAT_TYPE))
            bias_c = tf.get_variable(
                'bias_c_%d' % i,
                shape=[cur_channels],
                initializer=tf.zeros_initializer(dtype=FLOAT_TYPE))

            # Weight normalization.
            if use_wn:
                epsilon = 1e-12
                g_w = tf.get_variable('g_w_%d' % i,
                                      shape=[k, 1, cur_channels],
                                      dtype=FLOAT_TYPE)
                g_v = tf.get_variable('g_v_%d' % i,
                                      shape=[k, 1, cur_channels],
                                      dtype=FLOAT_TYPE)
                # Perform wn
                filter_w = g_w * filter_w / (tf.sqrt(
                    tf.reduce_sum(filter_w**2, 1, keep_dims=True)) + epsilon)
                filter_v = g_v * filter_v / (tf.sqrt(
                    tf.reduce_sum(filter_v**2, 1, keep_dims=True)) + epsilon)

            w = tf.nn.conv1d(hidden_output, filter_w, 1, 'SAME') + bias_b
            v = tf.nn.conv1d(hidden_output, filter_v, 1, 'SAME') + bias_c

            if use_bn:
                # Normalize each branch on its own output (w on w, v on v).
                w = layers.batch_norm(inputs=w,
                                      decay=0.9,
                                      is_training=is_train,
                                      center=True,
                                      scale=True,
                                      scope='BatchNorm_w_%d' % i)
                v = layers.batch_norm(inputs=v,
                                      decay=0.9,
                                      is_training=is_train,
                                      center=True,
                                      scale=True,
                                      scope='BatchNorm_v_%d' % i)

            if active_type == 'glu':
                hidden_output = w * tf.nn.sigmoid(v)
            elif active_type == 'relu':
                hidden_output = tf.nn.relu(w)
            elif active_type == 'gtu':
                hidden_output = tf.tanh(w) * tf.nn.sigmoid(v)
            elif active_type == 'tanh':
                hidden_output = tf.tanh(w)
            elif active_type == 'linear':
                hidden_output = w
            elif active_type == 'bilinear':
                hidden_output = w * v

            # Mask paddings.
            hidden_output = hidden_output * tf.expand_dims(masks, -1)
            # Dropout on hidden output.
            if dropout_hidden:
                hidden_output = tf.cond(
                    is_train,
                    lambda: tf.nn.dropout(hidden_output, 1 - dropout_hidden),
                    lambda: hidden_output)

            pre_channels = cur_channels

        # Un-scaled log probabilities.
        scores = layers.fully_connected(hidden_output, num_tags, tf.identity)

        if use_crf:
            cost, transitions = crf.crf_log_likelihood(
                inputs=scores,
                tag_indices=stag_ids,
                sequence_lengths=seq_lengths)
            cost = -tf.reduce_mean(cost)
        else:
            reshaped_scores = tf.reshape(scores, [-1, num_tags])
            reshaped_stag_ids = tf.reshape(stag_ids, [-1])
            real_distribution = layers.one_hot_encoding(
                reshaped_stag_ids, num_tags)
            cost = tf.nn.softmax_cross_entropy_with_logits(
                labels=real_distribution, logits=reshaped_scores)
            cost = tf.reduce_sum(
                tf.reshape(cost, tf.shape(stag_ids)) * masks) / tf.cast(
                    tf.shape(inputs)[0], FLOAT_TYPE)

        # Calculate L2 penalty.
        l2_penalty = 0
        if lamd > 0:
            for v in tf.trainable_variables():
                if '/B:' not in v.name and '/biases:' not in v.name:
                    l2_penalty += lamd * tf.nn.l2_loss(v)
        train_cost = cost + l2_penalty

        # Summary cost.
        tf.summary.scalar('cost', cost)

        summaries = tf.summary.merge_all()

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            with tf.control_dependencies([updates]):
                cost = tf.identity(cost)

        return stag_ids, seq_lengths, is_train, cost, train_cost, scores, summaries
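
The weight-normalization step in the example above divides each filter by its L2 norm over the input-channel axis and rescales it with a learned gain; a minimal NumPy sketch of that computation (toy shapes, illustration only):

import numpy as np

k, pre_channels, cur_channels, epsilon = 3, 8, 16, 1e-12   # toy sizes
filter_w = np.random.randn(k, pre_channels, cur_channels)
g_w = np.random.randn(k, 1, cur_channels)                  # learned gain

# L2 norm over the input-channel axis (axis=1), kept for broadcasting.
norm = np.sqrt((filter_w ** 2).sum(axis=1, keepdims=True)) + epsilon
normalized = g_w * filter_w / norm
print(normalized.shape)  # (3, 8, 16)
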
示例#24
0
    def __init__(self,
                 config,
                 features,
                 dropout_keep_prob,
                 init_embeddings=None):

        super(AttendedDictModel, self).__init__()
        input_ids = features["input_ids"]
        input_dicts = features["input_dicts"]
        seq_length = features["seq_length"]
        label_ids = features["label_ids"]

        self.label_ids = label_ids
        self.dict = input_dicts
        self.seq_length = seq_length

        dict_shape = model_utils.get_shape_list(input_dicts, expected_rank=3)
        self.dict_dim = dict_shape[2]

        x, batch_size, feat_size = model_utils.input_embedding(
            input_ids, config, init_embeddings=init_embeddings)
        x = tf.reshape(x, [batch_size, -1, feat_size * config.embedding_size])
        x = tf.nn.dropout(x, dropout_keep_prob)

        with tf.variable_scope('character'):
            (forward_output,
             backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
                                                     config.num_hidden_layers,
                                                     dropout_keep_prob),
                 cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
                                                     config.num_hidden_layers,
                                                     dropout_keep_prob),
                 inputs=x,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            output = tf.concat([forward_output, backword_output], axis=2)

        with tf.variable_scope('dict_attention'):
            dict_attention = layers.fully_connected(inputs=output,
                                                    num_outputs=self.dict_dim,
                                                    activation_fn=tf.sigmoid)
            # [B, L, D]
            self.dict = tf.cast(self.dict, dtype=tf.float32)
            attend_dict = tf.multiply(self.dict, dict_attention)

        with tf.variable_scope('dict'):
            (forward_output,
             backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
                                                     config.num_hidden_layers,
                                                     dropout_keep_prob),
                 cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
                                                     config.num_hidden_layers,
                                                     dropout_keep_prob),
                 inputs=attend_dict,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            dict_output = tf.concat([forward_output, backword_output], axis=2)

        with tf.variable_scope('output'):
            output = tf.concat([dict_output, output], axis=2)
            scores = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            transition_param = tf.get_variable(
                "transitions", [config.num_classes, config.num_classes])
            self.prediction, _ = crf.crf_decode(scores, transition_param,
                                                self.seq_length)

        with tf.variable_scope('loss'):
            # crf
            if config.multitag:
                self.label_ids = tf.cast(self.label_ids, dtype=tf.bool)
                self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
                    scores, self.label_ids, self.seq_length, transition_param)
            else:
                self.log_likelihood, _ = crf.crf_log_likelihood(
                    scores, self.label_ids, self.seq_length, transition_param)

            self.loss = tf.reduce_mean(-self.log_likelihood)
示例#25
0
    def __init__(self, config, char_embeddings):

        #config
        self.config = config
        self.lr = config.lr
        self.l2_lamda = config.l2_lamda
        self.clip = config.clip

        self.char_dim = config.char_dim
        self.lstm_dim = config.lstm_dim
        self.seg_dim = config.seg_dim
        self.num_tags = config.num_tags
        self.num_chars = config.num_chars
        self.num_segs = config.num_segs

        #placeholder
        self.char_inputs = tf.placeholder(dtype=tf.int32,
                                          shape=[None, None],
                                          name='CharInputs')
        self.seg_inputs = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None],
                                         name='SegInputs')
        self.tags = tf.placeholder(dtype=tf.int32,
                                   shape=[None, None],
                                   name='Tags')
        self.dropout_keep = tf.placeholder(dtype=tf.float32,
                                           name='Dropout_keep')

        #shape
        #[batch_size]
        self.lengths = tf.reduce_sum(
            tf.cast(
                tf.greater(self.char_inputs, tf.zeros_like(self.char_inputs)),
                tf.int32), 1)
        self.batch_size = tf.shape(self.char_inputs)[0]
        self.max_length = tf.shape(self.char_inputs)[1]

        #embedding_layer
        with tf.variable_scope("embedding_layer"):
            if char_embeddings is None:
                self.char_embeddings = tf.get_variable(
                    name='char_embeddings',
                    shape=[self.num_chars, self.char_dim],
                    dtype=tf.float32)
            else:
                self.char_embeddings = tf.Variable(char_embeddings,
                                                   name='char_embeddings',
                                                   dtype=tf.float32)
            char_inputs = tf.nn.embedding_lookup(self.char_embeddings,
                                                 self.char_inputs)

            if self.config.seg_dim > 0:
                self.seg_embeddings = tf.get_variable(
                    name='seg_embeddings', shape=[self.num_segs, self.seg_dim])
                seg_inputs = tf.nn.embedding_lookup(self.seg_embeddings,
                                                    self.seg_inputs)
                inputs = tf.concat([char_inputs, seg_inputs], axis=-1)
            else:
                inputs = char_inputs

        #dropout
        lstm_inputs = tf.nn.dropout(inputs, keep_prob=self.dropout_keep)

        #bilstm_layer
        with tf.variable_scope("bilstm_layer"):
            cell_fw = rnn.LSTMCell(num_units=self.lstm_dim)
            cell_bw = rnn.LSTMCell(num_units=self.lstm_dim)

            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=lstm_inputs,
                sequence_length=self.lengths,
                dtype=tf.float32)
            lstm_outputs = tf.concat([output_fw, output_bw], axis=2)

        #project_layer
        self.logits = layers.fully_connected(inputs=lstm_outputs,
                                             num_outputs=self.num_tags,
                                             activation_fn=None,
                                             scope='project_layer')

        #crf_layer
        with tf.variable_scope("crf_layer"):
            log_likelihood, self.transition_params = crf.crf_log_likelihood(
                inputs=self.logits,
                tag_indices=self.tags,
                sequence_lengths=self.lengths)

        self.loss = tf.reduce_mean(-log_likelihood)
        #summary
        tf.summary.scalar("loss", self.loss)

        #train_op
        self.global_step = tf.Variable(0, trainable=False)
        optimizer = tf.train.AdamOptimizer(self.lr)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          self.clip)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars),
                                                  self.global_step)
示例#26
0
File: model.py  Project: marvel2120/NER
    def run(self):
        data_loader = DataLoader(self.TRAIN_DATA_PATH, self.TRAIN_LABEL_PATH,
                                 self.TEST_DATA_PATH, self.TEST_LABEL_PATH)
        x_data, y_data = data_loader.get_train_data()
        test_x_data, test_y_data = data_loader.get_test_data()
        vocab_size = data_loader.vocab_size

        test_gt_list = []
        test_res_list = []

        graph = tf.Graph()

        with graph.as_default():
            words = tf.placeholder(tf.int32, shape=[1, None], name="words")
            labels = tf.placeholder(tf.int32, shape=[1, None], name="labels")
            sequence_lengths = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name="sequence_lengths")

            embeddings = tf.Variable(tf.random_uniform(
                [vocab_size, self.WORD_DIM], -1.0, 1.0),
                                     name="embeddings_o")
            embeddings = tf.nn.l2_normalize(embeddings,
                                            1,
                                            name="embeddings_norm")

            word_embeddings = tf.nn.embedding_lookup(embeddings,
                                                     words,
                                                     name="word_embeddings")

            cell_fw = LSTMCell(self.HIDDEN_DIM)
            cell_bw = LSTMCell(self.HIDDEN_DIM)

            (output_fw_seq,
             output_bw_seq), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=cell_fw,
                 cell_bw=cell_bw,
                 inputs=word_embeddings,
                 dtype="float32")
            output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)

            W = tf.get_variable(
                name="W",
                shape=[2 * self.HIDDEN_DIM, self.NUM_TAG],
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)
            b = tf.get_variable(name="b",
                                shape=[self.NUM_TAG],
                                initializer=tf.zeros_initializer(),
                                dtype=tf.float32)
            s = tf.shape(output)
            output = tf.reshape(output, [-1, 2 * self.HIDDEN_DIM])
            pred = tf.matmul(output, W) + b
            logits = tf.reshape(pred, [-1, s[1], self.NUM_TAG], name="logits")
            transition_params_copy = tf.get_variable(
                name="transition_params",
                shape=[self.NUM_TAG, self.NUM_TAG],
                initializer=tf.zeros_initializer(),
                dtype=tf.float32)
            log_likelihood, transition_params = crf_log_likelihood(
                inputs=logits,
                tag_indices=labels,
                sequence_lengths=sequence_lengths)
            transition_params_copy = transition_params
            loss = -tf.reduce_mean(log_likelihood)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=0.001).minimize(loss)
            saver = tf.train.Saver(max_to_keep=1)

            with tf.Session(graph=graph) as sess:
                sess.run(tf.global_variables_initializer())
                for epoch in range(self.EPOCH_NUM):
                    print("epoch: ", epoch)
                    batch = 0
                    self.BATCH_NUM = 0
                    while (1):
                        batch += 1
                        batch_x, batch_y = self.next_batch(x_data, y_data)
                        self.BATCH_NUM = self.BATCH_NUM + 1
                        seq_len = 0
                        for i in batch_x:
                            seq_len += len(i)
                        temp_seq = []
                        temp_seq.append(seq_len)
                        seq_len = np.array(temp_seq)
                        reshape_x = []
                        for x in batch_x:
                            reshape_x.extend(x)
                        reshape_x = np.array(reshape_x)
                        reshape_y = []
                        for x in batch_y:
                            reshape_y.extend(x)
                        reshape_y = np.array(reshape_y)
                        reshape_x = reshape_x.reshape(1, -1)
                        reshape_y = reshape_y.reshape(1, -1)
                        feed_dict = {
                            words: reshape_x,
                            labels: reshape_y,
                            sequence_lengths: seq_len
                        }
                        train_loss, _ = sess.run([loss, optimizer], feed_dict)
                        print("batch: ", batch)
                        print("train_loss: ", train_loss)
                        saver.save(sess,
                                   'ckpt/BiLSTM_CRF.ckpt',
                                   global_step=batch)
                        if self.check_end == 1:
                            self.check_end = 0
                            break

                print("testing----------------------")
                result_file = open("resultnew.txt", "w")
                for i in range(test_x_data.shape[0]):
                    seq_len = np.array([test_x_data[i].shape[0]])
                    batch_xdata = test_x_data[i].reshape(1, -1)
                    batch_ydata = test_y_data[i].reshape(1, -1)
                    for ydata in test_y_data[i]:
                        test_gt_list.append(ydata)
                    feed_dict = {
                        words: batch_xdata,
                        labels: batch_ydata,
                        sequence_lengths: seq_len
                    }
                    temp_logits, temp_transition_params = sess.run(
                        [logits, transition_params], feed_dict=feed_dict)
                    viterbi_seq, _ = viterbi_decode(
                        temp_logits[0][:seq_len[0]], temp_transition_params)
                    for pred_data in viterbi_seq:
                        test_res_list.append(pred_data)
                    result_file.write(str(viterbi_seq))
                    result_file.write('\n')

        if len(test_gt_list) != len(test_res_list):
            print("test error!")

        precision, recall, f1 = self.evaluation(test_gt_list, test_res_list)
        print("Average Precision: ", precision)
        print("Average Recall: ", recall)
        print("Average F1: ", f1)
示例#27
0
 def loss_op(self):
     log_likelihood, self.transition_params = crf_log_likelihood(inputs=self.logits,
                                                                 tag_indices=self.labels,
                                                                 sequence_lengths=self.sequence_lengths)
     self.loss = tf.reduce_mean(-log_likelihood)
     tf.summary.scalar("loss", self.loss)
示例#28
0
def sequence_labeler(hidden,
                     targets,
                     n_targets,
                     config,
                     pad_id,
                     multilabel=False,
                     train=False,
                     reuse=None,
                     pool_idx=None,
                     **kwargs):
    """
    An attention-based sequence labeler model.

    In the case of unidirectional base models such as GPT, this model takes the output of the pre-trained model and
    applies an additional, randomly initialised multi-head attention block with residual connections on top.
    The extra attention is not future-masked, which lets the model label sequences using context from both directions.
    The representations fed into this model are necessarily future-masked, because a language-modelling loss is the
    original objective of the featurizer.

    For bidirectional base models, the CRF is applied directly to the output of the base model.

    :param hidden: The output of the featurizer. [batch_size, sequence_length, embed_dim]
    :param targets: The placeholder representing the sequence labeling targets. [batch_size, sequence_length]
    :param n_targets: A python int containing the number of classes that the model should be learning to predict over.
    :param dropout_placeholder:
    :param config: A config object, containing all parameters for the featurizer.
    :param train: If this flag is true, dropout and losses are added to the graph.
    :param reuse: Whether variable reuse should be set within this scope.
    :param kwargs: Spare arguments.
    :return: dict containing:
        "logits": The un-normalised log probabilities of each class being in each location. For usable predictions,
            sampling from this distribution is not sufficient and a viterbi decoding method should be used.
        "losses": The negative log likelihood for the sequence targets.
        "predict_params": A dictionary of params to be fed to the viterbi decode function.
    """
    with tf.variable_scope("sequence-labeler", reuse=reuse):
        if targets is not None:
            targets = tf.cast(targets, dtype=tf.int32)

        nx = config.n_embed
        if config.use_auxiliary_info:
            nx += config.n_context_embed

        def seq_lab_internal(hidden):
            if config.base_model.is_bidirectional:
                n = hidden
            else:
                attn_fn = functools.partial(
                    attn,
                    scope="seq_label_attn",
                    n_state=nx,
                    n_head=config.seq_num_heads,
                    resid_pdrop=config.resid_p_drop,
                    attn_pdrop=config.attn_p_drop,
                    train=train,
                    scale=False,
                    mask=False,
                )
                n = norm(attn_fn(hidden) + hidden, "seq_label_residual")
            flat_logits = tf.layers.dense(n, n_targets)
            logits = tf.reshape(
                flat_logits, tf.concat([tf.shape(hidden)[:2], [n_targets]], 0))
            return logits

        with tf.variable_scope("seq_lab_attn"):
            if config.low_memory_mode and train:
                seq_lab_internal = recompute_grad(seq_lab_internal,
                                                  use_entire_scope=True)
            logits = seq_lab_internal(hidden)

        class_weights = kwargs.get("class_weights")
        if class_weights is not None and train:
            class_weights = tf.reshape(class_weights, [1, 1, -1])
            one_hot_class_weights = class_weights * tf.one_hot(targets,
                                                               depth=n_targets)
            per_token_weights = tf.reduce_sum(one_hot_class_weights,
                                              axis=-1,
                                              keep_dims=True)
            logits = class_reweighting(per_token_weights)(logits)

        log_likelihood = 0.0

        default_lengths = kwargs.get("max_length") * tf.ones(
            tf.shape(hidden)[0], dtype=tf.int32)
        if pool_idx is None:
            pool_idx = default_lengths
        else:
            pool_idx = tf.where(
                tf.equal(pool_idx, 0),
                default_lengths,
                tf.cast(pool_idx, dtype=tf.int32),
            )

        with tf.device("CPU:0"):
            if multilabel:
                transition_params = []
                logits_individual = tf.unstack(logits, n_targets, axis=-1)
                if targets is not None:
                    targets_individual = tf.unstack(targets,
                                                    n_targets,
                                                    axis=-1)
                logits = []
                for i in range(n_targets):
                    transition_params.append(
                        tf.get_variable("Transition_matrix_{}".format(i),
                                        shape=[2, 2]))
                    logits.append(
                        tf.stack(
                            (logits_individual[pad_id], logits_individual[i]),
                            axis=-1))
                    if targets is not None and i != pad_id:
                        log_likelihood += crf_log_likelihood(
                            logits[-1],
                            targets_individual[i],
                            pool_idx,
                            transition_params=transition_params[-1],
                        )[0]
                logits = tf.stack(logits, axis=-1)
            else:
                transition_params = tf.get_variable(
                    "Transition_matrix", shape=[n_targets, n_targets])
                if targets is not None:
                    log_likelihood, _ = crf_log_likelihood(
                        logits,
                        targets,
                        pool_idx,
                        transition_params=transition_params)

        return {
            "logits": logits,
            "losses": -log_likelihood,
            "predict_params": {
                "transition_matrix": transition_params
            },
        }
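
The docstring above notes that the returned logits are not directly usable as predictions. For the single-label branch, a hedged sketch of how the session outputs for "logits" and "predict_params" might be Viterbi-decoded (assuming tf.contrib.crf.viterbi_decode; the random arrays and the decode_batch helper below are illustrative stand-ins, not part of the library):

import numpy as np
from tensorflow.contrib.crf import viterbi_decode

def decode_batch(logits, transition_matrix, lengths):
    """Viterbi-decode each sequence in a batch of per-token tag scores."""
    predictions = []
    for scores, length in zip(logits, lengths):
        tags, _ = viterbi_decode(scores[:length], transition_matrix)
        predictions.append(tags)
    return predictions

logits = np.random.randn(2, 7, 5)           # [batch, seq_len, n_targets], stand-in for a session output
transition_matrix = np.random.randn(5, 5)   # the "transition_matrix" from "predict_params"
print(decode_batch(logits, transition_matrix, lengths=[7, 4]))
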
示例#29
0
    def __init__(self, is_trainning, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.num_classes = num_classes = config.num_classes
        self._logits = []
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        self.initializer = initializers.xavier_initializer()

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size],
                                        dtype=data_type())
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if FLAGS.cnn_option != 1:
            with tf.variable_scope("CNN"):
                reshaped_inputs = tf.reshape(inputs,
                                             [batch_size, num_steps, -1, 1])
                # reshaped_inputs.shape[2] is actually 200
                filter_weight = tf.get_variable(
                    'weights', [3, reshaped_inputs.shape[2], 1, 1],
                    initializer=tf.truncated_normal_initializer(stddev=0.1))
                biases = tf.get_variable(
                    'biases', [1], initializer=tf.constant_initializer(0.0))

                conv = tf.nn.conv2d(reshaped_inputs,
                                    filter_weight,
                                    strides=[1, 1, 1, 1],
                                    padding='SAME')
                relu = tf.nn.relu(tf.nn.bias_add(conv, biases))

                relu = tf.reshape(relu, [batch_size, num_steps, -1])

        # get the length of each sample
        self.length = tf.reduce_sum(tf.sign(self._input_data),
                                    reduction_indices=1)
        self.length = tf.cast(self.length, tf.int32)

        if FLAGS.cnn_option == 2:
            inputs1 = relu
            inputs = tf.concat([inputs, inputs1], 2)
            size = size * 2
# ========================= CNN BILSTM

        if FLAGS.cnn_option == 3:
            inputs1 = relu
            if is_trainning and config.keep_prob < 1:
                inputs1 = tf.nn.dropout(relu, config.keep_prob)

            lstm_bw_cell1 = tf.nn.rnn_cell.BasicLSTMCell(size,
                                                         forget_bias=1.0,
                                                         state_is_tuple=True)
            lstm_fw_cell1 = tf.nn.rnn_cell.BasicLSTMCell(size,
                                                         forget_bias=1.0,
                                                         state_is_tuple=True)
            if is_trainning and config.keep_prob < 1:
                lstm_fw_cell1 = tf.nn.rnn_cell.DropoutWrapper(
                    cell=lstm_fw_cell1,
                    input_keep_prob=1.0,
                    output_keep_prob=config.keep_prob)
                lstm_bw_cell1 = tf.nn.rnn_cell.DropoutWrapper(
                    cell=lstm_bw_cell1,
                    input_keep_prob=1.0,
                    output_keep_prob=config.keep_prob)
            # stack multiple LSTM layers
            cell_fw1 = tf.nn.rnn_cell.MultiRNNCell([lstm_fw_cell1] *
                                                   config.num_layers,
                                                   state_is_tuple=True)
            cell_bw1 = tf.nn.rnn_cell.MultiRNNCell([lstm_bw_cell1] *
                                                   config.num_layers,
                                                   state_is_tuple=True)

            self._initial_state_fw1 = initial_state_fw1 = cell_fw1.zero_state(
                batch_size, data_type())
            self._initial_state_bw1 = initial_state_bw1 = cell_bw1.zero_state(
                batch_size, data_type())

            inputs1 = tf.unstack(inputs1, num_steps, 1)

            # sequence_length can be omitted here, since the effective lengths are unclear after the convolution
            outputs1, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
                cell_fw1,
                cell_bw1,
                inputs1,
                initial_state_fw=initial_state_fw1,
                initial_state_bw=initial_state_bw1,
                dtype=tf.float32,
                scope="cnn_rnn")

            output1 = tf.reshape(tf.concat(outputs1, 1), [-1, size * 2])


# ========================= end

        if is_trainning and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(size,
                                                    forget_bias=1.0,
                                                    state_is_tuple=True)
        lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(size,
                                                    forget_bias=1.0,
                                                    state_is_tuple=True)
        if is_trainning and config.keep_prob < 1:
            lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(
                cell=lstm_fw_cell,
                input_keep_prob=1.0,
                output_keep_prob=config.keep_prob)
            lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(
                cell=lstm_bw_cell,
                input_keep_prob=1.0,
                output_keep_prob=config.keep_prob)
        # stack multiple LSTM layers
        cell_fw = tf.nn.rnn_cell.MultiRNNCell([lstm_fw_cell] *
                                              config.num_layers,
                                              state_is_tuple=True)
        cell_bw = tf.nn.rnn_cell.MultiRNNCell([lstm_bw_cell] *
                                              config.num_layers,
                                              state_is_tuple=True)

        self._initial_state_fw = initial_state_fw = cell_fw.zero_state(
            batch_size, data_type())
        self._initial_state_bw = initial_state_bw = cell_bw.zero_state(
            batch_size, data_type())

        # get the length of each sample
        # self.length = tf.reduce_sum(tf.sign(self._input_data), reduction_indices=1)
        # self.length = tf.cast(self.length, tf.int32)

        inputs = tf.unstack(inputs, num_steps, 1)

        outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(
            cell_fw,
            cell_bw,
            inputs,
            initial_state_fw=initial_state_fw,
            initial_state_bw=initial_state_bw,
            dtype=tf.float32,
            sequence_length=self.length)

        # outputs = []
        state_fw = self._initial_state_fw
        state_bw = self._initial_state_bw
        # with tf.variable_scope("RNN"):
        # 	for time_step in range(num_steps):
        # 		if time_step > 0: tf.get_variable_scope().reuse_variables()
        # 		(cell_output, state) = cell(inputs[:, time_step, :], state)
        # 		outputs.append(cell_output)
        # output = tf.reshape(tf.concat(outputs,1 ), [-1, size])
        output = tf.reshape(tf.concat(outputs, 1), [-1, size * 2])

        if FLAGS.cnn_option == 3:
            size = size * 2
            final_output = tf.concat([output, output1], 1)

        weight = tf.get_variable("weight", [size * 2, 5], dtype=data_type())
        bias = tf.get_variable("bias", [5], dtype=data_type())
        if FLAGS.cnn_option != 3:
            logits = tf.matmul(output, weight) + bias
        else:
            logits = tf.matmul(final_output, weight) + bias

        self.tags_scores = tf.reshape(logits,
                                      [batch_size, num_steps, num_classes])

        small = -1000.0
        # pad logits for crf loss
        start_logits = tf.concat([
            small * tf.ones(shape=[self.batch_size, 1, self.num_classes]),
            tf.zeros(shape=[self.batch_size, 1, 1])
        ],
                                 axis=-1)
        pad_logits = tf.cast(
            small * tf.ones([self.batch_size, self.num_steps, 1]), tf.float32)
        logits = tf.concat([self.tags_scores, pad_logits], axis=-1)
        logits = tf.concat([start_logits, logits], axis=1)
        targets = tf.concat([
            tf.cast(self.num_classes * tf.ones([self.batch_size, 1]),
                    tf.int32), self._targets
        ],
                            axis=-1)

        self.trans = tf.get_variable(
            "transitions",
            shape=[self.num_classes + 1, self.num_classes + 1],
            initializer=self.initializer)

        log_likelihood, self.trans = crf_log_likelihood(
            inputs=logits,
            tag_indices=targets,
            transition_params=self.trans,
            sequence_lengths=self.length + 1)

        self.loss = loss = -tf.reduce_mean(log_likelihood)

        self._tg = self.tags_scores
        self._l = self.length
        self._tr = self.trans
        # loss
        # log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(inputs=self.tags_scores,
        # 	tag_indices=self._targets,
        # 	sequence_lengths=self.length)
        # self.loss = loss = -tf.reduce_mean(log_likelihood)

        # loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        # 	[logits],
        # 	[tf.reshape(self._targets,[-1])],
        # 	[tf.ones([batch_size * num_steps], dtype=data_type())])

        # self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._cost = cost = loss
        self._final_state_fw = state_fw
        self._final_state_bw = state_bw
        # define the backprop ops only when training
        if not is_trainning: return

        self._learning_rate = tf.Variable(0.0, trainable=False)
        trainable_variables = tf.trainable_variables()

        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, trainable_variables), config.max_grad_norm)

        # gradient descent optimizer with the specified learning rate
        optimizer = tf.train.GradientDescentOptimizer(self._learning_rate)
        self._train_op = optimizer.apply_gradients(
            zip(grads, trainable_variables))
        # self._train_op = optimizer.minimize(loss)

        self._new_learning_rate = tf.placeholder(tf.float32,
                                                 shape=[],
                                                 name="new_learning_rate")
        self._learning_rate_update = tf.assign(self._learning_rate,
                                               self._new_learning_rate)