def crf_loss(y, y_, transitions, nums_tags, batch_size):
    """Negative log-likelihood loss for a linear-chain CRF.

    :param y: emission (tag) scores — assumed shape (batch_size, steps, nums_tags); TODO confirm
    :param y_: gold tag ids — assumed shape (batch_size, steps), with 0 used as padding
               (masks are derived from tf.sign(y_)); TODO confirm
    :param transitions: tag transition score matrix; presumably (nums_tags + 1, nums_tags + 1),
                        the extra row/column being the begin tag — verify against caller
    :param nums_tags: number of real tags; also used as the id of the synthetic begin tag
    :param batch_size: number of sequences in the batch
    :return: scalar loss = -(gold path score - log partition score)
    """
    tag_scores = y
    # Static number of time steps, recovered by unstacking along the time axis.
    nums_steps = len(tf.unstack(tag_scores, axis=1))
    # 1.0 where the gold tag id is non-zero, 0.0 on padding positions.
    masks = tf.cast(tf.sign(y_), dtype=tf.float32)
    # Per-sequence lengths = count of non-padding positions.
    lengths = tf.reduce_sum(tf.sign(y_), axis=1)
    tag_ids = y_
    # Begin-tag id (== nums_tags) prepended to every sequence, shape (batch_size, 1).
    b_id = tf.stack([[nums_tags]] * batch_size)
    #e_id = tf.pack([[0]] * batch_size)
    # (batch_size, steps + 1): begin tag followed by the gold tags.
    padded_tag_ids = tf.concat(axis=1, values=[b_id, tag_ids])
    # Consecutive (prev_tag, cur_tag) pairs, shape (batch_size, steps, 2) —
    # indices into the transition matrix for each step.
    idx_tag_ids = tf.stack(
        [tf.slice(padded_tag_ids, [0, i], [-1, 2])
         for i in range(nums_steps)],
        axis=1)
    tag_ids = tf.contrib.layers.one_hot_encoding(tag_ids, nums_tags)
    # Emission score of the gold tag at each position, padding masked out.
    point_score = tf.reduce_sum(tag_scores * tag_ids, axis=2)
    point_score *= masks
    #Save for future
    #trans_score = tf.gather_nd(transitions, idx_tag_ids)
    # Manual gather_nd: flatten (row, col) pairs into linear indices using the
    # exclusive reverse cumulative product of the matrix shape as strides.
    trans_sh = tf.stack(transitions.get_shape())
    trans_sh = tf.cumprod(trans_sh, exclusive=True, reverse=True)
    flat_tag_ids = tf.reduce_sum(trans_sh * idx_tag_ids, axis=2)
    trans_score = tf.gather(tf.reshape(transitions, [-1]), flat_tag_ids)
    ##
    #extend_mask = tf.concat(1, [tf.ones([batch_size, 1]), masks])
    # NOTE(review): the begin->first transition is masked with the same mask as
    # the emissions rather than an extended mask — confirm this is intended.
    extend_mask = masks
    trans_score *= extend_mask
    # Gold path score = masked emissions + masked transitions, summed over the batch.
    target_path_score = tf.reduce_sum(point_score) + tf.reduce_sum(trans_score)
    # Log partition function from the forward algorithm (project-local Forward class).
    total_path_score, _, _ = Forward(tag_scores, transitions, nums_tags,
                                     lengths, batch_size)()
    return -(target_path_score - total_path_score)
def decode_graph(self):
    """Build the Viterbi decoding graph for every (bucket, tag-set) pair.

    For each sentence-length bucket and each tag set this creates the
    placeholders needed at decode time and the max-score / backpointer
    tensors of a Viterbi pass, storing them in self.decode_holders and
    self.scores (one inner list per bucket, one entry per tag set).

    NOTE(review): uses legacy TF APIs (tf.pack, tf.concat(axis, values),
    tf.argmax(dimension=...)) and Python 2 xrange — this targets old
    TensorFlow / Python 2.
    """
    self.decode_holders = []
    self.scores = []
    for bucket in self.buckets_char:
        decode_holders = []
        scores = []
        for nt in self.nums_tags:
            # Emission scores for this bucket: (batch, bucket_len, nt).
            ob = tf.placeholder(tf.float32, [None, bucket, nt])
            # Transition matrix with one extra begin/end tag row+column.
            trans = tf.placeholder(tf.float32, [nt + 1, nt + 1])
            nums_steps = ob.get_shape().as_list()[1]
            length = tf.placeholder(tf.int32, [None])
            b_size = tf.placeholder(tf.int32, [])
            # Large negative value used to forbid impossible tags/transitions.
            small = -1000
            # Score column for the synthetic (nt+1)-th tag: forbidden at
            # every real time step.
            class_pad = tf.pack(small * tf.ones([b_size, nums_steps, 1]))
            observations = tf.concat(2, [ob, class_pad])
            # Begin-of-sentence frame: only the synthetic tag (last slot)
            # has score 0, all real tags are forbidden.
            b_vec = tf.tile(([small] * nt + [0]), [b_size])
            b_vec = tf.cast(b_vec, tf.float32)
            b_vec = tf.reshape(b_vec, [b_size, 1, -1])
            # End-of-sentence frame: only the first slot has score 0.
            # NOTE(review): begin uses the LAST slot but end uses the FIRST —
            # presumably matches how transitions are indexed; verify.
            e_vec = tf.tile(([0] + [small] * nt), [b_size])
            e_vec = tf.cast(e_vec, tf.float32)
            e_vec = tf.reshape(e_vec, [b_size, 1, -1])
            # (batch, nums_steps + 2, nt + 1) with begin/end frames attached.
            observations = tf.concat(1, [b_vec, observations, e_vec])
            # Broadcast the transition matrix across the batch.
            transitions = tf.reshape(tf.tile(trans, [b_size, 1]),
                                     [b_size, nt + 1, nt + 1])
            # Time-major layout so observations[t] is one time step.
            observations = tf.reshape(observations,
                                      [-1, nums_steps + 2, nt + 1, 1])
            observations = tf.transpose(observations, [1, 0, 2, 3])
            previous = observations[0, :, :, :]
            max_scores = []
            max_scores_pre = []
            alphas = [previous]
            # Viterbi / forward recursion over time steps 1..nums_steps+1.
            for t in xrange(1, nums_steps + 2):
                # previous as a column, current as a row: alpha_t[b, i, j] =
                # score of being in tag i at t-1 and moving to tag j at t.
                previous = tf.reshape(previous, [-1, nt + 1, 1])
                current = tf.reshape(observations[t, :, :, :],
                                     [-1, 1, nt + 1])
                alpha_t = previous + current + transitions
                # Best score per destination tag, and the argmax backpointer.
                max_scores.append(
                    tf.reduce_max(alpha_t, reduction_indices=1))
                max_scores_pre.append(tf.argmax(alpha_t, dimension=1))
                # Forward (sum) recursion kept alongside the max recursion.
                alpha_t = tf.reshape(Forward.log_sum_exp(alpha_t, axis=1),
                                     [-1, nt + 1, 1])
                alphas.append(alpha_t)
                previous = alpha_t
            # (batch, time, nt + 1) score and backpointer stacks for decoding.
            max_scores = tf.pack(max_scores, axis=1)
            max_scores_pre = tf.pack(max_scores_pre, axis=1)
            decode_holders.append([ob, trans, length, b_size])
            scores.append((max_scores, max_scores_pre))
        self.decode_holders.append(decode_holders)
        self.scores.append(scores)
def crf_loss(y, y_, ly, ly_, transitions, nums_tags, batch_size):
    """Compute the CRF tagging loss plus a language-model auxiliary loss.

    :param y: predictions, shape = (batch_size, sentence length, number of tags),
              i.e. the tag scores for each character of each sentence
    :param y_: ground truth, shape = (batch_size, sentence length); 0 is padding
    :param ly: language-model predictions — shape not visible here; TODO confirm
    :param ly_: language-model targets — passed to sparse_cross_entropy; TODO confirm
    :param transitions: tag transition matrix, shape = (num tags + 1, num tags + 1)
    :param nums_tags: number of tags
    :param batch_size: real batch size
    :return: (tagging_loss, lm_loss) tuple of scalar losses
    """
    tag_scores = y
    # Sentence length, i.e. the number of decoding steps.
    nums_steps = len(tf.unstack(tag_scores, axis=1))
    # shape = (batch_size, sentence length): 1.0 on real tokens, 0.0 on padding.
    masks = tf.cast(tf.sign(y_), dtype=tf.float32)
    lengths = tf.reduce_sum(tf.sign(y_), axis=1)
    tag_ids = y_
    # shape = (batch_size, 1); turns a list of per-example ids into one tensor.
    b_id = tf.stack([[nums_tags]] * batch_size)
    # e_id = tf.pack([[0]] * batch_size)
    # shape = (batch_size, sentence length + 1), since tag_ids is
    # (batch_size, length) and b_id is (batch_size, 1).
    padded_tag_ids = tf.concat(axis=1, values=[b_id, tag_ids])
    # Each tf.slice() cuts the padded label sequence into consecutive label
    # pairs, i.e. the tag-to-tag transitions. Each slice is a
    # (batch_size, 2) tensor; there are nums_steps of them, stacked into a
    # tensor of shape (batch_size, sentence length, 2).
    idx_tag_ids = tf.stack(
        [tf.slice(padded_tag_ids, [0, i], [-1, 2])
         for i in range(nums_steps)],
        axis=1)
    tag_ids = tf.contrib.layers.one_hot_encoding(tag_ids, nums_tags)
    # Emission score of the gold tag at each position, padding masked out.
    point_score = tf.reduce_sum(tag_scores * tag_ids, axis=2)
    point_score *= masks
    # Save for future
    # trans_score = tf.gather_nd(transitions, idx_tag_ids)
    # Manual gather_nd: flatten (row, col) index pairs into linear indices
    # using the matrix shape's exclusive reverse cumulative product as strides.
    trans_sh = tf.stack(transitions.get_shape())
    trans_sh = tf.cumprod(trans_sh, exclusive=True, reverse=True)
    flat_tag_ids = tf.reduce_sum(trans_sh * idx_tag_ids, axis=2)
    trans_score = tf.gather(tf.reshape(transitions, [-1]), flat_tag_ids)
    # extend_mask = tf.concat(1, [tf.ones([batch_size, 1]), masks])
    extend_mask = masks
    trans_score *= extend_mask
    # Gold path score = masked emissions + masked transitions over the batch.
    target_path_score = tf.reduce_sum(point_score) + tf.reduce_sum(trans_score)
    # Log partition function from the forward algorithm (project-local Forward).
    total_path_score, _, _ = Forward(tag_scores, transitions, nums_tags,
                                     lengths, batch_size)()
    tagging_loss = -(target_path_score - total_path_score)
    # Auxiliary language-model loss, masked to real (non-padding) tokens.
    lm_loss = tf.reduce_sum(sparse_cross_entropy(ly, ly_) * masks)
    # return tagging_loss, tf.zeros_like(lm_loss)  # debug variant: LM loss off
    return tagging_loss, lm_loss
def crf_loss(y, y_, transitions, nums_tags, batch_size):
    """Negative log-likelihood of the gold tag sequences under a linear-chain CRF.

    :param y: emission scores per position and tag
    :param y_: gold tag ids, with 0 treated as padding
    :param transitions: transition score matrix (extra slot for the begin tag)
    :param nums_tags: number of real tags; doubles as the begin-tag id
    :param batch_size: number of sequences in the batch
    :return: scalar loss = log-partition minus gold-path score
    """
    emissions = y
    # Static step count via unstacking along the time axis.
    step_count = len(tf.unstack(emissions, axis=1))
    # 1.0 on real tokens, 0.0 on padding; lengths count the real tokens.
    mask = tf.cast(tf.sign(y_), dtype=tf.float32)
    seq_lengths = tf.reduce_sum(tf.sign(y_), axis=1)
    # Prepend the synthetic begin tag (id == nums_tags) to every sequence.
    begin_col = tf.stack([[nums_tags]] * batch_size)
    padded_ids = tf.concat(axis=1, values=[begin_col, y_])
    # Consecutive (prev, cur) tag pairs — indices into the transition matrix.
    pair_slices = [tf.slice(padded_ids, [0, t], [-1, 2])
                   for t in range(step_count)]
    bigram_idx = tf.stack(pair_slices, axis=1)
    # Emission score of the gold tag at every position, padding zeroed out.
    gold_one_hot = tf.contrib.layers.one_hot_encoding(y_, nums_tags)
    unary_scores = tf.reduce_sum(emissions * gold_one_hot, axis=2) * mask
    # Transition score for every gold bigram, padding zeroed out.
    binary_scores = tf.gather_nd(transitions, bigram_idx) * mask
    # Total score of the gold paths across the batch.
    gold_path_score = (tf.reduce_sum(unary_scores)
                       + tf.reduce_sum(binary_scores))
    # Log partition function via the forward algorithm (project-local Forward).
    log_partition, _, _ = Forward(emissions, transitions, nums_tags,
                                  seq_lengths, batch_size)()
    return log_partition - gold_path_score