def compute_loss(self, inputs, mask=None):
    pred, ytrue = inputs
    # Track classification accuracy as a metric
    acc = keras.metrics.sparse_categorical_accuracy(ytrue, pred)
    self.add_metric(acc, name='clf_acc')
    # One-hot encode the integer labels
    ytrue = K.cast(ytrue, 'int32')
    ytrue = K.one_hot(ytrue, num_classes=num_classes)
    ytrue = K.reshape(ytrue, (-1, num_classes))
    # Element-wise binary cross-entropy over every (sample, class) slot
    loss = ytrue * K.log(pred + K.epsilon()) + (1 - ytrue) * K.log(1 - pred + K.epsilon())
    loss = -K.mean(loss)
    loss = loss * self.alpha  # weight the classification term by alpha
    self.add_metric(loss, name='clf_loss')
    return loss
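# A quick numeric sanity check of the loss above (a sketch; the probabilities
# and labels are made up): the value is the element-wise binary cross-entropy
# between the one-hot labels and the predictions, averaged over every
# (sample, class) slot.
import numpy as np

eps = 1e-7
pred = np.array([[0.7, 0.2, 0.1],
                 [0.1, 0.8, 0.1]])  # predicted class probabilities, 3 classes
onehot = np.eye(3)[[0, 1]]          # labels 0 and 1, one-hot encoded
bce = onehot * np.log(pred + eps) + (1 - onehot) * np.log(1 - pred + eps)
print(-bce.mean())                  # ≈ 0.187, what compute_loss returns with alpha = 1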
def compute_position_ids(self, inputs):
    """T5 relative position bucketing (translated directly from the official T5 source).
    i-j:    0 1 2 3 4 5 6 7 8 9 10 11 12 13 14...
    f(i-j): 0 1 2 3 4 5 6 7 8 8 8  8  9  9  9 ...
    """
    q, v = inputs
    # Compute the position difference between every query/value pair
    q_idxs = K.arange(0, K.shape(q)[1], dtype='int32')
    q_idxs = K.expand_dims(q_idxs, 1)
    v_idxs = K.arange(0, K.shape(v)[1], dtype='int32')
    v_idxs = K.expand_dims(v_idxs, 0)
    pos_ids = v_idxs - q_idxs
    # Post-processing: map each difference to a bucket id
    num_buckets, max_distance = self.input_dim, self.max_distance
    ret = 0
    n = -pos_ids
    if self.bidirectional:
        # Half of the buckets encode the sign of the offset
        num_buckets //= 2
        ret += K.cast(K.less(n, 0), 'int32') * num_buckets
        n = K.abs(n)
    else:
        n = K.maximum(n, 0)
    # Now n is in the range [0, inf)
    max_exact = num_buckets // 2  # the first half of the buckets is exact
    is_small = K.less(n, max_exact)
    # The remaining buckets grow logarithmically up to max_distance
    val_if_large = max_exact + K.cast(
        K.log(K.cast(n, K.floatx()) / max_exact) /
        np.log(max_distance / max_exact) * (num_buckets - max_exact),
        'int32',
    )
    val_if_large = K.minimum(val_if_large, num_buckets - 1)
    ret += K.switch(is_small, n, val_if_large)
    return ret
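# A standalone NumPy sketch of the same bucketing rule, handy for checking the
# f(i-j) table in the docstring. num_buckets=32 and max_distance=128 mirror the
# T5 defaults, but here they are assumptions, as is the function name.
import numpy as np

def relative_position_bucket(pos_ids, num_buckets=32, max_distance=128,
                             bidirectional=True):
    ret = 0
    n = -pos_ids
    if bidirectional:
        num_buckets //= 2
        ret = (n < 0).astype('int32') * num_buckets  # sign goes into the upper half
        n = np.abs(n)
    else:
        n = np.maximum(n, 0)
    max_exact = num_buckets // 2
    is_small = n < max_exact
    # np.maximum(n, 1) only silences log(0); those entries take the small branch
    val_if_large = max_exact + (
        np.log(np.maximum(n, 1) / max_exact) /
        np.log(max_distance / max_exact) * (num_buckets - max_exact)
    ).astype('int32')
    val_if_large = np.minimum(val_if_large, num_buckets - 1)
    return ret + np.where(is_small, n, val_if_large)

diffs = np.arange(15)                     # i - j = 0 .. 14
print(relative_position_bucket(-diffs))   # [0 1 2 3 4 5 6 7 8 8 8 8 9 9 9]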
def compute_loss_of_scl(self, inputs, mask=None):
    y_pred, y_true = inputs
    label_mask = self.get_label_mask(y_true)
    y_pred = K.l2_normalize(y_pred, axis=1)  # L2-normalize the feature vectors
    similarities = K.dot(y_pred, K.transpose(y_pred))  # pairwise similarity matrix
    similarities = similarities - K.eye(K.shape(y_pred)[0]) * 1e12  # mask out the diagonal, i.e. i == j
    similarities = similarities / self.T  # temperature scaling
    similarities = K.exp(similarities)
    sum_similarities = K.sum(similarities, axis=-1, keepdims=True)  # denominator: sum over k != i
    scl = similarities / sum_similarities
    scl = K.log(scl + K.epsilon())
    # Average the log-probabilities over the positives sharing each sample's label
    scl = -K.sum(scl * label_mask, axis=1, keepdims=True) / (
        K.sum(label_mask, axis=1, keepdims=True) + K.epsilon()
    )
    return K.mean(scl)
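# `get_label_mask` is referenced above but not shown; a plausible sketch (an
# assumption, not the original implementation): mask[i, j] = 1 when samples i
# and j carry the same label and i != j, so each row of the loss averages the
# log-probabilities of that sample's positives.
import tensorflow as tf
from tensorflow.keras import backend as K

def get_label_mask(y_true):
    y_true = K.reshape(K.cast(y_true, 'int32'), (-1, 1))
    mask = K.cast(K.equal(y_true, K.transpose(y_true)), K.floatx())  # same-label pairs
    mask = mask - tf.eye(K.shape(y_true)[0])                         # drop i == j
    return mask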
def normal_shannon_entropy(p, labels_num=num_classes):
    # Normalized Shannon entropy: H(p) / log(labels_num), in [0, 1]
    p = K.cast(p, K.floatx())
    norm = K.log(1. / labels_num)
    # K.epsilon() guards against log(0) when a class probability is exactly 0
    s = K.sum(p * K.log(p + K.epsilon()), axis=-1, keepdims=True)
    return s / norm
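# Quick numeric check (a sketch with made-up values): a uniform distribution
# has normalized entropy ≈ 1.0 (maximally uncertain) and a near-one-hot
# distribution a value near 0, which is what makes this score usable as a
# confidence threshold.
import numpy as np

def normal_shannon_entropy_np(p, labels_num):
    s = np.sum(p * np.log(p + 1e-7), axis=-1, keepdims=True)
    return s / np.log(1.0 / labels_num)

print(normal_shannon_entropy_np(np.array([1/3, 1/3, 1/3]), 3))     # ≈ 1.0
print(normal_shannon_entropy_np(np.array([0.98, 0.01, 0.01]), 3))  # ≈ 0.10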