def focal_loss(logits, labels, mask, lambda_param=1.5):
    """Masked focal loss on the class-1 probability of a softmax output.

    Args:
        logits: scores that are soft-maxed over the last axis; indexed as
            ``[:, :, 1]`` afterwards, so three axes are expected.
        labels: tensor compared against 1 (positive) and 0 (negative).
        mask: weights cast to float32 and multiplied into the loss.
        lambda_param: focusing exponent applied to the modulating factor.

    Returns:
        The mean of the masked loss summed along the last axis.
    """
    class_one_prob = K.softmax(logits, axis=-1)[:, :, 1]

    # Where the label is not 1 the probability is replaced by 1, which
    # makes the positive term vanish; likewise 0 for the negative term.
    p_when_pos = tf.where(
        K.equal(labels, 1), class_one_prob, K.ones_like(class_one_prob)
    )
    p_when_neg = tf.where(
        K.equal(labels, 0), class_one_prob, K.zeros_like(class_one_prob)
    )

    pos_term = K.pow(1. - p_when_pos, lambda_param) * K.log(p_when_pos + 1e-7)
    neg_term = K.pow(p_when_neg, lambda_param) * K.log(1. - p_when_neg + 1e-7)

    masked = -(pos_term + neg_term) * K.cast(mask, 'float32')
    per_row = K.sum(masked, axis=-1, keepdims=True)
    return K.mean(per_row)
def focal_loss(logits, labels, mask, gamma=2):
    """Masked focal loss normalised by the mask total.

    NOTE(review): despite its name, ``logits[:, :, 1]`` is used directly as
    a probability (no softmax is applied here) -- presumably the caller has
    already normalised it; confirm against the call site.

    Args:
        logits: tensor indexed as ``[:, :, 1]`` to obtain the class-1 score.
        labels: tensor compared against 1 (positive) and 0 (negative).
        mask: per-position weights; cast to the loss dtype so that boolean
            or integer masks can be multiplied in.
        gamma: focusing exponent applied to the modulating factor.

    Returns:
        Sum of the masked loss divided by the sum of the mask. When the
        mask sums to zero the denominator is clamped to ``K.epsilon()`` so
        the result is 0 instead of NaN.
    """
    pos_probs = logits[:, :, 1]

    # Where the label is not 1 the probability is replaced by 1, which
    # makes the positive term vanish; likewise 0 for the negative term.
    prob_label_pos = tf.where(
        K.equal(labels, 1), pos_probs, tf.ones_like(pos_probs)
    )
    prob_label_neg = tf.where(
        K.equal(labels, 0), pos_probs, tf.zeros_like(pos_probs)
    )

    # K.log is used instead of tf.log, which no longer exists in TF 2.x.
    loss = (
        tf.pow(1. - prob_label_pos, gamma) * K.log(prob_label_pos + 1e-7)
        + tf.pow(prob_label_neg, gamma) * K.log(1. - prob_label_neg + 1e-7)
    )

    mask = K.cast(mask, K.dtype(loss))
    denominator = K.maximum(K.sum(mask), K.epsilon())
    return K.sum(-loss * mask) / denominator
def sparse_accuracy(y_true, y_pred):
    """Mean accuracy of arg-max predictions against integer labels.

    ``y_true`` is reshaped to the shape of ``y_pred`` without its last axis
    and cast to int32; predictions are the arg-max of ``y_pred`` on axis 2.
    """
    # Give y_true an explicit shape and dtype before comparing.
    label_shape = K.shape(y_pred)[:-1]
    labels = K.cast(K.reshape(y_true, label_shape), 'int32')

    # Compute the accuracy.
    predicted = K.cast(K.argmax(y_pred, axis=2), 'int32')
    hits = K.equal(labels, predicted)
    return K.mean(K.cast(hits, K.floatx()))
def get_updates(self, loss, params):
    """Build the update ops with gradient accumulation.

    The parent optimizer's updates are built while ``K.update`` is
    temporarily replaced by a gated version, so that assignments only take
    effect on steps where ``iterations % grad_accum_steps == 0``. After
    those updates, each accumulator is either reset to the current gradient
    (on such a step) or incremented by it.

    Args:
        loss: loss tensor forwarded to ``get_gradients`` and the parent.
        params: parameters to optimise.

    Returns:
        The list of accumulator update ops, which depend on the parent's
        update ops through ``tf.control_dependencies``.
    """
    # Update criterion.
    cond = K.equal(self.iterations % self.grad_accum_steps, 0)
    # Fetch the gradients and create one zero accumulator per parameter.
    grads = self.get_gradients(loss, params)
    self.accum_grads = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p), name='accum_grad_%s' % i)
        for i, p in enumerate(params)
    ]

    old_update = K.update

    def new_update(x, new_x):
        # Keep the old value unless this is an update step.
        new_x = K.switch(cond, new_x, x)
        return old_update(x, new_x)

    # K.update is a module-level global: restore it even if the parent
    # raises, otherwise every later caller would see the gated version.
    K.update = new_update
    try:
        updates = super(new_optimizer, self).get_updates(loss, params)
    finally:
        K.update = old_update

    # Accumulate the gradients.
    with tf.control_dependencies(updates):
        accum_updates = [
            K.update(ag, K.switch(cond, g, ag + g))
            for g, ag in zip(grads, self.accum_grads)
        ]

    return accum_updates
def _resource_apply_op(self, grad, var, indices=None):
    """Apply one step for ``var`` using its accumulated gradient.

    The parent's ``_resource_apply_op`` is called with the ``'ag'`` slot
    divided by ``grad_accum_steps`` while ``K.update`` is temporarily
    replaced by a gated version that only assigns on steps where
    ``iterations % grad_accum_steps == 0``. Afterwards the slot is zeroed
    on such a step (left untouched otherwise) and ``grad`` is added to it.

    Args:
        grad: gradient for this step.
        var: variable being optimised.
        indices: if given, ``grad`` is scatter-added into the slot at these
            indices; otherwise it is added densely. Note that the parent
            call itself is made without ``indices``.

    Returns:
        The op that adds ``grad`` into the accumulator slot.
    """
    # Update criterion.
    cond = K.equal(self.iterations % self.grad_accum_steps, 0)
    # Fetch the accumulated gradient.
    ag = self.get_slot(var, 'ag')

    old_update = K.update

    def new_update(x, new_x):
        # Keep the old value unless this is an update step.
        new_x = K.switch(cond, new_x, x)
        return old_update(x, new_x)

    # K.update is a module-level global: restore it even if the parent
    # raises, otherwise every later caller would see the gated version.
    K.update = new_update
    try:
        ag_t = ag / self.grad_accum_steps
        op = super(new_optimizer, self)._resource_apply_op(ag_t, var)
    finally:
        K.update = old_update

    # Accumulate the gradient.
    with tf.control_dependencies([op]):
        ag_t = K.switch(cond, K.zeros_like(ag), ag)
        with tf.control_dependencies([K.update(ag, ag_t)]):
            if indices is None:
                ag_t = K.update(ag, ag + grad)
            else:
                ag_t = self._resource_scatter_add(ag, indices, grad)

    return ag_t
def basic_accuracy(self, y_true, y_pred, go_backwards=False):
    """Per-frame accuracy shown during training, with masked frames excluded.

    ``y_true`` must hold integer labels (not one-hot).
    """
    # Derive the mask from y_pred and convert its dtype.
    valid = K.cast(K.all(K.greater(y_pred, -1e6), axis=2), K.floatx())

    # Give y_true an explicit shape and dtype.
    y_true = K.cast(K.reshape(y_true, K.shape(y_pred)[:-1]), 'int32')

    # Optionally reverse the sequences, then look up the per-label scores.
    if self.hidden_dim is None:
        if go_backwards:
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], valid)
            transitions = K.transpose(self.trans)
        else:
            transitions = self.trans
        history = K.gather(transitions, y_true)
    else:
        if go_backwards:
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], valid)
            # The two factors swap roles for the reversed direction.
            right, left = self.l_trans, self.r_trans
        else:
            left, right = self.l_trans, self.r_trans
        gathered = K.gather(left, y_true)
        history = tf.einsum('bnd,kd->bnk', gathered, right)

    # Per-label accuracy: shift the looked-up scores one step along axis 1
    # (the first step reuses y_pred's own first frame) and average them
    # with y_pred before taking the arg-max.
    shifted = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
    blended = (y_pred + shifted) / 2
    predicted = K.cast(K.argmax(blended, 2), 'int32')
    hits = K.cast(K.equal(y_true, predicted), K.floatx())
    return K.sum(hits * valid) / K.sum(valid)
def get_labels_of_similarity(self, y_pred):
    """Build a square 0/1 matrix that pairs each row with its neighbour.

    With ``n = K.shape(y_pred)[0]``, entry ``[r, c]`` of the result is 1
    when ``c == r + 1`` for even ``r`` or ``c == r - 1`` for odd ``r``, and
    0 otherwise. The result is cast to ``K.floatx()``.
    """
    positions = K.arange(0, K.shape(y_pred)[0])
    # Even positions map to the next index, odd positions to the previous.
    partners = positions + 1 - positions % 2 * 2
    as_row = K.expand_dims(positions, 0)
    as_column = K.expand_dims(partners, 1)
    return K.cast(K.equal(as_row, as_column), K.floatx())
def get_updates(self, loss, params):
    """Build the update ops with gradient accumulation.

    The parent optimizer's updates are built while ``K.update`` is
    temporarily replaced by a gated version, so that assignments only take
    effect on steps where ``iterations % grad_accum_steps == 0``. After
    those updates, each accumulator becomes the current gradient on such a
    step and the running sum ``g + ag`` otherwise.

    Args:
        loss: loss tensor forwarded to the parent optimizer.
        params: parameters to optimise; also used to look up
            ``self.accum_grads``.

    Returns:
        The list of accumulator update ops, which depend on the parent's
        update ops through ``tf.control_dependencies``.
    """
    # Update criterion, as a 0/1 float so it can be used arithmetically.
    cond = K.equal(self.iterations % self.grad_accum_steps, 0)
    cond = K.cast(cond, K.floatx())

    old_update = K.update

    def new_update(x, new_x):
        # Blend: take new_x when cond is 1, keep x when cond is 0.
        new_x = cond * new_x + (1 - cond) * x
        return old_update(x, new_x)

    # K.update is a module-level global: restore it even if the parent
    # raises, otherwise every later caller would see the gated version.
    K.update = new_update
    try:
        updates = super(NewOptimizer, self).get_updates(loss, params)
    finally:
        K.update = old_update

    # Fetch the gradients straight from the parent implementation.
    grads = super(NewOptimizer, self).get_gradients(loss, params)
    accum_grads = [self.accum_grads[p] for p in params]
    # Accumulate the gradients.
    with tf.control_dependencies(updates):
        accum_updates = [
            K.update(ag, g + (1 - cond) * ag)
            for g, ag in zip(grads, accum_grads)
        ]

    return accum_updates
def get_updates(self, loss, params):
    """Build the update ops with gradient accumulation.

    The parent optimizer's updates are built while ``K.update`` is
    temporarily replaced by a gated version, so that assignments only take
    effect on steps where ``iterations % grad_accum_steps == 0``. After
    those updates, each accumulator becomes the current gradient on such a
    step and the running sum ``g + ag`` otherwise.

    Args:
        loss: loss tensor forwarded to ``get_gradients`` and the parent.
        params: parameters to optimise.

    Returns:
        The list of accumulator update ops, which depend on the parent's
        update ops through ``tf.control_dependencies``.
    """
    # Update criterion, as a 0/1 float so it can be used arithmetically.
    cond = K.equal(self.iterations % self.grad_accum_steps, 0)
    cond = K.cast(cond, K.floatx())
    # Fetch the gradients and create one zero accumulator per parameter.
    grads = self.get_gradients(loss, params)
    self.accum_grads = [
        K.zeros(
            K.int_shape(p), dtype=K.dtype(p), name='accum_grad_%s' % i
        ) for i, p in enumerate(params)
    ]

    old_update = K.update

    def new_update(x, new_x):
        # Blend: take new_x when cond is 1, keep x when cond is 0.
        new_x = cond * new_x + (1 - cond) * x
        return old_update(x, new_x)

    # K.update is a module-level global: restore it even if the parent
    # raises, otherwise every later caller would see the gated version.
    K.update = new_update
    try:
        updates = super(NewOptimizer, self).get_updates(loss, params)
    finally:
        K.update = old_update

    # Accumulate the gradients.
    with tf.control_dependencies(updates):
        accum_updates = [
            K.update(ag, g + (1 - cond) * ag)
            for g, ag in zip(grads, self.accum_grads)
        ]

    return accum_updates
def get_labels_of_similarity(self, y_pred):
    """Return a (batch, batch) float matrix marking each row's partner.

    ``batch`` is ``K.shape(y_pred)[0]``. Row ``r`` has a single 1 in column
    ``r + 1`` when ``r`` is even and in column ``r - 1`` when ``r`` is odd.
    Example for batch=2: ``[[0, 1], [1, 0]]``.
    """
    batch_range = K.arange(0, K.shape(y_pred)[0])  # values 0 .. batch-1
    columns = batch_range[None, :]  # shape (1, batch)
    # +1 for even indices, -1 for odd ones; shape (batch, 1)
    partner_rows = (batch_range + 1 - batch_range % 2 * 2)[:, None]
    is_partner = K.equal(columns, partner_rows)
    return K.cast(is_partner, K.floatx())
def __init__(self, optimizer, steps_per_update=1, **kwargs):
    """Wrap ``optimizer`` so that it only updates every few steps.

    Args:
        optimizer: forwarded to the parent constructor.
        steps_per_update: an update happens on steps where
            ``iterations % steps_per_update == 0``.
        **kwargs: forwarded to the parent constructor.
    """
    super(GradientAccumulation, self).__init__(optimizer, **kwargs)
    self.steps_per_update = steps_per_update

    # Flag telling whether the current step is an update step.
    self.cond = K.equal(self.iterations % self.steps_per_update, 0)

    # The learning rate decides whether an update happens: it is switched
    # to 0 on non-update steps.
    self.learning_rate = K.switch(self.cond, self.learning_rate, 0.)

    # Moving-average coefficients are switched to (almost) 1 on non-update
    # steps so the running statistics stay put.
    for name in ['momentum', 'rho', 'beta_1', 'beta_2']:
        if not hasattr(self, name):
            continue
        gated = K.switch(self.cond, getattr(self, name), 1. - 1e-7)
        setattr(self, name, gated)
def sparse_accuracy(self, y_true, y_pred):
    """Per-frame accuracy shown during training, with masked frames excluded.

    ``y_true`` must hold integer labels (not one-hot).
    """
    # Derive the mask from y_pred: a frame counts only when all of its
    # scores along axis 2 are greater than -1e6.
    valid = K.cast(K.all(K.greater(y_pred, -1e6), axis=2), K.floatx())

    # Give y_true an explicit shape and dtype.
    labels = K.cast(K.reshape(y_true, K.shape(y_pred)[:-1]), 'int32')

    # Take the per-frame arg-max as a rough measure of training progress.
    predicted = K.cast(K.argmax(y_pred, 2), 'int32')
    hits = K.cast(K.equal(labels, predicted), K.floatx())
    return K.sum(hits * valid) / K.sum(valid)
def _resource_apply(self, grad, var, indices=None):
    """Apply the parent update, then periodically sync with the slow copy.

    On steps where ``iterations % steps_per_slow_update == 0`` the
    ``'slow_var'`` slot moves towards ``var`` by a fraction
    ``slow_step_size`` and ``var`` is then overwritten with the slot's
    value; on other steps both are re-assigned to themselves.

    Returns:
        The op that writes back into ``var``.
    """
    fast_step = super(NewOptimizer, self)._resource_apply(grad, var, indices)

    period = self.steps_per_slow_update
    step_size = self.slow_step_size
    is_sync_step = K.equal(self.iterations % period, 0)
    slow = self.get_slot(var, 'slow_var')
    interpolated = slow + step_size * (var - slow)

    # Order matters: parent update -> slow update -> copy back.
    with tf.control_dependencies([fast_step]):
        next_slow = K.switch(is_sync_step, interpolated, slow)
        sync_slow = K.update(slow, next_slow)
        with tf.control_dependencies([sync_slow]):
            next_var = K.switch(is_sync_step, slow, var)
            copy_back = K.update(var, next_var)

    return copy_back
def sparse_accuracy(self, y_true, y_pred):
    """Per-frame accuracy shown during training, with masked frames excluded.

    ``y_true`` must hold integer labels (not one-hot). When
    ``self.output_mask`` is None the plain mean is returned.
    """
    weights = self.output_mask

    # Give y_true an explicit dtype and a (batch, -1) shape.
    labels = K.cast(y_true, 'int32')
    labels = K.reshape(labels, [K.shape(labels)[0], -1])

    # Take the per-frame arg-max as a rough measure of training progress.
    predicted = K.cast(K.argmax(y_pred, 2), 'int32')
    hits = K.cast(K.equal(labels, predicted), K.floatx())

    if weights is not None:
        return K.sum(hits * weights) / K.sum(weights)
    return K.mean(hits)
def sparse_accuracy(self, y_true, y_pred):
    """Per-frame accuracy shown during training, with masked frames excluded.

    ``y_true`` must hold integer labels (not one-hot). When
    ``self.input_mask`` is None the plain mean is returned.
    """
    # Derive the mask and convert its dtype.
    weights = (
        None if self.input_mask is None
        else K.cast(self.input_mask, K.floatx())
    )

    # Give y_true an explicit shape and dtype.
    labels = K.cast(K.reshape(y_true, K.shape(y_pred)[:-1]), 'int32')

    # Take the per-frame arg-max as a rough measure of training progress.
    predicted = K.cast(K.argmax(y_pred, 2), 'int32')
    hits = K.cast(K.equal(labels, predicted), K.floatx())

    if weights is not None:
        return K.sum(hits * weights) / K.sum(weights)
    return K.mean(hits)
def get_labels_of_similarity(self, y_pred):
    """Return a (btz, btz) float matrix marking each row's partner.

    ``btz`` is ``K.shape(y_pred)[0]``. Rows are paired two by two: row 0
    has its 1 in column 1, row 1 in column 0, row 2 in column 3, row 3 in
    column 2, and so on. For btz=6 the first and last rows look like::

        [[0, 1, 0, 0, 0, 0],
         [1, 0, 0, 0, 0, 0],
         ...
         [0, 0, 0, 0, 0, 1],
         [0, 0, 0, 0, 1, 0]]
    """
    indices = K.arange(0, K.shape(y_pred)[0])  # 0 .. btz-1, shape (btz,)
    parity = indices % 2
    # Even indices step forward by one, odd indices step back by one.
    partner = indices + 1 - parity * 2
    row_view = indices[None, :]  # (1, btz)
    column_view = partner[:, None]  # (btz, 1)
    matches = K.equal(row_view, column_view)  # (btz, btz) booleans
    return K.cast(matches, K.floatx())  # booleans -> 0/1
def get_updates(self, loss, params):
    """Build the parent updates, then periodically sync with slow copies.

    One zero-initialised slow variable is created per parameter. On steps
    where ``iterations % steps_per_slow_update == 0`` each slow variable
    moves towards its parameter by a fraction ``slow_step_size`` and the
    parameter is then overwritten with the slow value; on other steps both
    are re-assigned to themselves.

    Returns:
        The list of ops that write back into the parameters.
    """
    base_updates = super(new_optimizer, self).get_updates(loss, params)

    period = self.steps_per_slow_update
    step_size = self.slow_step_size
    is_sync_step = K.equal(self.iterations % period, 0)

    slow_vars = []
    for i, p in enumerate(params):
        slow_vars.append(
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='slow_var_%s' % i)
        )

    # Order matters: parent updates -> slow updates -> copy back.
    with tf.control_dependencies(base_updates):
        slow_updates = []
        for fast, slow in zip(params, slow_vars):
            target = K.switch(
                is_sync_step, slow + step_size * (fast - slow), slow
            )
            slow_updates.append(K.update(slow, target))

        with tf.control_dependencies(slow_updates):
            copy_updates = []
            for fast, slow in zip(params, slow_vars):
                copy_updates.append(
                    K.update(fast, K.switch(is_sync_step, slow, fast))
                )

    return copy_updates
def basic_accuracy(self, y_true, y_pred, go_backwards=False):
    """Per-frame accuracy shown during training, with masked frames excluded.

    ``y_true`` must hold integer labels (not one-hot). When
    ``self.output_mask`` is None the plain mean is returned.
    """
    weights = self.output_mask

    # Give y_true an explicit dtype and a (batch, -1) shape.
    y_true = K.cast(y_true, 'int32')
    y_true = K.reshape(y_true, [K.shape(y_true)[0], -1])

    # Optionally reverse the sequences.
    if go_backwards:
        y_true, y_pred = self.reverse_sequence([y_true, y_pred], weights)
        transitions = K.transpose(self.trans)
    else:
        transitions = self.trans

    # Per-label accuracy: look up the transition rows of the true labels,
    # shift them one step along axis 1 (the first step reuses y_pred's own
    # first frame) and average them with y_pred before taking the arg-max.
    history = K.gather(transitions, y_true)
    shifted = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
    blended = (y_pred + shifted) / 2
    predicted = K.cast(K.argmax(blended, 2), 'int32')
    hits = K.cast(K.equal(y_true, predicted), K.floatx())

    if weights is not None:
        return K.sum(hits * weights) / K.sum(weights)
    return K.mean(hits)