def get_updates(self, loss, params):
    # whether this iteration applies an update (every grad_accum_steps steps)
    cond = K.equal(self.iterations % self.grad_accum_steps, 0)
    cond = K.cast(cond, K.floatx())
    # compute gradients and create one accumulator per parameter
    grads = self.get_gradients(loss, params)
    self.accum_grads = [
        K.zeros(shape=K.int_shape(p), dtype=K.dtype(p),
                name='accum_grad_{}'.format(i))
        for i, p in enumerate(params)
    ]

    old_update = K.update

    # gate every variable update: new_x is applied only when cond == 1
    def new_update(x, new_x):
        new_x = cond * new_x + (1 - cond) * x
        return old_update(x, new_x)

    K.update = new_update
    updates = super(NewOptimizer, self).get_updates(loss, params)
    K.update = old_update

    # accumulate gradients; on update steps (cond == 1) the accumulator
    # is reset to the current gradient
    with tf.control_dependencies(updates):
        acc_updates = [
            K.update(ag, g + (1 - cond) * ag)
            for ag, g in zip(self.accum_grads, grads)
        ]

    return acc_updates

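# A minimal standalone sketch (my illustration, not part of the original
# optimizer) of the cond-gating trick used above: the convex combination
# cond * new_x + (1 - cond) * x keeps a variable frozen until the
# accumulation window ends. Assumes a TensorFlow-backed Keras.
from tensorflow.keras import backend as K

x = K.constant([1.0, 2.0])        # current variable value
new_x = K.constant([10.0, 20.0])  # value the inner optimizer wants to write
for step in range(1, 5):
    cond = K.cast(K.equal(step % 2, 0), K.floatx())  # "update every 2 steps"
    gated = cond * new_x + (1 - cond) * x
    print(step, K.eval(gated))  # steps 1, 3 -> [1. 2.]; steps 2, 4 -> [10. 20.]
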
def get_labels_of_similarity(self, y_pred):
    # rows come in adjacent pairs: samples 2k and 2k + 1 are positives,
    # so the positive index for row i is its partner i + 1 - i % 2 * 2
    idxs = K.arange(0, K.shape(y_pred)[0])
    idxs_1 = idxs[None, :]
    idxs_2 = (idxs + 1 - idxs % 2 * 2)[:, None]
    labels = K.equal(idxs_1, idxs_2)
    labels = K.cast(labels, K.floatx())
    return labels

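# Illustration only: what the pairing above yields for a batch of 4 rows
# where (0, 1) and (2, 3) are positive pairs. Re-done in NumPy so it can
# be checked by eye; the names below are mine, not from the original.
import numpy as np

idxs = np.arange(4)
partner = idxs + 1 - idxs % 2 * 2  # [1, 0, 3, 2]: each row's positive pair
labels = (idxs[None, :] == partner[:, None]).astype('float32')
print(labels)
# [[0. 1. 0. 0.]
#  [1. 0. 0. 0.]
#  [0. 0. 0. 1.]
#  [0. 0. 1. 0.]]
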
def sparse_accuracy(y_true, y_pred):
    # pin down the shape and dtype of y_true
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    # compute the accuracy over all frames
    y_pred = K.cast(K.argmax(y_pred, axis=2), 'int32')
    return K.mean(K.cast(K.equal(y_true, y_pred), K.floatx()))

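# Toy check of sparse_accuracy (my example; assumes a TensorFlow backend
# and a sequence-labeling y_pred of shape (batch, steps, num_labels)).
from tensorflow.keras import backend as K

y_pred = K.constant([[[0.9, 0.1], [0.2, 0.8], [0.7, 0.3]]])  # argmax: 0, 1, 0
y_true = K.constant([[0, 1, 1]])
print(K.eval(sparse_accuracy(y_true, y_pred)))  # 2 of 3 frames match -> ~0.6667
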
def get_labels_of_similarity(self, inputs):
    # same pairing scheme as get_labels_of_similarity above:
    # adjacent rows (2k, 2k + 1) are positive pairs
    idx = K.arange(0, K.shape(inputs)[0])
    idx_1 = idx[None, :]
    idx_2 = (idx + 1 - idx % 2 * 2)[:, None]
    labels = K.equal(idx_1, idx_2)
    labels = K.cast(labels, K.floatx())
    return labels

def get_label_mask(self, y_true):
    """Mask of sample pairs within the batch that share the same label."""
    label = K.cast(y_true, 'int32')
    label_2 = K.reshape(label, (1, -1))
    mask = K.equal(label_2, label)
    mask = K.cast(mask, K.floatx())
    mask = mask * (1 - K.eye(K.shape(y_true)[0]))  # exclude the diagonal, i.e. i == j
    return mask

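# Illustration only (NumPy re-implementation, names are mine): the mask
# produced for labels [0, 1, 0, 1] marks same-label pairs and zeroes i == j.
import numpy as np

y_true = np.array([[0], [1], [0], [1]], dtype='int32')
mask = (y_true.reshape(1, -1) == y_true).astype('float32')
mask *= 1 - np.eye(len(y_true), dtype='float32')
print(mask)
# [[0. 0. 1. 0.]
#  [0. 0. 0. 1.]
#  [1. 0. 0. 0.]
#  [0. 1. 0. 0.]]
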
def sparse_accuracy(self, y_true, y_pred):
    """Per-frame accuracy shown during training, with masked frames excluded.
    Here y_true must be integer labels (not one-hot).
    """
    # derive the mask (padded frames have all scores <= -1e6) and cast it
    mask = K.all(K.greater(y_pred, -1e6), axis=2)
    mask = K.cast(mask, K.floatx())
    # pin down the shape and dtype of y_true
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    # take the per-frame argmax as a rough measure of training progress
    y_pred = K.cast(K.argmax(y_pred, 2), 'int32')
    isequal = K.cast(K.equal(y_true, y_pred), K.floatx())
    return K.sum(isequal * mask) / K.sum(mask)

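# Toy check of the masking logic (NumPy re-implementation, my example):
# padded frames carry scores below -1e6 on every label, so they drop out.
import numpy as np

y_pred = np.array([[[0.9, 0.1], [0.2, 0.8], [-1e7, -1e7]]])  # last frame padded
y_true = np.array([[0, 0, 0]])
mask = (y_pred > -1e6).all(axis=2).astype('float32')  # [[1. 1. 0.]]
pred = y_pred.argmax(axis=2)                          # [[0 1 0]]
print(((pred == y_true) * mask).sum() / mask.sum())   # 0.5: only 2 frames count
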
def _resource_apply(self, grad, var, indices=None):
    """If iterations % grad_accum_steps == 0, update; otherwise accumulate.
    The idea: first check whether grad_accum_steps gradients have been
    accumulated. If not, leave the variable unchanged during the update and
    accumulate the current gradient; otherwise, apply the averaged gradient
    and reset the accumulator to zero.
    """
    # whether this iteration applies an update
    cond = K.equal(self.iterations % self.grad_accum_steps, 0)
    # fetch the gradient accumulator
    gradient_accumulation = self.get_slot(var, 'gradient_accumulation')
    # averaged accumulated gradient
    gradient_t = gradient_accumulation / self.grad_accum_steps

    old_update = K.update

    # gate each variable update on cond
    def new_update(x, new_x):
        new_x = K.switch(cond, new_x, x)
        return old_update(x, new_x)

    K.update = new_update
    op = super(NewOptimizer, self)._resource_apply(gradient_t, var)
    K.update = old_update

    # zero the accumulator when an update was applied
    with tf.control_dependencies([op]):
        gradient_t = K.switch(cond, K.zeros_like(gradient_accumulation),
                              gradient_accumulation)
        with tf.control_dependencies(
                [K.update(gradient_accumulation, gradient_t)]):
            # assign the accumulation op so it is part of the returned graph
            if indices is None:
                gradient_t = K.update(gradient_accumulation,
                                      gradient_accumulation + grad)
            else:
                gradient_t = self._resource_scatter_add(
                    gradient_accumulation, indices, grad)

    return gradient_t

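# The slot read by _resource_apply has to be created somewhere; a minimal
# companion sketch (my assumption about the surrounding wrapper class,
# following the standard tf.keras OptimizerV2 slot API):
def _create_slots(self, var_list):
    super(NewOptimizer, self)._create_slots(var_list)
    for var in var_list:
        # one zero-initialized accumulator per trainable variable
        self.add_slot(var, 'gradient_accumulation')
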
def compute_classification_acc(self, inputs, mask=None):
    _, _, y_pred, _, y_true = inputs
    equal = K.equal(
        K.cast(K.argmax(y_pred, axis=-1), 'int32'),
        K.cast(y_true, 'int32')
    )
    # fraction of correct predictions in the batch, as a scalar
    return K.sum(K.cast(equal, K.floatx())) / K.cast(
        K.shape(y_true)[0], K.floatx())

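# Toy check (NumPy, my example) of the accuracy computed above: sum of
# correct argmax predictions divided by the batch size.
import numpy as np

y_pred = np.array([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])  # argmax: 1, 0, 1
y_true = np.array([1, 0, 0])
print((y_pred.argmax(axis=-1) == y_true).sum() / len(y_true))  # 2/3 ≈ 0.667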