def huber_loss_backward(inputs, delta=1.0):
    """
    Build the gradients of the Huber loss with respect to its two inputs.

    Args:
      inputs (list of nn.Variable): ``[dy, x0, x1]`` - the incoming gradient
        followed by the two inputs of the forward function.
      delta (float): Threshold compared against ``|x0 - x1|`` to choose
        between the quadratic and the linear branch.

    Return:
      tuple of Variable: ``(dx0, dx1)``, the gradients wrt ``x0`` and ``x1``
      (``dx1`` is the negation of ``dx0``).
    """
    grad_out, lhs, rhs = inputs[0], inputs[1], inputs[2]
    diff = lhs - rhs

    # Region masks: `inside` marks |diff| < delta, `above` marks x0 > x1.
    # The complements are derived before any mask is passed to no_grad.
    inside = F.less_scalar(F.abs(diff), delta)
    outside = 1 - inside
    above = F.greater(lhs, rhs)
    below = 1 - above

    # The masks are treated as constants in the returned expressions.
    inside = no_grad(inside)
    outside = no_grad(outside)
    above = no_grad(above)
    below = no_grad(below)

    # 2 * d inside the threshold, +/- 2 * delta outside of it.
    quadratic_part = 2 * diff * inside
    linear_up_part = 2 * delta * outside * above
    linear_down_part = -2 * delta * outside * below

    grad_lhs = grad_out * (quadratic_part + linear_up_part + linear_down_part)
    grad_rhs = -grad_lhs
    return grad_lhs, grad_rhs
def backward_impl(self, inputs, outputs, prop_down, accum):
    """
    Propagate the gradients of ``(dx0, dx1)`` back to ``dy``.

    Only the gradient of ``inputs[2]`` (``dy``) is written; nothing is
    propagated to ``x0`` or ``x1`` by this method.

    Args:
      inputs (list of nn.Variable): ``[x0, x1, dy]``.
      outputs (list of nn.Variable): ``[dx0, dx1]``.
      prop_down (list of bool): Only ``prop_down[2]`` is consulted; when it
        is false the method does nothing.
      accum (list of bool): Only ``accum[2]`` is consulted; when true the
        result is added to the existing gradient of ``dy``, otherwise it
        overwrites it.
    """
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]
    if not prop_down[2]:
        return

    # Forward-graph inputs (data) used to rebuild the selection mask.
    x0 = inputs[0].data
    x1 = inputs[1].data
    # Gradient buffer to update and the incoming output gradients.
    g_dy = inputs[2].grad
    g_dx0 = outputs[0].grad
    g_dx1 = outputs[1].grad

    # g_dx0 contributes where x0 > x1, g_dx1 contributes elsewhere.
    mask = F.greater(x0, x1)
    g_dy_update = g_dx0 * mask + g_dx1 * (1.0 - mask)

    if accum[2]:
        g_dy += g_dy_update
    else:
        g_dy.copy_from(g_dy_update)
def epsilon_insensitive_loss_backward(inputs, epsilon):
    """
    Build the gradients of the epsilon-insensitive loss wrt its two inputs.

    Args:
      inputs (list of nn.Variable): ``[dy, x0, x1]`` - the incoming gradient
        followed by the two inputs of the forward function.
      epsilon (float): Threshold compared against ``|x0 - x1|``; the
        gradient is zero wherever ``|x0 - x1|`` does not exceed it.

    Return:
      tuple of Variable: ``(dx0, dx1)``, the gradients wrt ``x0`` and ``x1``
      (``dx1`` is the negation of ``dx0``).
    """
    dy = inputs[0]
    x0 = inputs[1]
    x1 = inputs[2]
    d = x0 - x1

    # m0: |d| > epsilon (loss is active); mg / ml: x0 > x1 and its complement.
    m0 = F.greater_scalar(F.abs(d), epsilon)
    mg = F.greater(x0, x1)
    ml = 1 - mg

    # The masks are treated as constants in the returned expressions.
    m0 = no_grad(m0)
    mg = no_grad(mg)
    ml = no_grad(ml)

    # +1 where active and x0 > x1, -1 where active and x0 <= x1.
    t0 = m0 * mg
    t1 = -m0 * ml
    dx0 = dy * (t0 + t1)
    dx1 = -dx0
    return dx0, dx1
def maximum2_backward(inputs):
    """
    Build the gradients of the element-wise maximum wrt its two inputs.

    Args:
      inputs (list of nn.Variable): ``[dy, x0, x1]`` - the incoming gradient
        followed by the two inputs of the forward function.

    Return:
      tuple of Variable: ``(dx0, dx1)``; ``dy`` is routed to ``x0`` where
      ``x0 > x1`` and to ``x1`` everywhere else.
    """
    grad_out, lhs, rhs = inputs[0], inputs[1], inputs[2]

    # Both selection masks are built first, then passed to no_grad.
    lhs_selected = F.greater(lhs, rhs)
    rhs_selected = 1 - lhs_selected
    lhs_selected, rhs_selected = no_grad(lhs_selected), no_grad(rhs_selected)

    grad_lhs = grad_out * lhs_selected
    grad_rhs = grad_out * rhs_selected
    return grad_lhs, grad_rhs
def _blend_by_probability(batch, batch_aug, p_aug, rnd):
    """Pick ``batch_aug`` where ``p_aug > rnd`` and ``batch`` elsewhere."""
    use_augmented = F.greater(F.tile(p_aug, batch.shape[0]), rnd)
    return F.where(use_augmented, batch_aug, batch)


def augment(batch, aug_list, p_aug=1.0):
    """
    Apply the augmentations named in ``aug_list`` to ``batch``.

    The augmentations are applied in a fixed order: ``"flip"``, ``"lrflip"``,
    ``"translation"``, ``"color"``, ``"cutout"``. For the first four, an
    augmented copy of the batch is built and ``F.where`` chooses between the
    augmented and the untouched entry by comparing ``p_aug`` with a value
    drawn by ``F.rand`` for each entry along axis 0. ``"cutout"`` passes the
    probability directly to ``F.random_erase``.

    Args:
      batch (nn.Variable): Batch to augment. Assumed to be 4-D
        ``(N, C, H, W)`` - the code indexes ``shape[0..3]`` and flips
        axes 2 and 3 (TODO confirm against callers).
      aug_list (container of str): Names of the augmentations to enable.
        Unknown names are ignored.
      p_aug (float, int, numpy scalar or nn.Variable): Probability of
        applying each augmentation. Plain numbers are wrapped into a
        one-element ``nn.Variable``; a Variable is used as given
        (presumably of shape ``(1,)`` - it is tiled to ``N`` entries).

    Return:
      nn.Variable: The augmented batch (``batch`` itself when no
      augmentation is enabled).
    """
    # Accept any real scalar, not only `float`, so that e.g. `p_aug=1`
    # or a numpy scalar does not fail later in F.tile / `.d`.
    if isinstance(p_aug, (int, float, np.number)):
        p_aug = nn.Variable.from_numpy_array(float(p_aug) * np.ones((1,)))

    if "flip" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        batch_aug = F.random_flip(batch, axes=(2, 3))
        batch = _blend_by_probability(batch, batch_aug, p_aug, rnd)

    if "lrflip" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        batch_aug = F.random_flip(batch, axes=(3,))
        batch = _blend_by_probability(batch, batch_aug, p_aug, rnd)

    if "translation" in aug_list and batch.shape[2] >= 8:
        rnd = F.rand(shape=[batch.shape[0], ])
        # Currently nnabla does not support random_shift with border_mode="noise"
        # Workaround: a mask with a zero border is appended to the batch as
        # one extra entry, shifted together with the images, and multiplied
        # back onto them afterwards. The mask uses the batch's own channel
        # count so that non-RGB inputs can be concatenated as well.
        mask = np.ones((1, batch.shape[1], batch.shape[2], batch.shape[3]))
        mask[:, :, :, 0] = 0
        mask[:, :, :, -1] = 0
        mask[:, :, 0, :] = 0
        mask[:, :, -1, :] = 0
        batch_int = F.concatenate(
            batch, nn.Variable.from_numpy_array(mask), axis=0)
        batch_int_aug = F.random_shift(
            batch_int,
            shifts=(batch.shape[2] // 8, batch.shape[3] // 8),
            border_mode="nearest")
        # First N entries are the shifted images, the last one is the mask.
        batch_aug = F.slice(
            batch_int_aug, start=(0, 0, 0, 0), stop=batch.shape)
        mask_var = F.slice(
            batch_int_aug,
            start=(batch.shape[0], 0, 0, 0),
            stop=batch_int_aug.shape)
        batch_aug = batch_aug * F.broadcast(mask_var, batch_aug.shape)
        batch = _blend_by_probability(batch, batch_aug, p_aug, rnd)

    if "color" in aug_list:
        rnd = F.rand(shape=[batch.shape[0], ])
        # One random factor per entry along axis 0; the ranges below assume
        # F.rand draws from [0, 1).
        rnd_contrast = 1.0 + 0.5 * \
            (2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1]) - 1.0)  # from 0.5 to 1.5
        rnd_brightness = 0.5 * \
            (2.0 * F.rand(shape=[batch.shape[0], 1, 1, 1]) - 1.0)  # from -0.5 to 0.5
        rnd_saturation = 2.0 * \
            F.rand(shape=[batch.shape[0], 1, 1, 1])  # from 0.0 to 2.0
        # Brightness
        batch_aug = batch + rnd_brightness
        # Saturation
        mean_s = F.mean(batch_aug, axis=1, keepdims=True)
        batch_aug = rnd_saturation * (batch_aug - mean_s) + mean_s
        # Contrast
        mean_c = F.mean(batch_aug, axis=(1, 2, 3), keepdims=True)
        batch_aug = rnd_contrast * (batch_aug - mean_c) + mean_c
        batch = _blend_by_probability(batch, batch_aug, p_aug, rnd)

    if "cutout" in aug_list and batch.shape[2] >= 16:
        # NOTE(review): `p_aug.d[0]` is read once, when this function runs;
        # later updates to the `p_aug` Variable presumably do not reach
        # random_erase - confirm if an adaptive probability is intended.
        batch = F.random_erase(batch, prob=p_aug.d[0],
                               replacements=(0.0, 0.0))

    return batch