def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    axes = self.forward_func.info.args["axes"]
    # Inputs
    x0 = inputs[0].data
    dy = inputs[1].data
    # Outputs
    dx0 = outputs[0].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_dy = inputs[1].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad

    # Compute
    # TODO: Optimize by creating max_pooling with indices
    if prop_down[1]:
        # dx0 is not accumulated in the backward graph
        mask = F.not_equal_scalar(dx0, 0.0)
        g_dy_ = F.sum(g_dx0 * mask, axes)
        if accum[1]:
            g_dy += g_dy_
        else:
            g_dy.copy_from(g_dy_)
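
A minimal NumPy sketch of the rule above (an illustration only, not the NNabla implementation): the first backward of a max reduction routes dy to the argmax positions, so dx0 is nonzero only there, and the gradient with respect to dy sums g_dx0 over those positions along the reduced axes.

import numpy as np

# Hypothetical standalone check of g_dy = sum(g_dx0 * (dx0 != 0), axes).
x = np.array([[1.0, 3.0], [2.0, 0.5]])
axes = 1
dy = np.ones(2)
argmax_mask = (x == x.max(axis=axes, keepdims=True)).astype(x.dtype)
dx0 = argmax_mask * dy[:, None]          # first backward of the max reduction
g_dx0 = np.random.randn(2, 2)            # incoming grad of dx0 in the backward graph
mask = (dx0 != 0).astype(x.dtype)        # same idea as F.not_equal_scalar(dx0, 0.0)
g_dy = (g_dx0 * mask).sum(axis=axes)     # matches F.sum(g_dx0 * mask, axes)
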
def backward_impl(self, inputs, outputs, prop_down, accum):
    # inputs: [inputs_fwd_graph] + [inputs_bwd_graph] or
    # [inputs_fwd_graph] + [outputs_fwd_graph] + [inputs_bwd_graph]

    # Args
    p = self.forward_func.info.args["p"]
    # Inputs
    x0 = inputs[0].data
    dy = inputs[1].data
    # Outputs
    dx0 = outputs[0].data
    # Grads of inputs
    g_x0 = inputs[0].grad
    g_dy = inputs[1].grad
    # Grads of outputs
    g_dx0 = outputs[0].grad

    # Computation
    if prop_down[1]:
        # TODO: Optimize by creating dropout with mask
        # dx0 is not accumulated in the backward graph
        mask = F.not_equal_scalar(dx0, 0.0)
        if accum[1]:
            g_dy += g_dx0 * mask / (1.0 - p)
        else:
            g_dy.copy_from(g_dx0 * mask / (1.0 - p))
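
The accum flag only changes how the result is written back; a rough NumPy analogue of the two branches above (names here are illustrative, not NNabla API):

import numpy as np

p = 0.3
g_dy = np.zeros(4)                                     # existing grad buffer of dy
g_dx0 = np.random.randn(4)
mask = (np.random.rand(4) >= p).astype(g_dx0.dtype)    # stands in for (dx0 != 0)
term = g_dx0 * mask / (1.0 - p)
# accum[1] == True  -> accumulate into the existing grad (g_dy += term)
# accum[1] == False -> overwrite it                      (g_dy.copy_from(term))
g_dy += term
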
def dropout_backward(inputs, p=0.5, seed=-1):
    """
    Args:
        inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
        kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
        list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    y0 = get_output(x0, "Dropout")
    # Recover the dropout mask from the forward output: kept entries are nonzero.
    m0 = F.not_equal_scalar(y0, 0)
    m0 = no_grad(m0)
    dx0 = dy * m0 / (1 - p)
    return dx0
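
A minimal NumPy sketch, under the inverted-dropout convention, of why the mask can be recovered from the forward output: kept entries of y are nonzero, and the gradient is dy * mask / (1 - p). This is illustrative only, not the NNabla API.

import numpy as np

p = 0.5
x = np.random.randn(8)
keep = (np.random.rand(8) >= p)
y = x * keep / (1.0 - p)                 # forward dropout with inverted scaling
dy = np.ones_like(x)
mask = (y != 0).astype(y.dtype)          # same idea as F.not_equal_scalar(y0, 0)
dx = dy * mask / (1.0 - p)               # matches dropout_backward above
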
def sinc_backward(inputs):
    """
    Args:
        inputs (list of nn.Variable): Incoming grads/inputs to/of the forward function.
        kwargs (dict of arguments): Dictionary of the corresponding function arguments.

    Return:
        list of Variable: Return the gradients wrt inputs of the corresponding function.
    """
    dy = inputs[0]
    x0 = inputs[1]
    m0 = F.not_equal_scalar(x0, 0)
    m0 = no_grad(m0)
    y0 = get_output(x0, "Sinc")
    # d/dx sinc(x) = (cos(x) - sinc(x)) / x for x != 0; the derivative is 0 at x == 0.
    dx0 = dy * (F.cos(x0) - y0) / x0
    c0 = F.constant(0, x0.shape)
    dx0 = F.where(m0, dx0, c0)
    return dx0
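
A NumPy check of the derivative used above (illustrative only): for sinc(x) = sin(x)/x, d/dx sinc(x) = (cos(x) - sinc(x)) / x when x != 0, and 0 at x == 0.

import numpy as np

x = np.array([-1.5, 0.0, 2.0])
y = np.sinc(x / np.pi)                   # np.sinc(t) = sin(pi*t)/(pi*t), so this is sin(x)/x
dy = np.ones_like(x)
safe_x = np.where(x != 0, x, 1.0)        # avoid the division warning at x == 0
dx = np.where(x != 0, dy * (np.cos(x) - y) / safe_x, 0.0)
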