def L_op(self, inputs, outputs, out_grads):
    """Gradient of the top-k op.

    Parameters
    ----------
    inputs : (x, k) — the tensor searched and the scalar k.
    outputs : the op's symbolic outputs; when both values and indices are
        returned, ``outputs[-1]`` is the indices output.
    out_grads : gradients w.r.t. each output; ``out_grads[0]`` is the
        gradient w.r.t. the values output.

    Returns
    -------
    [x_grad, k_grad] : gradient w.r.t. ``x`` (the values gradient scattered
        back to the top-k positions) and an undefined gradient for ``k``
        (it is an integer count, not differentiable).
    """
    x, k = inputs
    k_grad = grad_undefined(self, 1, k, "topk: k is not differentiable")

    # BUGFIX: was `or`. The scatter below needs BOTH the indices output
    # (outputs[-1]) and the values gradient (out_grads[0]); with only
    # return_values=True, outputs[-1] would be the values output and the
    # scatter would use values as indices. The error message already said
    # "both", matching this condition.
    if not (self.return_indices and self.return_values):
        x_grad = grad_undefined(
            self,
            0,
            x,
            "topk: cannot get gradient" " without both indices and values",
        )
    else:
        x_shp = theano.tensor.shape(x)
        z_grad = out_grads[0]
        ndim = x.ndim
        # Normalize a possibly-negative axis to [0, ndim).
        axis = self.axis % ndim
        # Build fancy indices: along non-topk axes an arange broadcast to
        # align with that axis; along the topk axis, the indices output.
        grad_indices = [
            arange(x_shp[i]).dimshuffle([0] + ["x"] * (ndim - i - 1))
            if i != axis
            else outputs[-1]
            for i in range(ndim)
        ]
        # Scatter the values gradient into a zero tensor of x's shape.
        x_grad = x.zeros_like(dtype=z_grad.dtype)
        x_grad = set_subtensor(x_grad[tuple(grad_indices)], z_grad)
    return [x_grad, k_grad]
def L_op(self, inputs, outputs, out_grads):
    """Gradient of the top-k op.

    Parameters
    ----------
    inputs : (x, k) — the tensor searched and the scalar k.
    outputs : the op's symbolic outputs; when both values and indices are
        returned, ``outputs[-1]`` is the indices output.
    out_grads : gradients w.r.t. each output; ``out_grads[0]`` is the
        gradient w.r.t. the values output.

    Returns
    -------
    [x_grad, k_grad] : gradient w.r.t. ``x`` (the values gradient scattered
        back to the top-k positions) and an undefined gradient for ``k``
        (it is an integer count, not differentiable).
    """
    x, k = inputs
    k_grad = grad_undefined(self, 1, k, 'topk: k is not differentiable')

    # BUGFIX: was `or`. The scatter below needs BOTH the indices output
    # (outputs[-1]) and the values gradient (out_grads[0]); with only
    # return_values=True, outputs[-1] would be the values output and the
    # scatter would use values as indices. The error message already said
    # "both", matching this condition.
    if not (self.return_indices and self.return_values):
        x_grad = grad_undefined(
            self, 0, x, 'topk: cannot get gradient'
            ' without both indices and values')
    else:
        x_shp = theano.tensor.shape(x)
        z_grad = out_grads[0]
        ndim = x.ndim
        # Normalize a possibly-negative axis to [0, ndim).
        axis = self.axis % ndim
        # Build fancy indices: along non-topk axes an arange broadcast to
        # align with that axis; along the topk axis, the indices output.
        grad_indices = [
            arange(x_shp[i]).dimshuffle([0] + ['x'] * (ndim - i - 1))
            if i != axis
            else outputs[-1]
            for i in range(ndim)]
        # Scatter the values gradient into a zero tensor of x's shape.
        x_grad = x.zeros_like(dtype=z_grad.dtype)
        x_grad = set_subtensor(x_grad[tuple(grad_indices)], z_grad)
    return [x_grad, k_grad]