def grad(self, inputs, output_grads):
    a, axis = inputs
    # Default: the gradient w.r.t. the input is only implemented for
    # vectors, and for matrices sorted with axis=None.
    inp_grad = theano.gradient.grad_not_implemented(
        self, 0, axis,
        "Currently, we only implement the gradient on sort for vector"
        " and matrix (and axis is None)")
    if a.ndim == 1:
        # The gradient of sort is the output gradient routed back through
        # the inverse of the sorting permutation.  NumPy equivalent:
        # rev_idx = numpy.where(idx[None, :] ==
        #                       numpy.arange(a.shape[0])[:, None])[1]
        idx = argsort(*inputs, kind=self.kind, order=self.order)
        rev_idx = theano.tensor.eq(idx[None, :],
                                   arange(a.shape[0])[:, None]).nonzero()[1]
        inp_grad = output_grads[0][rev_idx]
    elif a.ndim == 2:
        if (axis is None or
                (isinstance(axis, theano.Constant) and axis.data is None)):
            # axis=None sorts the flattened matrix, so invert the
            # permutation over all a.shape[0] * a.shape[1] elements and
            # reshape the gradient back to the input shape.
            idx = argsort(*inputs, kind=self.kind, order=self.order)
            rev_idx = theano.tensor.eq(
                idx[None, :],
                arange(a.shape[0] * a.shape[1])[:, None]).nonzero()[1]
            inp_grad = output_grads[0][rev_idx].reshape(a.shape)
        elif (axis == 0 or
              (isinstance(axis, theano.Constant) and axis.data == 0)):
            # The gradient for a matrix sorted along axis 0 is not
            # implemented yet; inp_grad keeps the grad_not_implemented
            # value set above.  `idx` is currently unused.
            idx = argsort(*inputs, kind=self.kind, order=self.order)
            # not working:
            # numpy.where(idx[None, :] == numpy.arange(2)[:, None, None])
    axis_grad = theano.gradient.grad_undefined(
        self, 1, axis,
        "sort is not defined for non-integer axes so"
        " sort(x, axis+eps) is undefined")
    return [inp_grad, axis_grad]
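
# A minimal NumPy sketch (illustrative only, not part of the Op) of the
# inverse-permutation idea used in the ndim == 1 branch above: the gradient
# of sort is the output gradient read back through the inverse of the
# argsort permutation.  All names below are example values.
import numpy as np

a = np.array([3.0, 1.0, 2.0])
g_sorted = np.array([10.0, 20.0, 30.0])    # gradient w.r.t. sort(a)

idx = np.argsort(a)                        # forward permutation
rev_idx = np.where(idx[None, :] == np.arange(a.shape[0])[:, None])[1]
g_a = g_sorted[rev_idx]                    # gradient w.r.t. a

# np.argsort(idx) computes the same inverse permutation more directly.
assert (rev_idx == np.argsort(idx)).all()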
def __get_expanded_dim(self, a, axis, i):
    index_shape = [1] * a.ndim
    index_shape[i] = a.shape[i]
    # It's a way to emulate
    # numpy.ogrid[0: a.shape[0], 0: a.shape[1], 0: a.shape[2]]
    index_val = arange(a.shape[i]).reshape(index_shape)
    return index_val
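
# A small NumPy sketch (illustrative only) of what __get_expanded_dim builds
# symbolically: for dimension i, an arange of length a.shape[i] reshaped so
# it broadcasts along every other axis, exactly like the i-th output of
# numpy.ogrid.
import numpy as np

a = np.zeros((2, 3, 4))
i = 1
index_shape = [1] * a.ndim
index_shape[i] = a.shape[i]
index_val = np.arange(a.shape[i]).reshape(index_shape)   # shape (1, 3, 1)

ogrid_val = np.ogrid[0:a.shape[0], 0:a.shape[1], 0:a.shape[2]][i]
assert index_val.shape == ogrid_val.shape == (1, 3, 1)
assert (index_val == ogrid_val).all()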
def L_op(self, inputs, outputs, out_grads):
    x, k = inputs
    k_grad = grad_undefined(self, 1, k, "topk: k is not differentiable")

    if not (self.return_indices or self.return_values):
        x_grad = grad_undefined(
            self, 0, x,
            "topk: cannot get gradient"
            " without both indices and values")
    else:
        x_shp = theano.tensor.shape(x)
        z_grad = out_grads[0]
        ndim = x.ndim
        axis = self.axis % ndim

        # Build one index tensor per dimension: along `axis` use the
        # returned top-k indices, elsewhere a broadcasted arange, so that
        # advanced indexing scatters z_grad back onto the selected entries.
        grad_indices = [
            arange(x_shp[i]).dimshuffle([0] + ["x"] * (ndim - i - 1))
            if i != axis else outputs[-1]
            for i in range(ndim)]

        x_grad = x.zeros_like(dtype=z_grad.dtype)
        x_grad = set_subtensor(x_grad[tuple(grad_indices)], z_grad)

    return [x_grad, k_grad]
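
# A NumPy sketch (illustrative only, assuming both values and indices are
# returned) of the scatter performed by L_op above: the gradient of the
# top-k values is placed back at the positions given by the returned
# indices, and every other entry of x gets zero gradient.
import numpy as np

x = np.array([[3.0, 1.0, 4.0, 1.0],
              [5.0, 9.0, 2.0, 6.0]])
k, axis = 2, 1

top_idx = np.argsort(x, axis=axis)[:, -k:]   # indices of the top-k entries
z_grad = np.ones((2, k))                     # gradient w.r.t. the top-k values

# One index array per dimension; along `axis` use top_idx, elsewhere a
# broadcasted arange -- the same construction as grad_indices above.
rows = np.arange(x.shape[0])[:, None]        # shape (2, 1)
x_grad = np.zeros_like(x)
x_grad[rows, top_idx] = z_grad               # scatter, like set_subtensor

assert x_grad.sum() == z_grad.sum()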