def generic_argkmin(formula, output, *aliases, **kwargs):
    r"""Alias for :class:`numpy.Genred <pykeops.numpy.Genred>` with an "ArgKMin" reduction.

    Args:
        formula (string): Scalar-valued symbolic KeOps expression, as in
            :class:`numpy.Genred <pykeops.numpy.Genred>`.
        output (string): An identifier of the form ``"AL = TYPE(K)"``
            that specifies the category and dimension of the output variable. Here:

            - ``AL`` is a dummy alphanumerical name.
            - ``TYPE`` is a *category*. One of:

              - ``Vi``: indexation by :math:`i` along axis 0; reduction is performed along axis 1.
              - ``Vj``: indexation by :math:`j` along axis 1; reduction is performed along axis 0.

            - ``K`` is an integer, the number of values to extract.

        *aliases (strings): List of identifiers, as in
            :class:`numpy.Genred <pykeops.numpy.Genred>`.

    Keyword Args:
        dtype (string, default = ``"float64"``): Specifies the numerical **dtype**
            of the input and output arrays.
            The supported values are:

              - **dtype** = ``"float32"``,
              - **dtype** = ``"float64"``.

    Returns:
        A generic reduction that can be called on arbitrary
        NumPy arrays, as documented in :class:`numpy.Genred <pykeops.numpy.Genred>`.

    Example:
        Bruteforce K-nearest neighbors search in dimension 100:

        >>> knn = generic_argkmin(
        ...     'SqDist(x, y)',   # Formula
        ...     'a = Vi(3)',      # Output: 3 scalars per line
        ...     'x = Vi(100)',    # 1st input: dim-100 vector per line
        ...     'y = Vj(100)')    # 2nd input: dim-100 vector per line
        >>> x = np.random.randn(5, 100)
        >>> y = np.random.randn(20000, 100)
        >>> a = knn(x, y)
        >>> print(a)
        [[ 9054., 11653., 11614.],
         [13466., 11903., 14180.],
         [14164.,  8809.,  3799.],
         [ 2092.,  3323., 18479.],
         [14433., 11315., 11841.]]
        >>> print(np.linalg.norm(x - y[a[:, 0].astype(int)], axis=1))  # Distance to the nearest neighbor
        [10.7933, 10.3235, 10.1218, 11.4919, 10.5100]
        >>> print(np.linalg.norm(x - y[a[:, 1].astype(int)], axis=1))  # Distance to the second neighbor
        [11.3702, 10.6550, 10.7646, 11.5676, 11.1356]
        >>> print(np.linalg.norm(x - y[a[:, 2].astype(int)], axis=1))  # Distance to the third neighbor
        [11.3820, 10.6725, 10.8510, 11.6071, 11.1968]
    """
    _, cat, k, _ = get_type(output)
    return Genred(formula, list(aliases), reduction_op='ArgKMin',
                  axis=cat2axis(cat), opt_arg=k, **kwargs)
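# A minimal sanity check for the ArgKMin reduction (illustrative, not part of the
# library): for small M and N, the indices returned by a 'knn' routine built as in
# the docstring above can be compared with a dense NumPy argsort baseline. The
# helper below is a hypothetical sketch; it materializes the full (M, N) distance
# matrix, which is only practical for small problem sizes.
def _check_argkmin_against_numpy():
    import numpy as np
    knn = generic_argkmin('SqDist(x, y)', 'a = Vi(3)',
                          'x = Vi(100)', 'y = Vj(100)')
    x = np.random.randn(5, 100)
    y = np.random.randn(200, 100)
    a = knn(x, y)                                           # (5, 3) neighbor indices, KeOps
    D_ij = ((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)  # (5, 200) squared distances
    a_ref = np.argsort(D_ij, axis=1)[:, :3]                 # (5, 3) indices, dense NumPy
    assert np.array_equal(a.astype(int), a_ref)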
def generic_logsumexp(formula, output, *aliases, **kwargs):
    r"""Alias for :class:`torch.Genred <pykeops.torch.Genred>` with a "LogSumExp" reduction.

    Args:
        formula (string): Scalar-valued symbolic KeOps expression, as in
            :class:`torch.Genred <pykeops.torch.Genred>`.
        output (string): An identifier of the form ``"AL = TYPE(1)"``
            that specifies the category and dimension of the output variable. Here:

            - ``AL`` is a dummy alphanumerical name.
            - ``TYPE`` is a *category*. One of:

              - ``Vi``: indexation by :math:`i` along axis 0; reduction is performed along axis 1.
              - ``Vj``: indexation by :math:`j` along axis 1; reduction is performed along axis 0.

        *aliases (strings): List of identifiers, as in
            :class:`torch.Genred <pykeops.torch.Genred>`.

    Keyword Args:
        dtype (string, default = ``"float32"``): Specifies the numerical **dtype**
            of the input and output arrays.
            The supported values are:

              - **dtype** = ``"float16"`` or ``"half"``,
              - **dtype** = ``"float32"`` or ``"float"``,
              - **dtype** = ``"float64"`` or ``"double"``.

    Returns:
        A generic reduction that can be called on arbitrary
        Torch tensors, as documented in :class:`torch.Genred <pykeops.torch.Genred>`.

    Example:
        Log-likelihood of a Gaussian Mixture Model,

        .. math::
            a_i~=~f(x_i)~&=~ \log \sum_{j=1}^{N} \exp(-\gamma\cdot\|x_i-y_j\|^2)\cdot b_j \\\\
            ~&=~ \log \sum_{j=1}^{N} \exp\big(-\gamma\cdot\|x_i-y_j\|^2 \,+\, \log(b_j) \big).

        >>> log_likelihood = generic_logsumexp(
        ...     '(-(g * SqNorm2(x - y))) + b',  # Formula
        ...     'a = Vi(1)',    # Output: 1 scalar per line
        ...     'x = Vi(3)',    # 1st input: dim-3 vector per line
        ...     'y = Vj(3)',    # 2nd input: dim-3 vector per line
        ...     'g = Pm(1)',    # 3rd input: vector of size 1
        ...     'b = Vj(1)')    # 4th input: 1 scalar per line
        >>> x = torch.randn(1000000, 3, requires_grad=True).cuda()
        >>> y = torch.randn(2000000, 3).cuda()
        >>> g = torch.Tensor([.5]).cuda()      # Parameter of our GMM
        >>> b = torch.rand(2000000, 1).cuda()  # Positive weights...
        >>> b = b / b.sum()                    # Normalized to get a probability measure
        >>> a = log_likelihood(x, y, g, b.log())  # a_i = log sum_j exp(-g*|x_i-y_j|^2) * b_j
        >>> print(a.shape)
        torch.Size([1000000, 1])
    """
    _, cat, _, _ = get_type(output)
    axis = cat2axis(cat)
    return Genred(formula, aliases, reduction_op='LogSumExp', axis=axis, **kwargs)
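# On small inputs, the LogSumExp reduction can be validated against a dense
# torch.logsumexp baseline. The hypothetical check below re-creates the GMM
# routine from the docstring above on CPU-sized data; the dense (M, N) matrix of
# log-terms is only affordable for small M and N.
def _check_logsumexp_against_torch():
    import torch
    log_likelihood = generic_logsumexp(
        '(-(g * SqNorm2(x - y))) + b',
        'a = Vi(1)', 'x = Vi(3)', 'y = Vj(3)', 'g = Pm(1)', 'b = Vj(1)')
    M, N = 100, 200
    x, y = torch.randn(M, 3), torch.randn(N, 3)
    g = torch.Tensor([.5])
    b = torch.rand(N, 1)
    b = b / b.sum()
    a = log_likelihood(x, y, g, b.log())                         # (M, 1), KeOps
    log_K = -g * ((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)  # (M, N), dense log-terms
    a_ref = torch.logsumexp(log_K + b.log().t(), dim=1, keepdim=True)
    assert torch.allclose(a, a_ref, atol=1e-5)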
def generic_argmin(formula, output, *aliases, **kwargs):
    r"""Alias for :class:`numpy.Genred <pykeops.numpy.Genred>` with an "ArgMin" reduction.

    Args:
        formula (string): Scalar-valued symbolic KeOps expression, as in
            :class:`numpy.Genred <pykeops.numpy.Genred>`.
        output (string): An identifier of the form ``"AL = TYPE(1)"``
            that specifies the category and dimension of the output variable. Here:

            - ``AL`` is a dummy alphanumerical name.
            - ``TYPE`` is a *category*. One of:

              - ``Vi``: indexation by :math:`i` along axis 0; reduction is performed along axis 1.
              - ``Vj``: indexation by :math:`j` along axis 1; reduction is performed along axis 0.

        *aliases (strings): List of identifiers, as in
            :class:`numpy.Genred <pykeops.numpy.Genred>`.

    Keyword Args:
        dtype (string, default = ``"float64"``): Specifies the numerical **dtype**
            of the input and output arrays.
            The supported values are:

              - **dtype** = ``"float16"``,
              - **dtype** = ``"float32"``,
              - **dtype** = ``"float64"``.

    Returns:
        A generic reduction that can be called on arbitrary
        NumPy arrays, as documented in :class:`numpy.Genred <pykeops.numpy.Genred>`.

    Example:
        Bruteforce nearest neighbor search in dimension 100:

        >>> nearest_neighbor = generic_argmin(
        ...     'SqDist(x, y)',   # Formula
        ...     'a = Vi(1)',      # Output: 1 scalar per line
        ...     'x = Vi(100)',    # 1st input: dim-100 vector per line
        ...     'y = Vj(100)')    # 2nd input: dim-100 vector per line
        >>> x = np.random.randn(5, 100)
        >>> y = np.random.randn(20000, 100)
        >>> a = nearest_neighbor(x, y)
        >>> print(a)
        [[ 8761.],
         [ 2836.],
         [  906.],
         [16130.],
         [ 3158.]]
        >>> dists = np.linalg.norm(x - y[a.astype(int).reshape(-1)], axis=1)  # Distance to the nearest neighbor
        >>> print(dists)
        [10.5926, 10.9132,  9.9694, 10.1396, 10.1955]
    """
    _, cat, _, _ = get_type(output)
    return Genred(formula, list(aliases), reduction_op='ArgMin',
                  axis=cat2axis(cat), **kwargs)
def preprocess_half2(args, aliases, axis, ranges, nx, ny):
    # When the dtype is "half", i.e. float16, we need to use special tricks
    # because internally the Cuda code will use the half2 data type, i.e.
    # vectors of two float16 scalars. So we need to:
    # - make a distinction between the actual nx and ny sizes of the reduction
    #   on the Python side, i.e. for the user, and the sizes in the C++ code,
    #   which need to be divided by two (modulo the next point...);
    # - make a copy of the data for variables corresponding to the axis of
    #   reduction, switching the order of the pairs. To understand this, let's
    #   consider that we have two variables x_i and y_j, with nx = ny = 2,
    #   and we need to sum some kernel over the j axis,
    #   i.e. compute out_i = sum_j k(x_i, y_j) for i, j ranging over {0, 1}.
    #   After conversion to the half2 data type, without any copy, we would get
    #   only one half2 for the x_i: X = (x_0, x_1), and one half2
    #   for the y_j: Y = (y_0, y_1). The computation of k(X, Y), with
    #   Cuda's vectorization rules, would compute only the two scalars
    #   k(x_0, y_0) and k(x_1, y_1) and store the result as a half2.
    #   To get the two other required kernel evaluations, k(x_0, y_1) and
    #   k(x_1, y_0), we need to create a second half2, Ytilde = (y_1, y_0).
    #   The correct computation is then achieved by computing
    #   k(X, Y) + k(X, Ytilde).

    # N is the actual size of the reduction; we record it so as not to mix
    # things up when we do the post-process back conversion after the reduction.
    N = ny if axis == 1 else nx

    if ranges is not None:
        # When using ranges, we need to adapt the ranges to the special copy trick
        if axis == 1:
            ranges = ranges2half2(ranges[0:3], ny) + ranges[3:6]
        else:
            ranges = ranges[0:3] + ranges2half2(ranges[3:6], nx)

    tag_dummy = False  # default, in case no variable goes through the `else` branch below
    newargs = len(aliases) * [None]
    for (var_ind, sig) in enumerate(aliases):
        _, cat, dim, pos = get_type(sig, position_in_list=var_ind)
        arg = args[pos].data  # we don't want to record our cuisine in the Autograd mechanism!
        if cat == 2:
            arg = arg[..., None, :]       # (...,D)   -> (...,1,D)
            arg, _ = make_even_size(arg)  # (...,1,D) -> (...,2,D)
        elif cat == axis:
            arg, Narg = make_odd_cat(arg)
            N = max(N, Narg)
        else:
            arg, tag_dummy = make_even_size(arg)
        arg = half2half2(arg)
        if cat == 2:
            arg = arg.view(tuple(arg.shape[:-2]) + (2 * dim,))
            # (...,2,D) -> (...,2*D)  (we "hide" the factor 2 in the dimension...)
        newargs[pos] = arg

    return newargs, ranges, tag_dummy, N
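# Worked illustration of the swapped-copy trick described in the comments above
# (illustrative only, independent of the KeOps pipeline): with a toy kernel
# k(x, y) = x * y and nx = ny = 2, element-wise half2 arithmetic only reaches the
# "aligned" pairs, and the rotated copy Ytilde supplies the two cross terms.
def _half2_pairing_sketch():
    import numpy as np
    X = np.array([1.0, 2.0])     # (x_0, x_1), one emulated half2
    Y = np.array([10.0, 100.0])  # (y_0, y_1)
    Ytilde = Y[::-1]             # (y_1, y_0), the swapped copy
    # k(X, Y)      = (k(x_0, y_0), k(x_1, y_1))
    # k(X, Ytilde) = (k(x_0, y_1), k(x_1, y_0))
    out = X * Y + X * Ytilde     # out_i = sum_j k(x_i, y_j)
    ref = np.array([[x * y for y in Y] for x in X]).sum(axis=1)
    assert np.allclose(out, ref)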
def generic_sum(formula, output, *aliases, **kwargs):
    r"""Alias for :class:`numpy.Genred <pykeops.numpy.Genred>` with a "Sum" reduction.

    Args:
        formula (string): Symbolic KeOps expression, as in
            :class:`numpy.Genred <pykeops.numpy.Genred>`.
        output (string): An identifier of the form ``"AL = TYPE(DIM)"``
            that specifies the category and dimension of the output variable. Here:

            - ``AL`` is a dummy alphanumerical name.
            - ``TYPE`` is a *category*. One of:

              - ``Vi``: indexation by :math:`i` along axis 0; reduction is performed along axis 1.
              - ``Vj``: indexation by :math:`j` along axis 1; reduction is performed along axis 0.

            - ``DIM`` is an integer, the dimension of the output variable;
              it should be compatible with **formula**.

        *aliases (strings): List of identifiers, as in
            :class:`numpy.Genred <pykeops.numpy.Genred>`.

    Keyword Args:
        dtype (string, default = ``"float64"``): Specifies the numerical **dtype**
            of the input and output arrays.
            The supported values are:

              - **dtype** = ``"float16"``,
              - **dtype** = ``"float32"``,
              - **dtype** = ``"float64"``.

    Returns:
        A generic reduction that can be called on arbitrary
        NumPy arrays, as documented in :class:`numpy.Genred <pykeops.numpy.Genred>`.

    Example:
        >>> my_conv = generic_sum(       # Custom Kernel Density Estimator
        ...     'Exp(-SqNorm2(x - y))',  # Formula
        ...     'a = Vi(1)',             # Output: 1 scalar per line
        ...     'x = Vi(3)',             # 1st input: dim-3 vector per line
        ...     'y = Vj(3)')             # 2nd input: dim-3 vector per line
        >>> # Apply it to 2d arrays x and y with 3 columns and a (huge) number of lines
        >>> x = np.random.randn(1000000, 3)
        >>> y = np.random.randn(2000000, 3)
        >>> a = my_conv(x, y)  # a_i = sum_j exp(-|x_i-y_j|^2)
        >>> print(a.shape)
        (1000000, 1)
    """
    _, cat, _, _ = get_type(output)
    return Genred(formula, list(aliases), reduction_op='Sum',
                  axis=cat2axis(cat), **kwargs)
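# For small M and N, the Gaussian convolution above can be checked against a dense
# NumPy baseline that materializes the full (M, N) kernel matrix (exactly the
# memory cost that KeOps avoids). A hypothetical sanity check:
def _check_sum_against_numpy():
    import numpy as np
    my_conv = generic_sum('Exp(-SqNorm2(x - y))',
                          'a = Vi(1)', 'x = Vi(3)', 'y = Vj(3)')
    M, N = 100, 200
    x, y = np.random.randn(M, 3), np.random.randn(N, 3)
    a = my_conv(x, y)                                               # (M, 1), KeOps
    K_ij = np.exp(-((x[:, None, :] - y[None, :, :]) ** 2).sum(-1))  # (M, N) dense kernel
    a_ref = K_ij.sum(axis=1, keepdims=True)
    assert np.allclose(a, a_ref, atol=1e-6)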
def backward(ctx, G):
    formula = ctx.formula
    aliases = ctx.aliases
    varinvpos = ctx.varinvpos
    backend = ctx.backend
    alpha = ctx.alpha
    dtype = ctx.dtype
    device_id = ctx.device_id
    eps = ctx.eps
    myconv = ctx.myconv
    ranges = ctx.ranges
    optional_flags = ctx.optional_flags
    rec_multVar_highdim = ctx.rec_multVar_highdim

    args = ctx.saved_tensors[:-1]  # Unwrap the saved variables
    nargs = len(args)
    result = ctx.saved_tensors[-1]

    # If the formula takes 5 variables (numbered from 0 to 4), then the gradient
    # wrt. the output, G, should be given as a 6-th variable (numbered 5),
    # with the same dim-cat as the formula's output.
    eta = ("Var(" + str(nargs) + "," + str(myconv.dimout) + ","
           + str(myconv.tagIJ) + ")")
    # There is also a new variable for the formula's output:
    resvar = ("Var(" + str(nargs + 1) + "," + str(myconv.dimout) + ","
              + str(myconv.tagIJ) + ")")

    newargs = args[:varinvpos] + (G,) + args[varinvpos + 1:]
    KinvG = KernelSolveAutograd.apply(formula, aliases, varinvpos, alpha, backend,
                                      dtype, device_id, eps, ranges, optional_flags,
                                      rec_multVar_highdim, *newargs)

    grads = []  # list of gradients wrt. args

    for (var_ind, sig) in enumerate(aliases):  # Run through the arguments
        # If the current gradient is to be discarded immediately...
        if not ctx.needs_input_grad[var_ind + 11]:
            # (the offset of 11 accounts for the (formula, aliases, varinvpos, alpha,
            #  backend, dtype, device_id, eps, ranges, optional_flags,
            #  rec_multVar_highdim) arguments)
            grads.append(None)  # Don't waste time computing it.
        else:
            # Otherwise, the current gradient is really needed by the user:
            if var_ind == varinvpos:
                grads.append(KinvG)
            else:
                # Adding new aliases is way too dangerous if we want to compute
                # second derivatives, etc. So we make explicit references to
                # Var<ind,dim,cat> instead.
                # New here (Joan): we still add the new variables to the list of
                # "aliases" (without giving new aliases for them); these will not
                # be used in the C++ code, but are useful to keep track of the
                # actual variables used in the formula.
                _, cat, dim, pos = get_type(sig, position_in_list=var_ind)
                var = "Var(" + str(pos) + "," + str(dim) + "," + str(cat) + ")"  # V
                formula_g = ("Grad_WithSavedForward(" + formula + ", " + var
                             + ", " + eta + ", " + resvar + ")")  # Grad<F,V,G,R>
                aliases_g = aliases + [eta, resvar]
                args_g = (args[:varinvpos] + (result,) + args[varinvpos + 1:]
                          + (-KinvG,) + (result,))  # Don't forget the gradient to backprop!

                # N.B.: if I understand PyTorch's doc, we should redefine this
                # function every time we use it?
                genconv = GenredAutograd().apply

                if cat == 2:
                    # We're referring to a parameter, so we have to sum both wrt 'i' and 'j'.
                    # WARNING!!: here we rely on the implementation of DiffT in the
                    # files of the keops/core/formulas/reductions folder: if
                    # tagI == cat of V is 2, then the reduction is done wrt. j,
                    # so we need to further sum the output wrt. i.
                    grad = genconv(formula_g, aliases_g, backend, dtype, device_id,
                                   ranges, optional_flags, None, *args_g)
                    # Then, sum 'grad' wrt. 'i':
                    # I think that '.sum''s backward introduces non-contiguous arrays,
                    # and is thus non-compatible with GenredAutograd: grad = grad.sum(0)
                    # We replace it with a 'handmade hack':
                    grad = torch.ones(1, grad.shape[0]).type_as(grad.data) @ grad
                    grad = grad.view(-1)
                else:
                    grad = genconv(formula_g, aliases_g, backend, dtype, device_id,
                                   ranges, optional_flags, None, *args_g)
                grads.append(grad)

    # Grads wrt. formula, aliases, varinvpos, alpha, backend, dtype, device_id,
    # eps, ranges, optional_flags, rec_multVar_highdim, *args
    return (None, None, None, None, None, None, None, None, None, None, None, *grads)
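# To make the Var<ind,dim,cat> numbering convention concrete, here is a
# hypothetical trace of the strings built in the backward pass above, for a
# 2-argument formula with dimout = 1 and tagIJ = 0 (all values illustrative):
def _gradient_formula_sketch():
    formula = "Exp(-SqDist(x, y))"
    nargs, dimout, tagIJ = 2, 1, 0
    eta = "Var(" + str(nargs) + "," + str(dimout) + "," + str(tagIJ) + ")"
    resvar = "Var(" + str(nargs + 1) + "," + str(dimout) + "," + str(tagIJ) + ")"
    var = "Var(0,100,0)"  # say V = x, a dim-100 'i'-variable at position 0
    formula_g = ("Grad_WithSavedForward(" + formula + ", " + var
                 + ", " + eta + ", " + resvar + ")")
    print(formula_g)
    # Grad_WithSavedForward(Exp(-SqDist(x, y)), Var(0,100,0), Var(2,1,0), Var(3,1,0))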
def backward(ctx, G):
    formula = ctx.formula
    aliases = ctx.aliases
    backend = ctx.backend
    dtype = ctx.dtype
    ranges = ctx.ranges
    accuracy_flags = ctx.accuracy_flags
    device_id = ctx.device_id
    myconv = ctx.myconv

    args = ctx.saved_tensors[:-1]  # Unwrap the saved variables
    nargs = len(args)
    result = ctx.saved_tensors[-1].detach()

    not_supported = ["Min_ArgMin_Reduction", "Min_Reduction",
                     "Max_ArgMax_Reduction", "Max_Reduction",
                     "KMin_ArgKMin_Reduction", "KMin_Reduction"]
    for red in not_supported:
        if formula.startswith(red):
            raise NotImplementedError(
                "As of today, KeOps does not support "
                + "backpropagation through the " + red + " reduction. "
                + "Adding this feature to LazyTensors is on the cards "
                + "for future releases... But until then, you may want "
                + "to consider extracting the relevant integer indices "
                + "with a '.argmin()', '.argmax()' or '.argKmin()' reduction "
                + "before using PyTorch advanced indexing to create a fully-differentiable "
                + "tensor containing the relevant 'minimal' values.")

    # If the formula takes 5 variables (numbered from 0 to 4), then the gradient
    # wrt. the output, G, should be given as a 6-th variable (numbered 5),
    # with the same dim-cat as the formula's output.
    eta = ('Var(' + str(nargs) + ',' + str(myconv.dimout) + ','
           + str(myconv.tagIJ) + ')')
    # There is also a new variable for the formula's output:
    resvar = ('Var(' + str(nargs + 1) + ',' + str(myconv.dimout) + ','
              + str(myconv.tagIJ) + ')')

    grads = []  # list of gradients wrt. args

    for (var_ind, (sig, arg_ind)) in enumerate(zip(aliases, args)):  # Run through the arguments
        # If the current gradient is to be discarded immediately...
        if not ctx.needs_input_grad[var_ind + 7]:
            # (the offset of 7 accounts for the (formula, aliases, backend,
            #  dtype, device_id, ranges, accuracy_flags) arguments)
            grads.append(None)  # Don't waste time computing it.
        else:
            # Otherwise, the current gradient is really needed by the user:
            # Adding new aliases is way too dangerous if we want to compute
            # second derivatives, etc. So we make explicit references to
            # Var<ind,dim,cat> instead.
            # New here (Joan): we still add the new variables to the list of
            # "aliases" (without giving new aliases for them); these will not be
            # used in the C++ code, but are useful to keep track of the actual
            # variables used in the formula.
            _, cat, dim, pos = get_type(sig, position_in_list=var_ind)
            var = 'Var(' + str(pos) + ',' + str(dim) + ',' + str(cat) + ')'  # V
            formula_g = ('Grad_WithSavedForward(' + formula + ', ' + var
                         + ', ' + eta + ', ' + resvar + ')')  # Grad<F,V,G,R>
            aliases_g = aliases + [eta, resvar]
            args_g = args + (G,) + (result,)  # Don't forget the gradient to backprop!

            # N.B.: if I understand PyTorch's doc, we should redefine this
            # function every time we use it?
            genconv = GenredAutograd().apply

            if cat == 2:
                # We're referring to a parameter, so we have to sum both wrt 'i' and 'j'.
                # WARNING!!: here we rely on the implementation of DiffT in the
                # files of the keops/core/formulas/reductions folder: if
                # tagI == cat of V is 2, then the reduction is done wrt. j,
                # so we need to further sum the output wrt. i.
                grad = genconv(formula_g, aliases_g, backend, dtype, device_id,
                               ranges, accuracy_flags, *args_g)
                # Then, sum 'grad' wrt. 'i':
                # I think that '.sum''s backward introduces non-contiguous arrays,
                # and is thus non-compatible with GenredAutograd: grad = grad.sum(0)
                # We replace it with a 'handmade hack':
                # grad = torch.ones(1, grad.shape[0]).type_as(grad.data) @ grad
                # grad = grad.view(-1)
                grad = (1. * grad).sum(-2)
                dims_to_collapse = tuple(
                    i for (i, (x, y)) in enumerate(
                        zip(arg_ind.shape[:-1], grad.shape[:-1])) if x < y)
            else:
                grad = genconv(formula_g, aliases_g, backend, dtype, device_id,
                               ranges, accuracy_flags, *args_g)
                # N.B.: 'grad' is always a full [A, .., B, M, D] or [A, .., B, N, D]
                # or [A, .., B, D] tensor, whereas 'arg_ind' may have some
                # broadcasted batched dimensions.
                # Before returning our gradient, we must collapse 'grad' with a
                # .sum() operation, which is the adjoint of the good old "repmat"
                # that could have been used to emulate the batch broadcasting.
                dims_to_collapse = tuple(
                    i for (i, (x, y)) in enumerate(
                        zip(arg_ind.shape[:-2], grad.shape[:-2])) if x < y)

            if dims_to_collapse != ():
                grad = (1. * grad).sum(dims_to_collapse, keepdim=True)
            grad = grad.reshape(arg_ind.shape)  # The gradient should have the same shape as the input!
            grads.append(grad)

    # Grads wrt. formula, aliases, backend, dtype, device_id, ranges, accuracy_flags, *args
    return (None, None, None, None, None, None, None, *grads)
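# The batch-collapse step above is the adjoint of broadcasting: summing the
# gradient over every batch dimension where the saved input was smaller recovers
# a gradient with the input's shape. A stand-alone sketch with illustrative shapes:
def _broadcast_collapse_sketch():
    import torch
    B, M, D = 4, 3, 2
    arg = torch.randn(1, M, D)   # saved input, broadcasted batch dim of size 1
    grad = torch.randn(B, M, D)  # full gradient returned by the generic reduction
    # Mirror of the dims_to_collapse computation above (cat != 2 case):
    dims_to_collapse = tuple(
        i for (i, (x, y)) in enumerate(zip(arg.shape[:-2], grad.shape[:-2])) if x < y)
    if dims_to_collapse != ():
        grad = (1. * grad).sum(dims_to_collapse, keepdim=True)
    grad = grad.reshape(arg.shape)  # same shape as the input, as Autograd requires
    assert grad.shape == arg.shape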