def forward(ctx, formula, aliases, backend, dtype, device_id, ranges, accuracy_flags, *args):
    """Load the KeOps routine for ``formula`` and apply it to ``args``.

    Every non-array input is stored on ``ctx``, and the input arrays (plus
    the result) are registered with ``ctx.save_for_backward`` so that the
    gradient can be computed later.

    Args:
        ctx: Autograd context object; receives the attributes set below.
        formula: Formula handed to ``LoadKeOps``.
        aliases: Aliases handed to ``LoadKeOps``.
        backend: Backend specification, resolved by ``get_tag_backend``.
        dtype: Data type identifier handed to ``LoadKeOps``.
        device_id: Device index forwarded to the compiled routine. It is
            replaced by the device index of ``args[0]`` when both
            ``tagCPUGPU`` and ``tagHostDevice`` equal 1.
        ranges: Ranges forwarded to the compiled routine; ``None`` is
            replaced by an empty tuple.
        accuracy_flags: List of extra compiler flags, appended after the
            include-directory flag.
        *args: Input arrays (objects exposing ``.device.index``).

    Returns:
        The output of ``myconv.genred_pytorch``.

    Raises:
        ValueError: If both tags equal 1 and the inputs do not all share
            the device index of ``args[0]``.
    """
    optional_flags = ['-DPYTORCH_INCLUDE_DIR=' + ';'.join(include_dirs)] + accuracy_flags
    myconv = LoadKeOps(formula, aliases, dtype, 'torch', optional_flags).import_module()

    # Context variables: save everything to compute the gradient:
    ctx.formula = formula
    ctx.aliases = aliases
    ctx.backend = backend
    ctx.dtype = dtype
    ctx.device_id = device_id
    ctx.ranges = ranges
    ctx.accuracy_flags = accuracy_flags
    ctx.myconv = myconv

    tagCPUGPU, tag1D2D, tagHostDevice = get_tag_backend(backend, args)

    # Logical `and`, not bitwise `&`: `a == 1 & b == 1` parses as the chained
    # comparison `a == (1 & b) == 1`, which only matches the intent by accident.
    # NOTE(review): presumably this is the "GPU backend, data already on the
    # device" case - confirm against get_tag_backend.
    if tagCPUGPU == 1 and tagHostDevice == 1:
        device_id = args[0].device.index
        if any(arg.device.index != device_id for arg in args[1:]):
            raise ValueError("[KeOps] Input arrays must be all located on the same device.")

    if ranges is None:
        ranges = ()  # To keep the same type

    result = myconv.genred_pytorch(tagCPUGPU, tag1D2D, tagHostDevice, device_id, ranges, *args)

    # Relying on the saved variables of 'ctx' is necessary if you want to be able
    # to differentiate the output of the backward once again. It helps pytorch to
    # keep track of 'who is who'.
    ctx.save_for_backward(*args, result)

    return result
def forward(ctx, formula, aliases, varinvpos, alpha, backend, dtype, device_id, eps, ranges, accuracy_flags, *args):
    """Solve a linear system with the KeOps routine for ``formula``.

    The compiled routine, with ``alpha * var`` added when ``alpha`` is
    truthy, is wrapped as a linear operator and passed to
    ``ConjugateGradientSolver`` with ``args[varinvpos].data`` as its array
    argument. All inputs needed for the gradient are stored on ``ctx``.

    Args:
        ctx: Autograd context object; receives the attributes set below.
        formula: Formula handed to ``LoadKeOps``.
        aliases: Aliases handed to ``LoadKeOps`` and ``parse_aliases``.
        varinvpos: Index in ``args`` of the array that the linear operator
            replaces with its own argument.
        alpha: Coefficient of the ``alpha * var`` term added to the operator
            output; skipped when falsy.
        backend: Backend specification, resolved by ``get_tag_backend``.
        dtype: Data type identifier handed to ``LoadKeOps``.
        device_id: Device index forwarded to the compiled routine. It is
            replaced by the device index of ``args[0]`` when both
            ``tagCPUGPU`` and ``tagHostDevice`` equal 1.
        eps: Value forwarded to ``ConjugateGradientSolver``
            (presumably the stopping tolerance - confirm against the solver).
        ranges: Ranges forwarded to the compiled routine; ``None`` is
            replaced by an empty tuple.
        accuracy_flags: List of extra compiler flags, appended after the
            include-directory flag.
        *args: Input arrays (objects exposing ``.device.index``).

    Returns:
        The output of ``ConjugateGradientSolver``.

    Raises:
        ValueError: If both tags equal 1 and the inputs do not all share
            the device index of ``args[0]``.
    """
    optional_flags = ['-DPYTORCH_INCLUDE_DIR=' + ';'.join(include_dirs)] + accuracy_flags
    myconv = LoadKeOps(formula, aliases, dtype, 'torch', optional_flags).import_module()

    # Context variables: save everything to compute the gradient:
    ctx.formula = formula
    ctx.aliases = aliases
    ctx.varinvpos = varinvpos
    ctx.alpha = alpha
    ctx.backend = backend
    ctx.dtype = dtype
    ctx.device_id = device_id
    ctx.eps = eps
    ctx.myconv = myconv
    ctx.ranges = ranges
    ctx.accuracy_flags = accuracy_flags

    if ranges is None:
        ranges = ()  # To keep the same type

    varinv = args[varinvpos]

    tagCPUGPU, tag1D2D, tagHostDevice = get_tag_backend(backend, args)

    # Logical `and`, not bitwise `&`: `a == 1 & b == 1` parses as the chained
    # comparison `a == (1 & b) == 1`, which only matches the intent by accident.
    if tagCPUGPU == 1 and tagHostDevice == 1:
        device_id = args[0].device.index
        if any(arg.device.index != device_id for arg in args[1:]):
            raise ValueError(
                "[KeOps] Input arrays must be all located on the same device."
            )

    (categories, dimensions) = parse_aliases(aliases)

    def linop(var):
        # Substitute `var` for the array at position `varinvpos`.
        newargs = args[:varinvpos] + (var, ) + args[varinvpos + 1:]
        res = myconv.genred_pytorch(tagCPUGPU, tag1D2D, tagHostDevice, device_id,
                                    ranges, categories, dimensions, *newargs)
        if alpha:
            res += alpha * var
        return res

    result = ConjugateGradientSolver('torch', linop, varinv.data, eps)

    # Relying on the saved variables of 'ctx' is necessary if you want to be able
    # to differentiate the output of the backward once again. It helps pytorch to
    # keep track of 'who is who'.
    ctx.save_for_backward(*args, result)

    return result
def forward(ctx, formula, aliases, backend, dtype, device_id, ranges, optional_flags, rec_multVar_highdim, nx, ny, *args):
    """Load the KeOps routine for ``formula`` and apply it to ``args``.

    Every non-array input is stored on ``ctx``, and the input arrays (plus
    the result) are registered with ``ctx.save_for_backward`` so that the
    gradient can be computed later.

    Args:
        ctx: Autograd context object; receives the attributes set below.
        formula: Formula handed to ``LoadKeOps``.
        aliases: Aliases handed to ``LoadKeOps``.
        backend: Backend specification, resolved by ``get_tag_backend``.
        dtype: Data type identifier handed to ``LoadKeOps``.
        device_id: Device index forwarded to the compiled routine. It is
            replaced by the device index of ``args[0]`` when both
            ``tagCPUGPU`` and ``tagHostDevice`` equal 1.
        ranges: Iterable of range arrays, or ``None`` (treated as an empty
            tuple). Each entry is made contiguous before the call.
        optional_flags: List of compiler flags. It is copied, never
            modified in place.
        rec_multVar_highdim: When not ``None``, the compiler flag
            ``-DMULT_VAR_HIGHDIM=1`` is added for this compilation only.
        nx: Forwarded to the compiled routine.
        ny: Forwarded to the compiled routine.
        *args: Input arrays (objects exposing ``.device.index``).

    Returns:
        The output of ``myconv.genred_pytorch``.

    Raises:
        ValueError: If both tags equal 1 and the inputs do not all share
            the device index of ``args[0]``.
    """
    # N.B. when rec_multVar_highdim option is set, it means that formula is of the
    # form "sum(F*b)", where b is a variable with large dimension. In this case we
    # set compiler option MULT_VAR_HIGHDIM to allow for the use of the special
    # "final chunk" computation mode. However, this may not be also true for the
    # gradients of the same formula. In fact only the gradient with respect to
    # variable b will have the same form. Hence, we save optional_flags current
    # status into ctx, before adding the MULT_VAR_HIGHDIM compiler option.
    ctx.optional_flags = optional_flags.copy()

    # Work on a private copy: an in-place `+=` would append the flag to the
    # caller's list on every call and leak it into later ctx snapshots.
    compile_flags = list(optional_flags)
    if rec_multVar_highdim is not None:
        compile_flags.append("-DMULT_VAR_HIGHDIM=1")

    myconv = LoadKeOps(formula, aliases, dtype, 'torch', compile_flags, include_dirs).import_module()

    # Context variables: save everything to compute the gradient:
    ctx.formula = formula
    ctx.aliases = aliases
    ctx.backend = backend
    ctx.dtype = dtype
    ctx.device_id = device_id
    ctx.ranges = ranges
    ctx.rec_multVar_highdim = rec_multVar_highdim
    ctx.myconv = myconv
    ctx.nx = nx
    ctx.ny = ny

    tagCPUGPU, tag1D2D, tagHostDevice = get_tag_backend(backend, args)

    # Logical `and`, not bitwise `&`: `a == 1 & b == 1` parses as the chained
    # comparison `a == (1 & b) == 1`, which only matches the intent by accident.
    if tagCPUGPU == 1 and tagHostDevice == 1:
        device_id = args[0].device.index
        if any(arg.device.index != device_id for arg in args[1:]):
            raise ValueError(
                "[KeOps] Input arrays must be all located on the same device."
            )

    if ranges is None:
        ranges = ()  # To keep the same type

    # N.B.: KeOps C++ expects contiguous integer arrays as ranges
    ranges = tuple(r.contiguous() for r in ranges)

    result = myconv.genred_pytorch(tagCPUGPU, tag1D2D, tagHostDevice, device_id, ranges, nx, ny, *args)

    # Relying on the saved variables of 'ctx' is necessary if you want to be able
    # to differentiate the output of the backward once again. It helps pytorch to
    # keep track of 'who is who'.
    ctx.save_for_backward(*args, result)

    return result
def forward(ctx, formula, aliases, varinvpos, alpha, backend, dtype, device_id, eps, ranges, optional_flags, rec_multVar_highdim, *args):
    """Solve a linear system with the KeOps routine for ``formula``.

    The compiled routine, with ``alpha * var`` added when ``alpha`` is
    truthy, is wrapped as a linear operator and passed to
    ``ConjugateGradientSolver`` with ``args[varinvpos].data`` as its array
    argument. All inputs needed for the gradient are stored on ``ctx``.

    Args:
        ctx: Autograd context object; receives the attributes set below.
        formula: Formula handed to ``LoadKeOps``.
        aliases: Aliases handed to ``LoadKeOps``.
        varinvpos: Index in ``args`` of the array that the linear operator
            replaces with its own argument.
        alpha: Coefficient of the ``alpha * var`` term added to the operator
            output; skipped when falsy.
        backend: Backend specification, resolved by ``get_tag_backend``.
        dtype: Data type identifier handed to ``LoadKeOps``.
        device_id: Device index forwarded to the compiled routine. It is
            replaced by the device index of ``args[0]`` when both
            ``tagCPUGPU`` and ``tagHostDevice`` equal 1.
        eps: Value forwarded to ``ConjugateGradientSolver``
            (presumably the stopping tolerance - confirm against the solver).
        ranges: Ranges forwarded to the compiled routine; ``None`` is
            replaced by an empty tuple.
        optional_flags: List of compiler flags. It is copied, never
            modified in place.
        rec_multVar_highdim: When not ``None``, the compiler flag
            ``-DMULT_VAR_HIGHDIM=1`` is added for this compilation only.
        *args: Input arrays (objects exposing ``.device.index``).

    Returns:
        The output of ``ConjugateGradientSolver``.

    Raises:
        ValueError: If both tags equal 1 and the inputs do not all share
            the device index of ``args[0]``.
    """
    # Work on a private copy: in-place `+=` would grow the caller's list on
    # every call.
    base_flags = list(optional_flags)
    base_flags.extend(include_dirs)

    # N.B. when rec_multVar_highdim option is set, it means that formula is of the
    # form "sum(F*b)", where b is a variable with large dimension. In this case we
    # set compiler option MULT_VAR_HIGHDIM to allow for the use of the special
    # "final chunk" computation mode. However, this may not be also true for the
    # gradients of the same formula. In fact only the gradient with respect to
    # variable b will have the same form. Hence, we save optional_flags current
    # status into ctx, before adding the MULT_VAR_HIGHDIM compiler option.
    # This snapshot is deliberately NOT overwritten afterwards with the list
    # that contains MULT_VAR_HIGHDIM.
    ctx.optional_flags = base_flags.copy()

    compile_flags = base_flags
    if rec_multVar_highdim is not None:
        compile_flags = base_flags + ["-DMULT_VAR_HIGHDIM=1"]

    myconv = LoadKeOps(formula, aliases, dtype, "torch", compile_flags).import_module()

    # Context variables: save everything to compute the gradient:
    ctx.formula = formula
    ctx.aliases = aliases
    ctx.varinvpos = varinvpos
    ctx.alpha = alpha
    ctx.backend = backend
    ctx.dtype = dtype
    ctx.device_id = device_id
    ctx.eps = eps
    ctx.myconv = myconv
    ctx.ranges = ranges
    ctx.rec_multVar_highdim = rec_multVar_highdim

    if ranges is None:
        ranges = ()  # To keep the same type

    varinv = args[varinvpos]

    tagCPUGPU, tag1D2D, tagHostDevice = get_tag_backend(backend, args)

    # Logical `and`, not bitwise `&`: `a == 1 & b == 1` parses as the chained
    # comparison `a == (1 & b) == 1`, which only matches the intent by accident.
    if tagCPUGPU == 1 and tagHostDevice == 1:
        device_id = args[0].device.index
        if any(arg.device.index != device_id for arg in args[1:]):
            raise ValueError(
                "[KeOps] Input arrays must be all located on the same device."
            )

    def linop(var):
        # Substitute `var` for the array at position `varinvpos`.
        newargs = args[:varinvpos] + (var, ) + args[varinvpos + 1:]
        res = myconv.genred_pytorch(tagCPUGPU, tag1D2D, tagHostDevice, device_id, ranges, *newargs)
        if alpha:
            res += alpha * var
        return res

    result = ConjugateGradientSolver("torch", linop, varinv.data, eps)

    # Relying on the saved variables of 'ctx' is necessary if you want to be able
    # to differentiate the output of the backward once again. It helps pytorch to
    # keep track of 'who is who'.
    ctx.save_for_backward(*args, result)

    return result
def __call__(self, *args, backend='auto', device_id=-1, alpha=1e-10, eps=1e-6, ranges=None):
    r"""
    Run the solver on arbitrary NumPy arrays.

    Warning:
        KeOps expects 2d arrays of size ``(M,dim)`` even for variables of
        size 1 (e.g. :math:`a_i\in\mathbb{R}` for :math:`i\in[0,M)`).
        In practice, use ``a.reshape(-1,1)`` to turn a vector of weights
        into a *list of scalar values*.

    Args:
        *args (2d arrays (variables ``Vi(..)``, ``Vj(..)``) and 1d arrays (parameters ``Pm(..)``)):
            The input numerical arrays. They should all share the same
            ``dtype``, be **contiguous** and live on the **same device**.
            One array is expected per alias, following these rules:

            - Every ``Vi(Dim_k)`` variable is a **2d-array** with ``Dim_k``
              columns; all of them have the same number of lines :math:`M`.
            - Every ``Vj(Dim_k)`` variable is a **2d-array** with ``Dim_k``
              columns; all of them have the same number of lines :math:`N`.
            - Every ``Pm(Dim_k)`` variable is a **1d-array** (vector) of
              size ``Dim_k``.

    Keyword Args:
        alpha (float, default = 1e-10): Non-negative
            **ridge regularization** parameter, added to the diagonal
            of the Kernel matrix :math:`K_{xx}`.

        backend (string): Map-reduce scheme to use, as detailed in the
            documentation of the
            :class:`numpy.Genred <pykeops.numpy.Genred>` module.

        device_id (int, default=-1): GPU that should perform the
            computation; a negative value lets your system choose the
            default GPU. Only useful if your system has access to
            several GPUs.

        ranges (6-uple of IntTensors, None by default):
            Ranges of integers describing a
            :doc:`block-sparse reduction scheme <../../sparsity>`
            with *Mc clusters along axis 0* and *Nc clusters along axis 1*,
            as detailed in the documentation of the
            :class:`numpy.Genred <pykeops.numpy.Genred>` module.

            With **None** (default), a **dense Kernel matrix** is used:
            we loop over all indices :math:`i\in[0,M)` and
            :math:`j\in[0,N)`.

    Returns:
        (M,D) or (N,D) array:

        The solution of the optimization problem, always a
        **2d-array** with :math:`M` or :math:`N` lines (if **axis** = 1
        or **axis** = 0, respectively) and a number of columns
        inferred from the **formula**.
    """
    # Resolve the backend into tags (the third one is not needed here).
    tag_cpu_gpu, tag_1d_2d, _ = get_tag_backend(backend, args)
    target = args[self.varinvpos]

    # ranges should be encoded as a tuple
    block_ranges = () if ranges is None else ranges

    def regularized_operator(candidate):
        # Put `candidate` in the slot of the variable being solved for.
        leading = args[:self.varinvpos]
        trailing = args[self.varinvpos + 1:]
        call_args = leading + (candidate, ) + trailing
        product = self.myconv.genred_numpy(
            tag_cpu_gpu, tag_1d_2d, 0, device_id, block_ranges, *call_args
        )
        if not alpha:
            return product
        product += alpha * candidate
        return product

    return ConjugateGradientSolver('numpy', regularized_operator, target, eps=eps)
def __call__(self, *args, backend='auto', device_id=-1, ranges=None):
    r"""
    Run the reduction on arbitrary NumPy arrays.

    Warning:
        KeOps expects 2d arrays of size ``(M,dim)`` even for variables of
        size 1 (e.g. :math:`a_i\in\mathbb{R}` for :math:`i\in[0,M)`).
        In practice, use ``a.reshape(-1,1)`` to turn a vector of weights
        into a *list of scalar values*.

    Args:
        *args (2d arrays (variables ``Vi(..)``, ``Vj(..)``) and 1d arrays (parameters ``Pm(..)``)):
            The input numerical arrays. They should all share the same
            ``dtype``, be **contiguous** and live on the **same device**.
            One array is expected per alias, following these rules:

            - Every ``Vi(Dim_k)`` variable is a **2d-array** with ``Dim_k``
              columns; all of them have the same number of lines :math:`M`.
            - Every ``Vj(Dim_k)`` variable is a **2d-array** with ``Dim_k``
              columns; all of them have the same number of lines :math:`N`.
            - Every ``Pm(Dim_k)`` variable is a **1d-array** (vector) of
              size ``Dim_k``.

    Keyword Args:
        backend (string): Map-reduce scheme to use. Supported values:

            - ``"auto"`` (default): KeOps picks the backend best suited to
              your data, based on the arrays' shapes. ``"GPU_1D"`` is
              chosen in most cases.
            - ``"CPU"``: a simple C++ ``for`` loop on a single CPU core.
            - ``"GPU_1D"``: a `simple multithreading scheme <https://github.com/getkeops/keops/blob/master/keops/core/GpuConv1D.cu>`_
              on the GPU - basically, one thread per value of the output
              index.
            - ``"GPU_2D"``: a more sophisticated `2D parallelization scheme <https://github.com/getkeops/keops/blob/master/keops/core/GpuConv2D.cu>`_
              on the GPU.
            - ``"GPU"``: KeOps decides which of ``"GPU_1D"`` or
              ``"GPU_2D"`` will run faster on the given input.

        device_id (int, default=-1): GPU that should perform the
            computation; a negative value lets your system choose the
            default GPU. Only useful if your system has access to
            several GPUs.

        ranges (6-uple of integer arrays, None by default):
            Ranges of integers describing a
            :doc:`block-sparse reduction scheme <../../sparsity>`
            with *Mc clusters along axis 0* and *Nc clusters along axis 1*.
            With None (default), we simply loop over all indices
            :math:`i\in[0,M)` and :math:`j\in[0,N)`.

            **The first three ranges** are used if **axis** = 1
            (reduction along the axis of ":math:`j` variables"),
            and to compute gradients with respect to ``Vi(..)`` variables:

            - ``ranges_i``, (Mc,2) integer array - slice indices
              :math:`[\operatorname{start}^I_k,\operatorname{end}^I_k)` in :math:`[0,M]`
              that specify our Mc blocks along the axis 0
              of ":math:`i` variables".
            - ``slices_i``, (Mc,) integer array - consecutive slice indices
              :math:`[\operatorname{end}^S_1, ..., \operatorname{end}^S_{M_c}]`
              that specify Mc ranges
              :math:`[\operatorname{start}^S_k,\operatorname{end}^S_k)` in ``redranges_j``,
              with :math:`\operatorname{start}^S_k = \operatorname{end}^S_{k-1}`.
              **The first 0 is implicit**, meaning that
              :math:`\operatorname{start}^S_0 = 0`, and we typically expect that
              ``slices_i[-1] == len(redrange_j)``.
            - ``redranges_j``, (Mcc,2) integer array - slice indices
              :math:`[\operatorname{start}^J_l,\operatorname{end}^J_l)` in :math:`[0,N]`
              that specify reduction ranges along the axis 1
              of ":math:`j` variables".

            If **axis** = 1, these integer arrays say that
            ``for k in range(Mc)``, the output values for indices
            ``i in range( ranges_i[k,0], ranges_i[k,1] )`` should be
            computed using a Map-Reduce scheme over indices
            ``j in Union( range( redranges_j[l, 0], redranges_j[l, 1] ))``
            for ``l in range( slices_i[k-1], slices_i[k] )``.

            **Likewise, the last three ranges** are used if **axis** = 0
            (reduction along the axis of ":math:`i` variables"),
            and to compute gradients with respect to ``Vj(..)`` variables:

            - ``ranges_j``, (Nc,2) integer array - slice indices
              :math:`[\operatorname{start}^J_k,\operatorname{end}^J_k)` in :math:`[0,N]`
              that specify our Nc blocks along the axis 1
              of ":math:`j` variables".
            - ``slices_j``, (Nc,) integer array - consecutive slice indices
              :math:`[\operatorname{end}^S_1, ..., \operatorname{end}^S_{N_c}]`
              that specify Nc ranges
              :math:`[\operatorname{start}^S_k,\operatorname{end}^S_k)` in ``redranges_i``,
              with :math:`\operatorname{start}^S_k = \operatorname{end}^S_{k-1}`.
              **The first 0 is implicit**, meaning that
              :math:`\operatorname{start}^S_0 = 0`, and we typically expect that
              ``slices_j[-1] == len(redrange_i)``.
            - ``redranges_i``, (Ncc,2) integer array - slice indices
              :math:`[\operatorname{start}^I_l,\operatorname{end}^I_l)` in :math:`[0,M]`
              that specify reduction ranges along the axis 0
              of ":math:`i` variables".

            If **axis** = 0, these integer arrays say that
            ``for k in range(Nc)``, the output values for indices
            ``j in range( ranges_j[k,0], ranges_j[k,1] )`` should be
            computed using a Map-Reduce scheme over indices
            ``i in Union( range( redranges_i[l, 0], redranges_i[l, 1] ))``
            for ``l in range( slices_j[k-1], slices_j[k] )``.

    Returns:
        (M,D) or (N,D) array:

        The output of the reduction, a **2d-array** with :math:`M` or
        :math:`N` lines (if **axis** = 1 or **axis** = 0, respectively)
        and a number of columns inferred from the **formula**.
    """
    # Resolve the backend into tags (the third one is not needed here).
    tag_cpu_gpu, tag_1d_2d, _ = get_tag_backend(backend, args)

    # An empty tuple stands for "no ranges", to keep the same type.
    block_ranges = () if ranges is None else ranges

    raw_output = self.myconv.genred_numpy(
        tag_cpu_gpu, tag_1d_2d, 0, device_id, block_ranges, *args
    )

    # The output length follows the axis that is NOT reduced.
    nx, ny = get_sizes(self.aliases, *args)
    if self.axis == 1:
        n_out = nx
    else:
        n_out = ny

    return postprocess(
        raw_output, "numpy", self.reduction_op, n_out, self.opt_arg, self.dtype
    )