Example #1
class TestKeops:
    basic_options = FalkonOptions(debug=True, compute_arch_speed=False, keops_active="force",
                                  max_cpu_mem=max_mem_dense, max_gpu_mem=max_mem_dense)

    @pytest.mark.parametrize("Ao,Adt,Bo,Bdt,vo,vdt", [
        ("C", np.float32, "C", np.float32, "C", np.float32),
        ("C", np.float64, "C", np.float64, "C", np.float64),
        pytest.param("F", np.float32, "F", np.float32, "F", np.float32,
                     marks=[pytest.mark.xfail(reason="KeOps only C")]),
        pytest.param("F", np.float32, "C", np.float32, "C", np.float32,
                     marks=[pytest.mark.xfail(reason="KeOps only C")]),
    ], ids=["AC32-BC32-vC32", "AC64-BC64-vC64", "AF32-BF32-vF32", "AF32-BC32-vC32"])
    @pytest.mark.parametrize("cpu", cpu_params, ids=["cpu", "gpu"])
    def test_fmmv(self, A, B, v, Ao, Adt, Bo, Bdt, vo, vdt, kernel,
                  expected_fmmv, cpu):
        A = fix_mat(A, order=Ao, dtype=Adt)
        B = fix_mat(B, order=Bo, dtype=Bdt)
        v = fix_mat(v, order=vo, dtype=vdt)

        opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
        # Test with out
        out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype)
        _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=out, rtol=rtol, opt=opt)

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_gpu_inputs(self, A, B, v, kernel, expected_fmmv):
        A = fix_mat(A, order="C", dtype=n32).cuda()
        B = fix_mat(B, order="C", dtype=n32, device=A.device)
        v = fix_mat(v, order="C", dtype=n32, device=A.device)
        opt = dataclasses.replace(self.basic_options, use_cpu=False, max_gpu_mem=np.inf)
        rtol = choose_on_dtype(A.dtype)
        # Test normal
        _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
        # Test with out
        out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype, device=A.device)
        _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=out, rtol=rtol, opt=opt)

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_gpu_inputs_fail(self, A, B, v, kernel, expected_fmmv):
        A = fix_mat(A, order="C", dtype=n32, device="cuda:0")
        B = fix_mat(B, order="C", dtype=n32, device="cuda:0")
        v = fix_mat(v, order="C", dtype=n32, device="cpu")
        opt = dataclasses.replace(self.basic_options, use_cpu=False, max_gpu_mem=np.inf)
        rtol = choose_on_dtype(A.dtype)
        # Test normal
        with pytest.raises(RuntimeError):
            _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
Example #2
def _decide_backend(opt: BaseOptions, num_dim: int) -> str:
    """Switch between CPU and GPU backend for KeOps
    """
    if not decide_cuda(opt):
        return 'CPU'
    else:
        return 'GPU_1D'
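
The returned string is the value KeOps expects for the `backend` argument of a compiled reduction. A minimal sketch of how it might be consumed, assuming the standard `pykeops.torch.Genred` interface (the formula, aliases and placeholder variables below are illustrative, not Falkon's):

from pykeops.torch import Genred

# Gaussian-kernel-vector product expressed as a KeOps sum-reduction.
routine = Genred('Exp(-SqDist(x, y)) * b',
                 ['x = Vi(3)', 'y = Vj(3)', 'b = Vj(1)'],
                 reduction_op='Sum', axis=1)
# X1, X2, v, opt are placeholders for the caller's data and options;
# _decide_backend picks 'CPU' or 'GPU_1D' depending on CUDA availability.
out = routine(X1, X2, v, backend=_decide_backend(opt, num_dim=3))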
Example #3
    def __init__(
            self,
            kernel: falkon.kernels.Kernel,
            penalty_list: List[float],
            iter_list: List[int],
            loss: Loss,
            M: int,
            center_selection: Union[str,
                                    falkon.center_selection.NySel] = 'uniform',
            seed: Optional[int] = None,
            error_fn: Optional[callable] = None,
            error_every: Optional[int] = 1,
            options=FalkonOptions(),
    ):
        self.kernel = kernel
        self.penalty_list = penalty_list
        self.iter_list = iter_list
        if len(self.iter_list) != len(self.penalty_list):
            raise ValueError(
                "Iteration list must be of same length as penalty list "
                "(found %d and %d)" %
                (len(self.iter_list), len(self.penalty_list)))
        self.M = M
        self.seed = seed
        self.loss = loss
        if self.seed is not None:
            torch.manual_seed(self.seed)  # Works for both CPU and GPU
        self.random_state_ = check_random_generator(self.seed)

        self.error_fn = error_fn
        self.error_every = error_every
        # Options
        self.options = options
        self._cg_options = options.get_conjgrad_options()
        self._keops_options = options.get_keops_options()
        self._pc_options = options.get_pc_options()
        self._cholesky_opt = options.get_chol_options()
        self._lauum_opt = options.get_lauum_options()
        self._base_opt = options.get_base_options()

        self.use_cuda_ = decide_cuda(self.options)
        self.alpha_ = None
        self.ny_points_ = None
        self.fit_times_ = None

        if isinstance(center_selection, str):
            if center_selection.lower() == 'uniform':
                self.center_selection = falkon.center_selection.UniformSel(
                    self.random_state_)
            else:
                raise ValueError(
                    f'Center selection "{center_selection}" is not valid.')
        else:
            self.center_selection = center_selection

        self._init_cuda()
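
For context, a hedged construction example for an estimator with this signature; the `LogisticFalkon` and `LogisticLoss` names are assumptions based on Falkon's public API and do not appear in the snippet above:

import falkon
from falkon import kernels
from falkon.gsc_losses import LogisticLoss  # assumed import path

kernel = kernels.GaussianKernel(3.0)
flk = falkon.LogisticFalkon(
    kernel=kernel,
    penalty_list=[1e-3, 1e-5, 1e-7],  # one penalty per Newton step
    iter_list=[3, 3, 8],              # must match penalty_list in length
    loss=LogisticLoss(kernel),
    M=500,
)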
Example #4
    def _decide_dmmv_impl(self, X1, X2, v, w, opt: FalkonOptions):
        """Choose which `dmmv` function to use for this data.

        Note that `dmmv` functions compute double kernel-vector products (see :meth:`dmmv` for
        an explanation of what they are).

        Parameters
        ----------
        X1 : torch.Tensor
            First data matrix, of shape (N x D)
        X2 : torch.Tensor
            Second data matrix, of shape (M x D)
        v : torch.Tensor or None
            Vector for the matrix-vector multiplication (M x T)
        w : torch.Tensor or None
            Vector for the matrix-vector multiplication (N x T)
        opt : FalkonOptions
            Falkon options. Options may be specified to force GPU or CPU usage.

        Returns
        -------
        dmmv_fn
            A function which performs the `dmmv` operation.

        Notes
        -----
        This function decides based on the inputs: if the inputs are sparse, it will choose
        the sparse implementations; if CUDA is detected, it will choose the CUDA implementation;
        otherwise it will simply choose the basic CPU implementation.
        """
        use_cuda = decide_cuda(opt)
        sparsity = check_sparse(X1, X2)
        if not all(sparsity) and any(sparsity):
            raise ValueError(
                "Either all or none of 'X1', 'X2' must be sparse.")
        if (X1.device.type == 'cuda') and (not use_cuda):
            warnings.warn(
                "kernel-vector double product backend was chosen to be CPU, but GPU "
                "input tensors found. Defaulting to use the GPU (note this may "
                "cause issues later). To force usage of the CPU backend, "
                "please pass CPU tensors; to avoid this warning if the GPU backend is "
                "desired, check your options (i.e. set 'use_cpu=False').")
            use_cuda = True
        sparsity = all(sparsity)
        if use_cuda:
            from falkon.mmv_ops.fmmv_cuda import fdmmv_cuda, fdmmv_cuda_sparse
            if sparsity:
                return fdmmv_cuda_sparse
            else:
                return fdmmv_cuda
        else:
            if sparsity:
                return fdmmv_cpu_sparse
            else:
                return fdmmv_cpu
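
All of the returned implementations compute the same quantity; the expected-value fixtures used by the tests elsewhere in this dump (`s_e_dfmmv1` through `s_e_dfmmv3`) pin it down as gram.T @ (gram @ v + w), with missing operands dropped. A dense reference, written out for clarity (a sketch, not Falkon's implementation):

import torch

def dmmv_reference(K: torch.Tensor, v=None, w=None):
    # K: full (N x M) kernel matrix; v: (M x T) or None; w: (N x T) or None.
    # Computes K.T @ (K @ v + w); at least one of v, w must be given.
    assert v is not None or w is not None
    inner = torch.zeros(K.shape[0], (v if w is None else w).shape[1],
                        dtype=K.dtype)
    if v is not None:
        inner = inner + K @ v
    if w is not None:
        inner = inner + w
    return K.T @ inner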
Example #5
    def __init__(self, penalty: float, kernel, opt: FalkonOptions):
        super().__init__()
        self.params = opt
        self._use_cuda = decide_cuda(self.params) and not self.params.cpu_preconditioner

        self._lambda = penalty
        self.kernel = kernel

        self.fC: Optional[torch.Tensor] = None
        self.dT: Optional[torch.Tensor] = None
        self.dA: Optional[torch.Tensor] = None
Example #6
    def __init__(self, kernel, loss, opt: FalkonOptions):
        super().__init__()
        self.params = opt
        self._use_cuda = decide_cuda(
            self.params) and not self.params.cpu_preconditioner

        self.kernel = kernel
        self.loss = loss

        self.fC = None
        self.dT = None
        self.dA = None
Example #7
    def _decide_mm_impl(self, X1, X2, opt: FalkonOptions):
        """Choose which `mm` function to use for this data.

        Note that `mm` functions compute the kernel itself so **KeOps may not be used**.

        Parameters
        ----------
        X1 : torch.Tensor
            First data matrix, of shape (N x D)
        X2 : torch.Tensor
            Second data matrix, of shape (M x D)
        opt : FalkonOptions
            Falkon options. Options may be specified to force GPU or CPU usage.

        Returns
        -------
        mm_fn
            A function which performs the `mm` operation.

        Notes
        -----
        This function decides based on the inputs: if the inputs are sparse, it will choose
        the sparse implementations; if CUDA is detected, it will choose the CUDA implementation;
        otherwise it will simply choose the basic CPU implementation.
        """
        use_cuda = decide_cuda(opt)
        sparsity = check_sparse(X1, X2)
        if not all(sparsity) and any(sparsity):
            raise ValueError(
                "Either all or none of 'X1', 'X2' must be sparse.")
        sparsity = all(sparsity)
        if (X1.device.type == 'cuda') and (not use_cuda):
            warnings.warn(
                "kernel backend was chosen to be CPU, but GPU input tensors found. "
                "Defaulting to use the GPU (note this may cause issues later). "
                "To force usage of the CPU backend, please pass CPU tensors; "
                "to avoid this warning if the GPU backend is "
                "desired, check your options (i.e. set 'use_cpu=False').")
            use_cuda = True
        if use_cuda:
            from falkon.mmv_ops.fmm_cuda import fmm_cuda, fmm_cuda_sparse
            if sparsity:
                return fmm_cuda_sparse
            else:
                return fmm_cuda
        else:
            if sparsity:
                return fmm_cpu_sparse
            else:
                return fmm_cpu
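
For reference, the `mm` operation materializes the full N x M kernel matrix. A dense sketch for a Gaussian kernel, assuming the usual exp(-||x - y||^2 / (2 sigma^2)) form (this is not Falkon's tiled fmm implementation):

import torch

def gaussian_mm_reference(X1: torch.Tensor, X2: torch.Tensor, sigma: float = 1.0):
    # Full (N x M) kernel matrix; memory scales as N * M, which is why
    # the real implementations tile the computation across blocks/devices.
    sq_dists = torch.cdist(X1, X2) ** 2
    return torch.exp(-sq_dists / (2 * sigma ** 2))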
Example #8
    def __init__(self,
                 kernel: falkon.kernels.Kernel,
                 penalty: float,
                 M: int,
                 center_selection: Union[str, falkon.center_selection.NySel] = 'uniform',
                 maxiter: int = 20,
                 seed: Optional[int] = None,
                 error_fn: Optional[callable] = None,
                 error_every: Optional[int] = 1,
                 options=FalkonOptions(),
                 ):
        self.kernel = kernel
        self.penalty = penalty
        self.M = M
        self.maxiter = maxiter
        self.seed = seed
        if self.seed is not None:
            torch.manual_seed(self.seed)  # Works for both CPU and GPU
        self.random_state_ = check_random_generator(self.seed)

        self.error_fn = error_fn
        self.error_every = error_every
        # Options
        self.options = options
        self._cg_options = options.get_conjgrad_options()
        self._keops_options = options.get_keops_options()
        self._pc_options = options.get_pc_options()
        self._cholesky_opt = options.get_chol_options()
        self._lauum_opt = options.get_lauum_options()
        self._base_opt = options.get_base_options()

        self.use_cuda_ = decide_cuda(self.options)
        self.alpha_ = None
        self.ny_points_ = None
        self.fit_times_ = None

        if isinstance(center_selection, str):
            if center_selection.lower() == 'uniform':
                self.center_selection = falkon.center_selection.UniformSel(
                    self.random_state_)
            else:
                raise ValueError(f'Center selection "{center_selection}" is not valid.')
        else:
            self.center_selection = center_selection

        self._init_cuda()
Example #9
    def _decide_dmmv_impl(self, X1, X2, v, w, opt: FalkonOptions):
        use_cuda = decide_cuda(opt)
        sparsity = check_sparse(X1, X2)
        if not all(sparsity) and any(sparsity):
            raise ValueError(
                "Either all or none of 'X1', 'X2' must be sparse.")
        sparsity = all(sparsity)
        if use_cuda:
            from falkon.mmv_ops.fmmv_cuda import fdmmv_cuda, fdmmv_cuda_sparse
            if sparsity:
                return fdmmv_cuda_sparse
            else:
                return fdmmv_cuda
        else:
            if sparsity:
                return fdmmv_cpu_sparse
            else:
                return fdmmv_cpu
Example #10
    def __init__(self,
                 kernel: falkon.kernels.Kernel,
                 M: Optional[int],
                 center_selection: Union[str, falkon.center_selection.CenterSelector] = 'uniform',
                 seed: Optional[int] = None,
                 error_fn: Optional[callable] = None,
                 error_every: Optional[int] = 1,
                 options: Optional[FalkonOptions] = None,
                 ):
        self.kernel = kernel
        self.M = M
        self.seed = seed
        if self.seed is not None:
            torch.manual_seed(self.seed)  # Works for both CPU and GPU
        self.random_state_ = check_random_generator(self.seed)

        self.error_fn = error_fn
        self.error_every = error_every
        # Options
        self.options = options or FalkonOptions()
        self._cg_options = self.options.get_conjgrad_options()
        self._keops_options = self.options.get_keops_options()
        self._pc_options = self.options.get_pc_options()
        self._cholesky_opt = self.options.get_chol_options()
        self._base_opt = self.options.get_base_options()

        self.use_cuda_ = decide_cuda(self.options)
        self.num_gpus = 0
        self.alpha_ = None
        self.ny_points_ = None
        self.fit_times_ = None

        if isinstance(center_selection, str):
            if center_selection.lower() == 'uniform':
                if M is None:
                    raise ValueError(
                        "M must be specified when no `CenterSelector` object is provided. "
                        "Specify an integer value for `M` or a `CenterSelector` object.")
                self.center_selection: falkon.center_selection.CenterSelector = \
                    falkon.center_selection.UniformSelector(self.random_state_, num_centers=M)
            else:
                raise ValueError(f'Center selection "{center_selection}" is not valid.')
        else:
            self.center_selection: falkon.center_selection.CenterSelector = center_selection
Example #11
    def _decide_dmmv_impl(self, X1, X2, v, w, opt: FalkonOptions):
        use_cuda = decide_cuda(opt)
        sparsity = check_sparse(X1, X2)
        if not all(sparsity) and any(sparsity):
            raise ValueError("Either all or none of 'X1', 'X2' must be sparse.")
        if (X1.device.type == 'cuda') and (not use_cuda):
            warnings.warn("kernel-vector double product backend was chosen to be CPU, but GPU "
                          "input tensors found. Defaulting to use the GPU (note this may "
                          "cause issues later). To force usage of the CPU backend, "
                          "please pass CPU tensors; to avoid this warning if the GPU backend is "
                          "desired, check your options (i.e. set 'use_cpu=False').")
            use_cuda = True
        sparsity = all(sparsity)
        if use_cuda:
            from falkon.mmv_ops.fmmv_cuda import fdmmv_cuda, fdmmv_cuda_sparse
            if sparsity:
                return fdmmv_cuda_sparse
            else:
                return fdmmv_cuda
        else:
            if sparsity:
                return fdmmv_cpu_sparse
            else:
                return fdmmv_cpu
Example #12
class TestSparse:
    basic_options = FalkonOptions(debug=True,
                                  compute_arch_speed=False,
                                  max_cpu_mem=max_mem_sparse,
                                  max_gpu_mem=max_mem_sparse)
    # sparse_dim and sparse_density result in sparse matrices with roughly
    # n (for A) and m (for B) non-zero entries.
    sparse_dim = 10_000
    sparse_density = 1e-4

    @pytest.fixture(scope="class")
    def s_A(self):
        A = gen_sparse_matrix(n,
                              self.sparse_dim,
                              np.float64,
                              density=self.sparse_density,
                              seed=14)
        Ad = torch.from_numpy(A.to_scipy().todense())
        return A, Ad

    @pytest.fixture(scope="class")
    def s_B(self):
        B = gen_sparse_matrix(m,
                              self.sparse_dim,
                              np.float64,
                              density=self.sparse_density,
                              seed=14)
        Bd = torch.from_numpy(B.to_scipy().todense())
        return B, Bd

    @pytest.fixture(scope="class")
    def s_gram(self, kernel, s_A, s_B):
        opt = FalkonOptions(use_cpu=True, compute_arch_speed=False)
        return kernel(s_A[1], s_B[1], opt=opt)  # n x m kernel

    @pytest.fixture(scope="class")
    def s_expected_fmmv(self, s_gram, v):
        return s_gram @ v

    @pytest.fixture(scope="class")
    def s_e_dfmmv1(self, s_gram, v, w):
        return s_gram.T @ (s_gram @ v + w)

    @pytest.fixture(scope="class")
    def s_e_dfmmv2(self, s_gram, v):
        return s_gram.T @ (s_gram @ v)

    @pytest.fixture(scope="class")
    def s_e_dfmmv3(self, s_gram, w):
        return s_gram.T @ w

    @pytest.fixture(scope="class")
    def s_e_dfmmv(self, request):
        return request.getfixturevalue(request.param)

    @pytest.mark.parametrize("cpu", cpu_params, ids=["cpu", "gpu"])
    @pytest.mark.parametrize(
        "Adt,Bdt,vo,vdt", [
            (np.float32, np.float32, "F", np.float32),
            (np.float32, np.float32, "C", np.float32),
            (np.float64, np.float64, "F", np.float64),
            (np.float64, np.float64, "C", np.float64),
        ],
        ids=["A32-B32-vF32", "A32-B32-vC32", "A64-B64-vF64", "A64-B64-vC64"])
    def test_fmmv(self, s_A, s_B, v, Adt, Bdt, vo, vdt, kernel,
                  s_expected_fmmv, cpu):
        A = fix_sparse_mat(s_A[0], dtype=Adt)
        B = fix_sparse_mat(s_B[0], dtype=Bdt)
        v = fix_mat(v, dtype=vdt, order=vo, copy=True)

        opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.mmv,
                       s_expected_fmmv, (A, B, v),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype)
        _run_fmmv_test(kernel.mmv,
                       s_expected_fmmv, (A, B, v),
                       out=out,
                       rtol=rtol,
                       opt=opt)

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    @pytest.mark.parametrize("Adt,Bdt,vo,vdt",
                             [(np.float32, np.float32, "F", np.float32)],
                             ids=["A32-B32-vF32"])
    @pytest.mark.xfail(reason="Squared-norm not implemented for CUDA tensors",
                       run=True)
    def test_fmmv_input_device(self, s_A, s_B, v, Adt, Bdt, vo, vdt, kernel,
                               s_expected_fmmv):
        input_device = "cuda:0"
        A = fix_sparse_mat(s_A[0], dtype=Adt, device=input_device)
        B = fix_sparse_mat(s_B[0], dtype=Bdt, device=input_device)
        v = fix_mat(v, dtype=vdt, order=vo, copy=True, device=input_device)

        opt = dataclasses.replace(self.basic_options, use_cpu=False)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.mmv,
                       s_expected_fmmv, (A, B, v),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(A.shape[0],
                          v.shape[1],
                          dtype=A.dtype,
                          device=input_device)
        _run_fmmv_test(kernel.mmv,
                       s_expected_fmmv, (A, B, v),
                       out=out,
                       rtol=rtol,
                       opt=opt)

    @pytest.mark.parametrize("cpu", cpu_params, ids=["cpu", "gpu"])
    @pytest.mark.parametrize(
        "Adt,Bdt,vo,vdt,wo,wdt,s_e_dfmmv",
        [
            pytest.param(n32, n32, "F", n32, "F", n32, "s_e_dfmmv1",
                         marks=mark.usefixtures("s_e_dfmmv1")),
            pytest.param(n32, n32, "C", n32, "C", n32, "s_e_dfmmv1",
                         marks=mark.usefixtures("s_e_dfmmv1")),
            pytest.param(n64, n64, "F", n64, "F", n64, "s_e_dfmmv1",
                         marks=mark.usefixtures("s_e_dfmmv1")),
            pytest.param(n64, n64, "C", n64, "C", n64, "s_e_dfmmv1",
                         marks=mark.usefixtures("s_e_dfmmv1")),
            pytest.param(n32, n32, "F", n32, None, None, "s_e_dfmmv2",
                         marks=mark.usefixtures("s_e_dfmmv2")),
            pytest.param(n32, n32, "C", n32, None, None, "s_e_dfmmv2",
                         marks=mark.usefixtures("s_e_dfmmv2")),
            pytest.param(n64, n64, "F", n64, None, None, "s_e_dfmmv2",
                         marks=mark.usefixtures("s_e_dfmmv2")),
            pytest.param(n64, n64, "C", n64, None, None, "s_e_dfmmv2",
                         marks=mark.usefixtures("s_e_dfmmv2")),
            pytest.param(n32, n32, None, None, "F", n32, "s_e_dfmmv3",
                         marks=mark.usefixtures("s_e_dfmmv3")),
            pytest.param(n32, n32, None, None, "C", n32, "s_e_dfmmv3",
                         marks=mark.usefixtures("s_e_dfmmv3")),
            pytest.param(n64, n64, None, None, "F", n64, "s_e_dfmmv3",
                         marks=mark.usefixtures("s_e_dfmmv3")),
            pytest.param(n64, n64, None, None, "C", n64, "s_e_dfmmv3",
                         marks=mark.usefixtures("s_e_dfmmv3")),
            # A few mixed-contiguity examples
            pytest.param(n32, n32, "C", n32, "F", n32, "s_e_dfmmv1",
                         marks=mark.usefixtures("s_e_dfmmv1")),
        ],
        ids=["32-32-vF32-wF32", "32-32-vC32-wC32", "64-64-vF64-wF64",
             "64-64-vC64-wC64", "32-32-vF32", "32-32-vC32", "64-64-vF64",
             "64-64-vC64", "32-32-wF32", "32-32-wC32", "64-64-wF64",
             "64-64-wC64", "32-32-vC32-wF32"],
        indirect=["s_e_dfmmv"])
    def test_dfmmv(self, s_A, s_B, v, w, Adt, Bdt, vo, vdt, wo, wdt, kernel,
                   s_e_dfmmv, cpu):
        A = fix_sparse_mat(s_A[0], dtype=Adt)
        B = fix_sparse_mat(s_B[0], dtype=Bdt)
        v = fix_mat(v, order=vo, dtype=vdt)
        w = fix_mat(w, order=wo, dtype=wdt)

        opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.dmmv,
                       s_e_dfmmv, (A, B, v, w),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(m, t, dtype=A.dtype)
        _run_fmmv_test(kernel.dmmv,
                       s_e_dfmmv, (A, B, v, w),
                       out=out,
                       rtol=rtol,
                       opt=opt)

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    @pytest.mark.xfail(reason="Squared-norm not implemented for CUDA tensors",
                       run=True)
    @pytest.mark.parametrize(
        "Adt,Bdt,vo,vdt,wo,wdt,s_e_dfmmv",
        [
            pytest.param(n32, n32, "F", n32, "F", n32, "s_e_dfmmv1",
                         marks=mark.usefixtures("s_e_dfmmv1")),
            pytest.param(n32, n32, "F", n32, None, None, "s_e_dfmmv2",
                         marks=mark.usefixtures("s_e_dfmmv2")),
            pytest.param(n32, n32, None, None, "F", n32, "s_e_dfmmv3",
                         marks=mark.usefixtures("s_e_dfmmv3")),
        ],
        ids=["32-32-vF32-wF32", "32-32-vF32", "32-32-wF32"],
        indirect=["s_e_dfmmv"])
    def test_dfmmv_input_devices(self, s_A, s_B, v, w, Adt, Bdt, vo, vdt, wo,
                                 wdt, kernel, s_e_dfmmv):
        input_device = "cuda:0"
        A = fix_sparse_mat(s_A[0], dtype=Adt, device=input_device)
        B = fix_sparse_mat(s_B[0], dtype=Bdt, device=input_device)
        v = fix_mat(v, order=vo, dtype=vdt, device=input_device)
        w = fix_mat(w, order=wo, dtype=wdt, device=input_device)

        opt = dataclasses.replace(self.basic_options, use_cpu=False)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.dmmv,
                       s_e_dfmmv, (A, B, v, w),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(m, t, dtype=A.dtype, device=input_device)
        _run_fmmv_test(kernel.dmmv,
                       s_e_dfmmv, (A, B, v, w),
                       out=out,
                       rtol=rtol,
                       opt=opt)
Example #13
class TestDense:
    basic_options = FalkonOptions(debug=True,
                                  compute_arch_speed=False,
                                  keops_active="no",
                                  max_gpu_mem=max_mem_dense,
                                  max_cpu_mem=max_mem_dense)

    @pytest.mark.parametrize(
        "Ao,Adt,Bo,Bdt,vo,vdt",
        [
            ("F", np.float32, "F", np.float32, "F", np.float32),
            ("C", np.float32, "C", np.float32, "C", np.float32),
            ("F", np.float64, "F", np.float64, "F", np.float64),
            ("C", np.float64, "C", np.float64, "C", np.float64),
            # A few mixed-contiguity examples
            ("F", np.float32, "C", np.float32, "F", np.float32),
            ("F", np.float32, "C", np.float32, "C", np.float32),
        ],
        ids=[
            "AF32-BF32-vF32", "AC32-BC32-vC32", "AF64-BF64-vF64",
            "AC64-BC64-vC64", "AF32-BC32-vF32", "AF32-BC32-vC32"
        ])
    @pytest.mark.parametrize("cpu", cpu_params, ids=["cpu", "gpu"])
    def test_fmmv(self, A, B, v, Ao, Adt, Bo, Bdt, vo, vdt, kernel,
                  expected_fmmv, cpu):
        A = fix_mat(A, order=Ao, dtype=Adt)
        B = fix_mat(B, order=Bo, dtype=Bdt)
        v = fix_mat(v, order=vo, dtype=vdt)

        opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.mmv,
                       expected_fmmv, (A, B, v),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype)
        _run_fmmv_test(kernel.mmv,
                       expected_fmmv, (A, B, v),
                       out=out,
                       rtol=rtol,
                       opt=opt)

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    @pytest.mark.parametrize("Ao,Adt,Bo,Bdt,vo,vdt", [
        ("F", np.float32, "F", np.float32, "F", np.float32),
    ],
                             ids=["AF32-BF32-vF32"])
    def test_fmmv_input_device(self, A, B, v, Ao, Adt, Bo, Bdt, vo, vdt,
                               kernel, expected_fmmv):
        input_device = "cuda:0"
        A = fix_mat(A, order=Ao, dtype=Adt, device=input_device)
        B = fix_mat(B, order=Bo, dtype=Bdt, device=input_device)
        v = fix_mat(v, order=vo, dtype=vdt, device=input_device)

        opt = dataclasses.replace(self.basic_options, use_cpu=False)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.mmv,
                       expected_fmmv, (A, B, v),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(A.shape[0],
                          v.shape[1],
                          dtype=A.dtype,
                          device=input_device)
        _run_fmmv_test(kernel.mmv,
                       expected_fmmv, (A, B, v),
                       out=out,
                       rtol=rtol,
                       opt=opt)

    @pytest.mark.parametrize("cpu", cpu_params, ids=["cpu", "gpu"])
    @pytest.mark.parametrize(
        "Ao,Adt,Bo,Bdt,vo,vdt,wo,wdt,e_dfmmv",
        [
            pytest.param("F", n32, "F", n32, "F", n32, "F", n32, "e_dfmmv1",
                         marks=mark.usefixtures("e_dfmmv1")),
            pytest.param("C", n32, "C", n32, "C", n32, "C", n32, "e_dfmmv1",
                         marks=mark.usefixtures("e_dfmmv1")),
            pytest.param("F", n64, "F", n64, "F", n64, "F", n64, "e_dfmmv1",
                         marks=mark.usefixtures("e_dfmmv1")),
            pytest.param("C", n64, "C", n64, "C", n64, "C", n64, "e_dfmmv1",
                         marks=mark.usefixtures("e_dfmmv1")),
            pytest.param("F", n32, "F", n32, "F", n32, None, None, "e_dfmmv2",
                         marks=mark.usefixtures("e_dfmmv2")),
            pytest.param("C", n32, "C", n32, "C", n32, None, None, "e_dfmmv2",
                         marks=mark.usefixtures("e_dfmmv2")),
            pytest.param("F", n64, "F", n64, "F", n64, None, None, "e_dfmmv2",
                         marks=mark.usefixtures("e_dfmmv2")),
            pytest.param("C", n64, "C", n64, "C", n64, None, None, "e_dfmmv2",
                         marks=mark.usefixtures("e_dfmmv2")),
            pytest.param("F", n32, "F", n32, None, None, "F", n32, "e_dfmmv3",
                         marks=mark.usefixtures("e_dfmmv3")),
            pytest.param("C", n32, "C", n32, None, None, "C", n32, "e_dfmmv3",
                         marks=mark.usefixtures("e_dfmmv3")),
            pytest.param("F", n64, "F", n64, None, None, "F", n64, "e_dfmmv3",
                         marks=mark.usefixtures("e_dfmmv3")),
            pytest.param("C", n64, "C", n64, None, None, "C", n64, "e_dfmmv3",
                         marks=mark.usefixtures("e_dfmmv3")),
            # A few mixed-contiguity examples
            pytest.param("F", n32, "C", n32, "C", n32, "F", n32, "e_dfmmv1",
                         marks=mark.usefixtures("e_dfmmv1")),
        ],
        ids=["F32-F32-vF32-wF32", "C32-C32-vC32-wC32", "F64-F64-vF64-wF64",
             "C64-C64-vC64-wC64", "F32-F32-vF32", "C32-C32-vC32",
             "F64-F64-vF64", "C64-C64-vC64", "F32-F32-wF32", "C32-C32-wC32",
             "F64-F64-wF64", "C64-C64-wC64", "F32-C32-vC32-wF32"],
        indirect=["e_dfmmv"])
    def test_dfmmv(self, A, B, v, w, Ao, Adt, Bo, Bdt, vo, vdt, wo, wdt,
                   kernel, e_dfmmv, cpu):
        A = fix_mat(A, order=Ao, dtype=Adt)
        B = fix_mat(B, order=Bo, dtype=Bdt)
        v = fix_mat(v, order=vo, dtype=vdt)
        w = fix_mat(w, order=wo, dtype=wdt)

        opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.dmmv,
                       e_dfmmv, (A, B, v, w),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(m, t, dtype=A.dtype)
        _run_fmmv_test(kernel.dmmv,
                       e_dfmmv, (A, B, v, w),
                       out=out,
                       rtol=rtol,
                       opt=opt)

    @pytest.mark.parametrize(
        "Ao,Adt,Bo,Bdt,vo,vdt,wo,wdt,e_dfmmv",
        [
            pytest.param("F", n32, "F", n32, "F", n32, "F", n32, "e_dfmmv1",
                         marks=mark.usefixtures("e_dfmmv1")),
            pytest.param("F", n32, "F", n32, "F", n32, None, None, "e_dfmmv2",
                         marks=mark.usefixtures("e_dfmmv2")),
            pytest.param("F", n32, "F", n32, None, None, "F", n32, "e_dfmmv3",
                         marks=mark.usefixtures("e_dfmmv3")),
        ],
        ids=["F32-F32-vF32-wF32", "F32-F32-vF32", "F32-F32-wF32"],
        indirect=["e_dfmmv"])
    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_dfmmv_input_device(self, A, B, v, w, Ao, Adt, Bo, Bdt, vo, vdt,
                                wo, wdt, kernel, e_dfmmv):
        input_device = "cuda:0"
        A = fix_mat(A, order=Ao, dtype=Adt, device=input_device)
        B = fix_mat(B, order=Bo, dtype=Bdt, device=input_device)
        v = fix_mat(v, order=vo, dtype=vdt, device=input_device)
        w = fix_mat(w, order=wo, dtype=wdt, device=input_device)

        opt = dataclasses.replace(self.basic_options, use_cpu=False)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.dmmv,
                       e_dfmmv, (A, B, v, w),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(m, t, dtype=A.dtype, device=input_device)
        _run_fmmv_test(kernel.dmmv,
                       e_dfmmv, (A, B, v, w),
                       out=out,
                       rtol=rtol,
                       opt=opt)

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_incorrect_dev_setting(self, A, B, v, w, kernel, e_dfmmv1,
                                   expected_fmmv):
        # tests when use_cpu = True, but CUDA input tensors
        A = A.cuda()
        B = B.cuda()
        v = v.cuda()
        w = w.cuda()
        opt = dataclasses.replace(self.basic_options, use_cpu=True)
        rtol = choose_on_dtype(A.dtype)

        with pytest.warns(UserWarning,
                          match='backend was chosen to be CPU, but GPU input tensors found'):
            _run_fmmv_test(kernel.dmmv,
                           e_dfmmv1, (A, B, v, w),
                           out=None,
                           rtol=rtol,
                           opt=opt)

        with pytest.warns(UserWarning,
                          match='backend was chosen to be CPU, but GPU input tensors found'):
            _run_fmmv_test(kernel.mmv,
                           expected_fmmv, (A, B, v),
                           out=None,
                           rtol=rtol,
                           opt=opt)
Example #14
from falkon.tests.gen_random import gen_random, gen_sparse_matrix
from falkon.utils import decide_cuda

n32 = np.float32
n64 = np.float64
# Global dimensions
n = 1000
m = 850
d = 10
t = 5
max_mem_dense = 0.5 * 2**20
max_mem_sparse = 0.5 * 2**20
cpu_params = [
    pytest.param(True),
    pytest.param(
        False, marks=[mark.skipif(not decide_cuda(), reason="No GPU found.")])
]


def choose_on_dtype(dtype):
    if dtype == np.float64 or dtype == torch.float64:
        return 1e-12
    else:
        return 1e-4


def numpy_to_torch_type(dt):
    if dt == np.float32:
        return torch.float32
    elif dt == np.float64:
        return torch.float64
    raise TypeError("Unsupported dtype %s" % (dt,))
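
The tests in this dump also lean on a `fix_mat` helper from `falkon.tests.conftest` whose body is not shown. A hypothetical re-implementation, just to make the examples readable in isolation (the real helper may differ, e.g. in how it handles GPU inputs):

def fix_mat_sketch(t, dtype, order=None, device="cpu", copy=True):
    # Cast to `dtype`, enforce C/F memory layout, and move to `device`.
    a = t.numpy() if isinstance(t, torch.Tensor) else np.asarray(t)
    a = a.astype(dtype, order=order or "K", copy=copy)
    return torch.from_numpy(a).to(device=device)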
Example #15
class TestWeightedFalkon:
    @pytest.mark.parametrize("cuda_usage", [
        pytest.param("incore",
                     marks=[
                         pytest.mark.skipif(not decide_cuda(),
                                            reason="No GPU found.")
                     ]),
        pytest.param("mixed",
                     marks=[
                         pytest.mark.skipif(not decide_cuda(),
                                            reason="No GPU found.")
                     ]),
        "cpu_only",
    ])
    def test_classif(self, cls_data, cuda_usage):
        X, Y = cls_data
        if cuda_usage == "incore":
            X, Y = X.cuda(), Y.cuda()
            flk_cls = InCoreFalkon
        else:
            flk_cls = Falkon
        kernel = kernels.GaussianKernel(2.0)

        def error_fn(t, p):
            return 100 * torch.sum(t * p <= 0).to(
                torch.float32) / t.shape[0], "c-err"

        def weight_fn(y):
            weight = torch.empty_like(y)
            weight[y == 1] = 1
            weight[y == -1] = 2
            return weight

        opt = FalkonOptions(use_cpu=cuda_usage == "cpu_only",
                            keops_active="no",
                            debug=False)

        flk_weight = flk_cls(kernel=kernel,
                             penalty=1e-6,
                             M=500,
                             seed=10,
                             options=opt,
                             error_fn=error_fn,
                             weight_fn=weight_fn)
        flk_weight.fit(X, Y)
        preds_weight = flk_weight.predict(X)
        preds_weight_m1 = preds_weight[Y == -1]
        preds_weight_p1 = preds_weight[Y == 1]
        err_weight_m1 = error_fn(preds_weight_m1, Y[Y == -1])[0]
        err_weight_p1 = error_fn(preds_weight_p1, Y[Y == 1])[0]

        flk = flk_cls(kernel=kernel,
                      penalty=1e-6,
                      M=500,
                      seed=10,
                      options=opt,
                      error_fn=error_fn,
                      weight_fn=None)
        flk.fit(X, Y)
        preds = flk.predict(X)
        preds_m1 = preds[Y == -1]
        preds_p1 = preds[Y == 1]
        err_m1 = error_fn(preds_m1, Y[Y == -1])[0]
        err_p1 = error_fn(preds_p1, Y[Y == 1])[0]

        print(
            "Weighted errors: -1 (%f) +1 (%f) -- Normal errors: -1 (%f) +1 (%f)"
            % (err_weight_m1, err_weight_p1, err_m1, err_p1))

        assert err_weight_m1 < err_m1, "Error of weighted class is higher than without weighting"
        assert err_weight_p1 >= err_p1, "Error of unweighted class is lower than in flk with no weights"
Example #16
import dataclasses

import numpy as np
import pytest
import scipy.linalg.lapack as scll
import torch

from falkon.options import FalkonOptions
from falkon.tests.conftest import memory_checker, fix_mat
from falkon.utils import decide_cuda
from falkon.utils.helpers import sizeof_dtype
from falkon.utils.tensor_helpers import move_tensor
from falkon.ooc_ops.ooc_utils import calc_block_sizes3

if decide_cuda():
    from falkon.ooc_ops.ooc_lauum import gpu_lauum
    # noinspection PyUnresolvedReferences
    from falkon.ooc_ops.cuda import cuda_lauum


class TestBlockSizeCalculator:
    def test_small_edge(self):
        assert calc_block_sizes3(max_block_size=1, num_devices=4,
                                 num_rows=3) == [1, 1, 1]
        assert calc_block_sizes3(max_block_size=1, num_devices=5,
                                 num_rows=1) == [1]

    def test_small(self):
        assert calc_block_sizes3(max_block_size=10000,
                                 num_devices=2,
                                 num_rows=100) == [100]
Example #17
def initialize_cuda():
    # Setup code, executed ahead of the first test.
    opt = BaseOptions(compute_arch_speed=False, use_cpu=False)
    if decide_cuda():
        initialization.init(opt)
Example #18
import dataclasses
from contextlib import contextmanager

import numpy as np
import pytest
import torch
import torch.cuda as tcd
from falkon.utils.tensor_helpers import move_tensor

from falkon.options import BaseOptions, FalkonOptions
from falkon.utils import decide_cuda
from falkon.utils.devices import _cpu_used_mem

if decide_cuda():
    from falkon.cuda import initialization

    @pytest.fixture(scope="session", autouse=True)
    def initialize_cuda():
        # Setup code, executed ahead of the first test.
        opt = BaseOptions(compute_arch_speed=False, use_cpu=False)
        if decide_cuda():
            initialization.init(opt)


@contextmanager
def memory_checker(opt: FalkonOptions, extra_mem=0):
    is_cpu = opt.use_cpu
    mem_check = False
    if (is_cpu and opt.max_cpu_mem < np.inf) or (not is_cpu
                                                 and opt.max_gpu_mem < np.inf):
        mem_check = True
Example #19
class TestFalkonPreconditioner:
    rtol = {np.float64: 1e-10, np.float32: 1e-2}
    basic_opt = FalkonOptions(compute_arch_speed=False, no_single_kernel=True)

    @pytest.mark.parametrize("order", ["C", "F"])
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    @pytest.mark.parametrize("cpu", [
        pytest.param(True),
        pytest.param(False,
                     marks=[
                         pytest.mark.skipif(not decide_cuda(),
                                            reason="No GPU found.")
                     ])
    ],
                             ids=["cpu", "gpu"])
    def test_simple(self, mat, kernel, gram, cpu, dtype, order):
        opt = dataclasses.replace(self.basic_opt,
                                  use_cpu=cpu,
                                  cpu_preconditioner=cpu)
        rtol = self.rtol[dtype]

        mat = fix_mat(mat, dtype=dtype, order=order, copy=True)
        gram = fix_mat(gram, dtype=dtype, order=order, copy=True)

        la = 100
        prec = FalkonPreconditioner(la, kernel, opt)
        prec.init(mat)
        assert_invariant_on_TT(prec, gram, tol=rtol)
        assert_invariant_on_AT(prec, gram, la, tol=rtol)
        assert_invariant_on_T(prec, gram, tol=rtol * 10)
        assert_invariant_on_prec(prec, N, gram, la, tol=rtol * 10)

    @pytest.mark.parametrize("cpu", [
        pytest.param(True),
        pytest.param(False,
                     marks=[
                         pytest.mark.skipif(not decide_cuda(),
                                            reason="No GPU found.")
                     ])
    ],
                             ids=["cpu", "gpu"])
    def test_zero_lambda(self, mat, kernel, gram, cpu):
        opt = dataclasses.replace(self.basic_opt,
                                  use_cpu=cpu,
                                  cpu_preconditioner=cpu)
        mat = fix_mat(mat, dtype=np.float64, order="K", copy=True)
        gram = fix_mat(gram, dtype=np.float64, order="K", copy=True)

        la = 0
        prec = FalkonPreconditioner(la, kernel, opt)
        prec.init(mat)
        assert_invariant_on_TT(prec, gram, tol=1e-10)
        assert_invariant_on_AT(prec, gram, la, tol=1e-10)
        assert_invariant_on_T(prec, gram, tol=1e-9)
        assert_invariant_on_prec(prec, N, gram, la, tol=1e-8)

    @pytest.mark.parametrize("order", ["C", "F"])
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_cuda_start(self, mat, kernel, gram, dtype, order):
        opt = dataclasses.replace(self.basic_opt,
                                  use_cpu=False,
                                  cpu_preconditioner=False)
        rtol = self.rtol[dtype]

        mat = fix_mat(mat, dtype=dtype, order=order, copy=True)
        gpu_mat = move_tensor(mat, "cuda:0")
        gram = fix_mat(gram, dtype=dtype, order=order, copy=True)
        gpu_gram = move_tensor(gram, "cuda:0")

        la = 1

        prec = FalkonPreconditioner(la, kernel, opt)
        prec.init(mat)

        gpu_prec = FalkonPreconditioner(la, kernel, opt)
        gpu_prec.init(gpu_mat)

        np.testing.assert_allclose(prec.dT.numpy(),
                                   gpu_prec.dT.cpu().numpy(),
                                   rtol=rtol)
        np.testing.assert_allclose(prec.dA.numpy(),
                                   gpu_prec.dA.cpu().numpy(),
                                   rtol=rtol)
        np.testing.assert_allclose(prec.fC.numpy(),
                                   gpu_prec.fC.cpu().numpy(),
                                   rtol=rtol * 10)
        assert gpu_prec.fC.device == gpu_mat.device, "Device changed unexpectedly"

        assert_invariant_on_TT(gpu_prec, gpu_gram, tol=rtol)
        assert_invariant_on_AT(prec, gram, la, tol=rtol)
        assert_invariant_on_T(prec, gram, tol=rtol * 10)
        assert_invariant_on_prec(prec, N, gram, la, tol=rtol * 10)
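
For readers of the invariants asserted above, a hedged sketch of the algebra they exercise. This follows the FALKON paper's preconditioner construction as an assumption; the exact normalization constants are not visible in this snippet, so they are written as proportionalities:

    K_{MM} = T^\top T                       % Cholesky factor; diagonal kept in dT
    A^\top A \propto T T^\top + \lambda I   % second factor; diagonal kept in dA
    B B^\top \propto ( K_{MM}^2 + \lambda K_{MM} )^{-1}

Applying the preconditioner then amounts to two triangular solves, which is presumably what `assert_invariant_on_T`, `assert_invariant_on_AT` and `assert_invariant_on_prec` verify against the dense `gram` matrix.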
Example #20
@pytest.fixture
def colmaj_arr() -> torch.Tensor:
    return torch.from_numpy(gen_random(M, D, 'float64', True))


@pytest.fixture
def uniform_sel() -> UniformSelector:
    return UniformSelector(np.random.default_rng(0))


@pytest.mark.parametrize("device", [
    pytest.param("cpu"),
    pytest.param(
        "cuda:0",
        marks=[pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")])
])
def test_c_order(uniform_sel, rowmaj_arr, device):
    rowmaj_arr = rowmaj_arr.to(device=device)
    centers = uniform_sel.select(rowmaj_arr, None, 100)
    assert centers.stride() == (D, 1), "UniformSel changed input stride"
    assert centers.size() == (100, D), "UniformSel did not output correct size"
    assert centers.dtype == rowmaj_arr.dtype
    assert centers.device == rowmaj_arr.device


def test_cuda(uniform_sel, rowmaj_arr):
    centers = uniform_sel.select(rowmaj_arr, None, 100)
    assert centers.stride() == (D, 1), "UniformSel changed input stride"
    assert centers.size() == (100, D), "UniformSel did not output correct size"
    assert centers.dtype == rowmaj_arr.dtype
Example #21
                                   rtol=rtol)
        np.testing.assert_allclose(prec.dA.numpy(),
                                   gpu_prec.dA.cpu().numpy(),
                                   rtol=rtol)
        np.testing.assert_allclose(prec.fC.numpy(),
                                   gpu_prec.fC.cpu().numpy(),
                                   rtol=rtol * 10)
        assert gpu_prec.fC.device == gpu_mat.device, "Device changed unexpectedly"

        assert_invariant_on_TT(gpu_prec, gpu_gram, tol=rtol)
        assert_invariant_on_AT(prec, gram, la, tol=rtol)
        assert_invariant_on_T(prec, gram, tol=rtol * 10)
        assert_invariant_on_prec(prec, N, gram, la, tol=rtol * 10)


@unittest.skipIf(not decide_cuda(), "No GPU found.")
def test_cpu_gpu_equality(mat, kernel, gram):
    la = 12.3

    mat = fix_mat(mat, dtype=np.float64, order="F", copy=True)

    opt = FalkonOptions(compute_arch_speed=False,
                        use_cpu=False,
                        cpu_preconditioner=False)
    prec_gpu = FalkonPreconditioner(la, kernel, opt)
    prec_gpu.init(mat)

    opt = dataclasses.replace(opt, use_cpu=True, cpu_preconditioner=True)
    prec_cpu = FalkonPreconditioner(la, kernel, opt)
    prec_cpu.init(mat)
Example #22
@pytest.fixture(scope="module")
def mat():
    return gen_random(M, M, 'float64', F=True, seed=10)


@pytest.fixture(scope="module")
def arr():
    return gen_random(M, T, 'float64', F=True, seed=12)


@pytest.mark.parametrize("order", ["C", "F"])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("lower", [True, False], ids=["lower", "upper"])
@pytest.mark.parametrize("transpose", [True, False], ids=["transpose", "no_transpose"])
@pytest.mark.parametrize("device", [
    pytest.param("cpu"),
    pytest.param("cuda:0", marks=[pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")])])
def test_trsm_wrapper(mat, arr, dtype, order, device, lower, transpose):
    rtol = 1e-2 if dtype == np.float32 else 1e-11

    n_mat = move_tensor(fix_mat(mat, dtype=dtype, order=order, copy=True), device=device)
    n_arr = move_tensor(fix_mat(arr, dtype=dtype, order=order, copy=True), device=device)

    expected = sclb.dtrsm(1e-2, mat, arr, side=0, lower=lower, trans_a=transpose, overwrite_b=0)

    if device.startswith("cuda") and order == "C":
        with pytest.raises(ValueError):
            actual = trsm(n_arr, n_mat, alpha=1e-2, lower=lower, transpose=transpose)
    else:
        actual = trsm(n_arr, n_mat, alpha=1e-2, lower=lower, transpose=transpose)
        np.testing.assert_allclose(expected, actual.cpu().numpy(), rtol=rtol)
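
An equivalent way to state what `trsm` computes: a scaled triangular solve, X = alpha * op(A)^{-1} @ B with op(A) = A or A.T. A hedged cross-check using `scipy.linalg.solve_triangular` instead of the raw BLAS wrapper:

import numpy as np
from scipy.linalg import solve_triangular

def trsm_reference(a, b, alpha=1.0, lower=False, transpose=False):
    # Mirrors BLAS ?trsm with side=0 (triangular A on the left):
    # solves op(A) @ X = alpha * B for X.
    return solve_triangular(a, alpha * np.asarray(b),
                            lower=lower, trans=1 if transpose else 0)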
Example #23
@pytest.fixture(scope="module")
def w() -> torch.Tensor:
    return torch.from_numpy(gen_random(n, t, 'float64', False, seed=92))


@pytest.fixture(scope="module")
def rtol() -> dict:
    return {
        torch.float32: 1e-5,
        torch.float64: 1e-12,
    }


@pytest.mark.parametrize("cpu", [
    pytest.param(True),
    pytest.param(False, marks=[pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")])
], ids=["cpu", "gpu"])
class AbstractKernelTester(abc.ABC):
    max_mem = 2 * 2**20
    basic_options = FalkonOptions(debug=True, compute_arch_speed=False,
                                  max_cpu_mem=max_mem, max_gpu_mem=max_mem)

    @pytest.fixture(scope="class")
    def exp_v(self, exp_k: np.ndarray, v: torch.Tensor) -> np.ndarray:
        return exp_k @ v.numpy()

    @pytest.fixture(scope="class")
    def exp_dv(self, exp_k: np.ndarray, v: torch.Tensor) -> np.ndarray:
        return exp_k.T @ (exp_k @ v.numpy())

    @pytest.fixture(scope="class")
Example #24
class TestMatMul():
    @pytest.fixture(scope="class")
    def mat1(self):
        return torch.randn(200, 10)

    @pytest.fixture(scope="class")
    def mat2(self):
        return torch.randn(10, 100)

    @pytest.fixture(scope="class")
    def expected(self, mat1, mat2):
        return mat1 @ mat2

    @pytest.mark.parametrize("device", [
        "cpu",
        pytest.param("cuda:0",
                     marks=pytest.mark.skipif(not decide_cuda(),
                                              reason="No GPU found."))
    ])
    def test_matmul_zeros(self, mat1, mat2, expected, device):
        mat1_zero_csr = SparseTensor.from_scipy(
            scipy.sparse.csr_matrix(
                torch.zeros_like(mat1).numpy())).to(device=device)
        mat2_csc = SparseTensor.from_scipy(
            scipy.sparse.csc_matrix(mat2.numpy())).to(device=device)
        out = torch.empty_like(expected).to(device)
        sparse_matmul(mat1_zero_csr, mat2_csc, out)
        assert torch.all(out == 0.0)

        mat1_csr = SparseTensor.from_scipy(
            scipy.sparse.csr_matrix(mat1.numpy())).to(device=device)
        mat2_zero_csc = SparseTensor.from_scipy(
            scipy.sparse.csc_matrix(
                torch.zeros_like(mat2).numpy())).to(device=device)
        out = torch.empty_like(expected).to(device=device)
        sparse_matmul(mat1_csr, mat2_zero_csc, out)
        assert torch.all(out == 0.0)

    def test_cpu_matmul_wrong_format(self, mat1, mat2, expected):
        out = torch.empty_like(expected)
        mat1_csr = SparseTensor.from_scipy(scipy.sparse.csr_matrix(mat1))
        mat2_csr = SparseTensor.from_scipy(scipy.sparse.csr_matrix(mat2))
        with pytest.raises(ValueError) as exc_info:
            sparse_matmul(mat1_csr, mat2_csr, out)
        assert str(exc_info.value).startswith("B must be CSC matrix")
        mat1_csc = SparseTensor.from_scipy(scipy.sparse.csc_matrix(mat1))
        with pytest.raises(ValueError) as exc_info:
            sparse_matmul(mat1_csc, mat2_csr, out)
        assert str(exc_info.value).startswith("A must be CSR matrix")

    def test_cpu_matmul(self, mat1, mat2, expected):
        out = torch.empty_like(expected)
        mat1_csr = SparseTensor.from_scipy(scipy.sparse.csr_matrix(mat1))
        mat2_csc = SparseTensor.from_scipy(scipy.sparse.csc_matrix(mat2))
        sparse_matmul(mat1_csr, mat2_csc, out)

        torch.testing.assert_allclose(out, expected)

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_cuda_matmul_wrong_format(self, mat1, mat2, expected):
        dev = torch.device("cuda:0")
        out = torch.empty_like(expected).to(device=dev)
        mat1_csr = SparseTensor.from_scipy(
            scipy.sparse.csr_matrix(mat1)).to(device=dev)
        mat2_csc = SparseTensor.from_scipy(
            scipy.sparse.csc_matrix(mat2)).to(device=dev)
        with pytest.raises(ValueError) as exc_info:
            sparse_matmul(mat1_csr, mat2_csc, out)
        assert str(exc_info.value).startswith("B must be CSR matrix")
        mat1_csc = SparseTensor.from_scipy(scipy.sparse.csc_matrix(mat1))
        with pytest.raises(ValueError) as exc_info:
            sparse_matmul(mat1_csc, mat2_csc, out)
        assert str(exc_info.value).startswith("A must be CSR matrix")

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_cuda_matmul(self, mat1, mat2, expected):
        dev = torch.device("cuda:0")
        out = create_fortran(expected.shape, expected.dtype, dev)
        mat1_csr = SparseTensor.from_scipy(
            scipy.sparse.csr_matrix(mat1)).to(device=dev)
        mat2_csr = SparseTensor.from_scipy(
            scipy.sparse.csr_matrix(mat2)).to(device=dev)
        sparse_matmul(mat1_csr, mat2_csr, out)

        torch.testing.assert_allclose(out.cpu(), expected)
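Distilled from the tests above, a minimal CPU usage sketch (the import path for SparseTensor and sparse_matmul is an assumption): A must be in CSR format, B in CSC, and the dense output buffer is pre-allocated and written in place.

import torch
import scipy.sparse
# from falkon.sparse import SparseTensor, sparse_matmul   # assumed import path

A = SparseTensor.from_scipy(scipy.sparse.csr_matrix(torch.randn(200, 10).numpy()))
B = SparseTensor.from_scipy(scipy.sparse.csc_matrix(torch.randn(10, 100).numpy()))
out = torch.empty(200, 100)   # dense output, written in place
sparse_matmul(A, B, out)      # raises ValueError on wrong formats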
Example #25
            torch.from_numpy(exp).to(dtype=act.dtype))

        out = torch.empty(csr_mat.shape[0],
                          dtype=csr_mat.dtype,
                          device=csr_mat.device)
        act = function(csr_mat, out=out)
        assert out.data_ptr() == act.data_ptr()
        torch.testing.assert_allclose(
            act,
            torch.from_numpy(exp).to(dtype=act.dtype).reshape(-1))


@pytest.mark.parametrize("device", [
    "cpu",
    pytest.param("cuda:0",
                 marks=pytest.mark.skipif(not decide_cuda(),
                                          reason="No GPU found."))
])
class TestMyTranspose:
    def test_simple_transpose(self, device, csr_mat):
        arr = csr_mat.to(device=device)
        tr_arr = arr.transpose_csc()
        assert tr_arr.shape == (2, 3), \
            "expected transpose shape to be (2, 3), but found %s" % (tr_arr.shape,)
        tr_mat = tr_arr.to_scipy().tocoo()
        assert tr_mat.row.tolist() == [1, 0, 1, 0], \
            "expected rows [1, 0, 1, 0], but found %s" % tr_mat.row.tolist()
        assert tr_mat.col.tolist() == [
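The final assertion above is truncated in the source. For reference, a small self-contained sketch of the transpose roundtrip being tested; the concrete 3x2 matrix is hypothetical, chosen to produce the four non-zeros implied by the row assertion:

import scipy.sparse

mat = scipy.sparse.csr_matrix([[0., 1.], [2., 3.], [4., 0.]])  # hypothetical 3x2 input
arr = SparseTensor.from_scipy(mat)
tr_arr = arr.transpose_csc()          # transpose, shape (2, 3)
tr_mat = tr_arr.to_scipy().tocoo()
print(tr_mat.row.tolist())            # [1, 0, 1, 0] for this layout
print(tr_mat.col.tolist())            # [0, 1, 1, 2]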
Example #26
class TestFalkon:
    def test_classif(self, cls_data):
        X, Y = cls_data
        kernel = kernels.GaussianKernel(2.0)
        torch.manual_seed(13)
        np.random.seed(13)

        def error_fn(t, p):
            # percentage of sign disagreements (binary classification error)
            return 100 * torch.sum(t * p <= 0).to(torch.float32) / t.shape[0], "c-err"

        opt = FalkonOptions(use_cpu=True, keops_active="no", debug=True)

        flk = Falkon(kernel=kernel,
                     penalty=1e-6,
                     M=500,
                     seed=10,
                     options=opt,
                     error_fn=error_fn)
        flk.fit(X, Y)
        preds = flk.predict(X)
        err = error_fn(preds, Y)[0]
        assert err < 5

    def test_multiclass(self, multicls_data):
        X, Y = multicls_data
        kernel = kernels.GaussianKernel(10.0)

        def error_fn(t, p):
            t = torch.argmax(t, dim=1)
            p = torch.argmax(p, dim=1)
            # fraction of misclassified examples
            return torch.mean((t.reshape(-1) != p.reshape(-1)).to(torch.float64)), "multic-err"

        opt = FalkonOptions(use_cpu=True, keops_active="no", debug=True)

        flk = Falkon(kernel=kernel,
                     penalty=1e-6,
                     M=500,
                     seed=10,
                     options=opt,
                     error_fn=error_fn)
        flk.fit(X, Y)
        preds = flk.predict(X)
        err = error_fn(preds, Y)[0]
        assert err < 0.23

    def test_regression(self, reg_data):
        Xtr, Ytr, Xts, Yts = reg_data
        kernel = kernels.GaussianKernel(20.0)

        def error_fn(t, p):
            return torch.sqrt(torch.mean((t - p)**2)), "RMSE"

        opt = FalkonOptions(use_cpu=True, keops_active="no", debug=True)

        flk = Falkon(kernel=kernel,
                     penalty=1e-6,
                     M=500,
                     seed=10,
                     options=opt,
                     error_fn=error_fn)
        flk.fit(Xtr, Ytr, Xts=Xts, Yts=Yts)

        assert flk.predict(Xts).shape == (Yts.shape[0], 1)
        ts_err = error_fn(flk.predict(Xts), Yts)[0]
        tr_err = error_fn(flk.predict(Xtr), Ytr)[0]
        assert tr_err < ts_err
        assert ts_err < 2.5

    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_cuda_predict(self, reg_data):
        Xtr, Ytr, Xts, Yts = reg_data
        kernel = kernels.GaussianKernel(20.0)

        def error_fn(t, p):
            return torch.sqrt(torch.mean((t - p)**2)), "RMSE"

        opt = FalkonOptions(use_cpu=False,
                            keops_active="no",
                            debug=True,
                            min_cuda_pc_size_64=1,
                            min_cuda_iter_size_64=1)

        flk = Falkon(kernel=kernel,
                     penalty=1e-6,
                     M=500,
                     seed=10,
                     options=opt,
                     error_fn=error_fn)
        flk.fit(Xtr, Ytr, Xts=Xts, Yts=Yts)
        flk.to("cuda:0")

        cuda_ts_preds = flk.predict(Xts.to("cuda:0"))
        cuda_tr_preds = flk.predict(Xtr.to("cuda:0"))
        assert cuda_ts_preds.device.type == "cuda"
        assert cuda_ts_preds.shape == (Yts.shape[0], 1)
        ts_err = error_fn(cuda_ts_preds.cpu(), Yts)[0]
        tr_err = error_fn(cuda_tr_preds.cpu(), Ytr)[0]
        assert tr_err < ts_err
        assert ts_err < 2.5
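For reference, the estimator pattern exercised throughout this example in its minimal form; the synthetic data is hypothetical and the top-level imports are assumed to match the test modules:

import torch
from falkon import Falkon, FalkonOptions, kernels

X = torch.randn(1000, 5, dtype=torch.float64)
Y = X.sum(dim=1, keepdim=True) + 0.1 * torch.randn(1000, 1, dtype=torch.float64)

opt = FalkonOptions(use_cpu=True, keops_active="no")
flk = Falkon(kernel=kernels.GaussianKernel(3.0), penalty=1e-6, M=100, options=opt)
flk.fit(X, Y)
preds = flk.predict(X)   # shape (1000, 1), matching the tests above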
Example #27
class TestVecMulTriang:
    MAT_SIZE = 120

    @pytest.fixture(scope="class")
    def mat(self):
        return torch.from_numpy(
            gen_random(TestVecMulTriang.MAT_SIZE,
                       TestVecMulTriang.MAT_SIZE,
                       'float64',
                       False,
                       seed=91))

    @pytest.fixture(scope="class")
    def vec(self):
        return torch.from_numpy(
            gen_random(TestVecMulTriang.MAT_SIZE, 1, 'float64', False,
                       seed=91))

    @staticmethod
    def exp_vec_mul_triang(mat, vec, upper, side):
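        # Reference implementation: scale the chosen triangle of `mat`
        # elementwise by `vec`, broadcasting along rows (side=0) or
        # columns (side=1); the opposite triangle is left untouched.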
        if side == 0:
            vec = vec.reshape(-1, 1)
        else:
            vec = vec.reshape(1, -1)
        if upper:
            tri_mat = torch.triu(mat, diagonal=0)
            tri_idx = torch.triu_indices(mat.shape[0], mat.shape[1], offset=0)
        else:
            tri_mat = torch.tril(mat, diagonal=0)
            tri_idx = torch.tril_indices(mat.shape[0], mat.shape[1], offset=0)
        tri_mat *= vec
        exp = mat.clone()
        exp[tri_idx[0], tri_idx[1]] = tri_mat[tri_idx[0], tri_idx[1]]
        return exp

    @pytest.mark.parametrize("order", ["F", "C"])
    @pytest.mark.parametrize("upper", [True, False], ids=["upper", "lower"])
    @pytest.mark.parametrize("side", [0, 1], ids=["side0", "side1"])
    @pytest.mark.parametrize("device", [
        "cpu",
        pytest.param("cuda:0",
                     marks=[
                         pytest.mark.skipif(not decide_cuda(),
                                            reason="No GPU found.")
                     ])
    ])
    def test_all_combos(self, mat, vec, order, device, upper, side):
        exp_output = self.exp_vec_mul_triang(mat, vec, upper, side)

        vec = fix_mat(vec,
                      order=order,
                      dtype=np.float64,
                      numpy=False,
                      device=device)
        mat2 = fix_mat(mat,
                       order=order,
                       dtype=np.float64,
                       numpy=False,
                       device=device,
                       copy=True)
        out = vec_mul_triang(mat2, upper=upper, side=side,
                             multipliers=vec).cpu().numpy()
        np.testing.assert_allclose(exp_output.numpy(), out)
        assert out.flags["%s_CONTIGUOUS" % order], \
            "Output is not %s-contiguous" % order

        # Test with different vec orderings
        vec = vec.reshape(1, -1)
        mat2 = fix_mat(mat,
                       order=order,
                       dtype=np.float64,
                       numpy=False,
                       device=device,
                       copy=True)
        out = vec_mul_triang(mat2, upper=upper, side=side,
                             multipliers=vec).cpu().numpy()
        np.testing.assert_allclose(exp_output.numpy(),
                                   out,
                                   err_msg="Vec row ordering failed")
        vec = vec.reshape(-1)
        mat2 = fix_mat(mat,
                       order=order,
                       dtype=np.float64,
                       numpy=False,
                       device=device,
                       copy=True)
        out = vec_mul_triang(mat2, upper=upper, side=side,
                             multipliers=vec).cpu().numpy()
        np.testing.assert_allclose(exp_output.numpy(),
                                   out,
                                   err_msg="Vec 1D ordering failed")

    @pytest.mark.benchmark
    @pytest.mark.skipif(not decide_cuda(), reason="No GPU found.")
    def test_large(self):
        t = 20_000
        num_rep = 5
        mat = torch.from_numpy(gen_random(t, t, np.float32, F=False, seed=123))
        vec = torch.from_numpy(
            gen_random(t, 1, np.float32, F=False, seed=124).reshape((-1, )))

        mat_cuda = mat.cuda()
        vec_cuda = vec.cuda()

        cpu_times = []
        for i in range(num_rep):
            t_s = time.time()
            out_cpu = vec_mul_triang(mat, vec, True, 1)
            cpu_times.append(time.time() - t_s)

        gpu_times = []
        for i in range(num_rep):
            t_s = time.time()
            out_cuda = vec_mul_triang(mat_cuda, vec_cuda, True, 1)
            torch.cuda.synchronize()
            gpu_times.append(time.time() - t_s)

        print("mat size %d - t_cpu: %.4fs -- t_cuda: %.4fs" %
              (t, np.min(cpu_times), np.min(gpu_times)))
        np.testing.assert_allclose(out_cpu, out_cuda.cpu().numpy())
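To close, a NumPy sketch of the operation being benchmarked, matching the reference exp_vec_mul_triang above (this reimplementation is for illustration only): with upper=True and side=1, each upper-triangle entry is scaled by the multiplier of its column.

import numpy as np

mat = np.arange(9, dtype=np.float64).reshape(3, 3)
vec = np.array([1., 10., 100.])
ref = mat.copy()
iu = np.triu_indices(3)           # indices of the upper triangle
ref[iu] = mat[iu] * vec[iu[1]]    # scale by column multiplier (side=1)
# Equivalent library call, as used in test_large above:
# vec_mul_triang(torch.from_numpy(mat), torch.from_numpy(vec), True, 1)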