Example #1
    def test_cuda_start(self, mat, kernel, gram, dtype, order):
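        """Building the preconditioner on CUDA inputs should match the CPU result (dT, dA, fC), and fC should stay on the GPU."""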
        opt = dataclasses.replace(self.basic_opt,
                                  use_cpu=False,
                                  cpu_preconditioner=False)
        rtol = self.rtol[dtype]

        mat = fix_mat(mat, dtype=dtype, order=order, copy=True)
        gpu_mat = move_tensor(mat, "cuda:0")
        gram = fix_mat(gram, dtype=dtype, order=order, copy=True)
        gpu_gram = move_tensor(gram, "cuda:0")

        la = 1

        prec = FalkonPreconditioner(la, kernel, opt)
        prec.init(mat)

        gpu_prec = FalkonPreconditioner(la, kernel, opt)
        gpu_prec.init(gpu_mat)

        np.testing.assert_allclose(prec.dT.numpy(),
                                   gpu_prec.dT.cpu().numpy(),
                                   rtol=rtol)
        np.testing.assert_allclose(prec.dA.numpy(),
                                   gpu_prec.dA.cpu().numpy(),
                                   rtol=rtol)
        np.testing.assert_allclose(prec.fC.numpy(),
                                   gpu_prec.fC.cpu().numpy(),
                                   rtol=rtol * 10)
        assert gpu_prec.fC.device == gpu_mat.device, "Device changed unexpectedly"

        assert_invariant_on_TT(gpu_prec, gpu_gram, tol=rtol)
        assert_invariant_on_AT(prec, gram, la, tol=rtol)
        assert_invariant_on_T(prec, gram, tol=rtol * 10)
        assert_invariant_on_prec(prec, N, gram, la, tol=rtol * 10)
Example #2
    def test_fmmv_input_device(self, A, B, v, Ao, Adt, Bo, Bdt, vo, vdt,
                               kernel, expected_fmmv):
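        """kernel.mmv should run when A, B and v all live on cuda:0, with and without a preallocated output buffer."""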
        input_device = "cuda:0"
        A = fix_mat(A, order=Ao, dtype=Adt, device=input_device)
        B = fix_mat(B, order=Bo, dtype=Bdt, device=input_device)
        v = fix_mat(v, order=vo, dtype=vdt, device=input_device)

        opt = dataclasses.replace(self.basic_options, use_cpu=False)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.mmv,
                       expected_fmmv, (A, B, v),
                       out=None,
                       rtol=rtol,
                       opt=opt)
        # Test with out
        out = torch.empty(A.shape[0],
                          v.shape[1],
                          dtype=A.dtype,
                          device=input_device)
        _run_fmmv_test(kernel.mmv,
                       expected_fmmv, (A, B, v),
                       out=out,
                       rtol=rtol,
                       opt=opt)
Example #3
 def test_gpu_inputs_fail(self, A, B, v, kernel, expected_fmmv):
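     """Inputs on mixed devices (A, B on cuda:0 but v on the CPU) should raise a RuntimeError."""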
     A = fix_mat(A, order="C", dtype=n32, device="cuda:0")
     B = fix_mat(B, order="C", dtype=n32, device="cuda:0")
     v = fix_mat(v, order="C", dtype=n32, device="cpu")
     opt = dataclasses.replace(self.basic_options, use_cpu=False, max_gpu_mem=np.inf)
     rtol = choose_on_dtype(A.dtype)
     # Test normal
     with pytest.raises(RuntimeError):
         _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
Example #4
 def test_gpu_inputs(self, A, B, v, kernel, expected_fmmv):
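     """kernel.mmv should accept inputs which all live on the GPU, with and without a preallocated output buffer."""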
     A = fix_mat(A, order="C", dtype=n32).cuda()
     B = fix_mat(B, order="C", dtype=n32, device=A.device)
     v = fix_mat(v, order="C", dtype=n32, device=A.device)
     opt = dataclasses.replace(self.basic_options, use_cpu=False, max_gpu_mem=np.inf)
     rtol = choose_on_dtype(A.dtype)
     # Test normal
     _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
     # Test with out
     out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype, device=A.device)
     _run_fmmv_test(kernel.mmv, expected_fmmv, (A, B, v), out=out, rtol=rtol, opt=opt)
Example #5
    def test_dfmmv(self, s_A, s_B, v, w, Adt, Bdt, vo, vdt, wo, wdt, kernel, s_e_dfmmv, cpu):
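        """kernel.dmmv with sparse A, B and dense v, w should match the expected output, with and without an out buffer."""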
        A = fix_sparse_mat(s_A[0], dtype=Adt)
        B = fix_sparse_mat(s_B[0], dtype=Bdt)
        v = fix_mat(v, order=vo, dtype=vdt)
        w = fix_mat(w, order=wo, dtype=wdt)

        opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.dmmv, s_e_dfmmv, (A, B, v, w), out=None, rtol=rtol, opt=opt)
        # Test with out
        out = torch.empty(m, t, dtype=A.dtype)
        _run_fmmv_test(kernel.dmmv, s_e_dfmmv, (A, B, v, w), out=out, rtol=rtol, opt=opt)
Example #6
def test_trsm_wrapper(mat, arr, dtype, order, device, lower, transpose):
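    """trsm should match scipy's dtrsm, but reject C-contiguous CUDA inputs with a ValueError."""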
    rtol = 1e-2 if dtype == np.float32 else 1e-11

    n_mat = move_tensor(fix_mat(mat, dtype=dtype, order=order, copy=True), device=device)
    n_arr = move_tensor(fix_mat(arr, dtype=dtype, order=order, copy=True), device=device)

    expected = sclb.dtrsm(1e-2, mat, arr, side=0, lower=lower, trans_a=transpose, overwrite_b=0)

    if device.startswith("cuda") and order == "C":
        with pytest.raises(ValueError):
            actual = trsm(n_arr, n_mat, alpha=1e-2, lower=lower, transpose=transpose)
    else:
        actual = trsm(n_arr, n_mat, alpha=1e-2, lower=lower, transpose=transpose)
        np.testing.assert_allclose(expected, actual.cpu().numpy(), rtol=rtol)
Example #7
    def test_zero_lambda(self, mat, kernel, gram, cpu):
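        """The preconditioner invariants should hold even with zero regularization (la = 0)."""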
        opt = dataclasses.replace(self.basic_opt,
                                  use_cpu=cpu,
                                  cpu_preconditioner=cpu)
        mat = fix_mat(mat, dtype=np.float64, order="K", copy=True)
        gram = fix_mat(gram, dtype=np.float64, order="K", copy=True)

        la = 0
        prec = FalkonPreconditioner(la, kernel, opt)
        prec.init(mat)
        assert_invariant_on_TT(prec, gram, tol=1e-10)
        assert_invariant_on_AT(prec, gram, la, tol=1e-10)
        assert_invariant_on_T(prec, gram, tol=1e-9)
        assert_invariant_on_prec(prec, N, gram, la, tol=1e-8)
Example #8
    def test_mul(self, mat, upper, preserve_diag, order, device):
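        """mul_triang with a large multiplier should scale only the selected triangle, leaving the other one untouched."""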
        inpt1 = fix_mat(mat,
                        dtype=mat.dtype,
                        order=order,
                        copy=True,
                        numpy=True)

        k = 1 if preserve_diag else 0
        if upper:
            tri_fn = partial(np.triu, k=k)
            other_tri_fn = partial(np.tril, k=k - 1)
        else:
            tri_fn = partial(np.tril, k=-k)
            other_tri_fn = partial(np.triu, k=-k + 1)

        inpt1 = torch.from_numpy(inpt1)
        inpt1_dev = create_same_stride(inpt1.shape, inpt1, inpt1.dtype, device)
        inpt1_dev.copy_(inpt1)
        mul_triang(inpt1_dev,
                   upper=upper,
                   preserve_diag=preserve_diag,
                   multiplier=10**6)
        inpt1 = inpt1_dev.cpu().numpy()

        assert np.mean(tri_fn(inpt1)) > 10**5
        assert np.mean(other_tri_fn(inpt1)) < 1
Example #9
    def test_zero(self, mat, upper, preserve_diag, order, device):
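        """mul_triang with multiplier=0 should zero the selected triangle; with preserve_diag it should match zero_triang."""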
        inpt1 = fix_mat(mat,
                        dtype=mat.dtype,
                        order=order,
                        copy=True,
                        numpy=True)
        inpt2 = inpt1.copy(order="K")

        k = 1 if preserve_diag else 0
        if upper:
            tri_fn = partial(np.triu, k=k)
        else:
            tri_fn = partial(np.tril, k=-k)

        inpt1 = torch.from_numpy(inpt1)
        inpt1_dev = create_same_stride(inpt1.shape, inpt1, inpt1.dtype, device)
        inpt1_dev.copy_(inpt1)
        mul_triang(inpt1_dev,
                   upper=upper,
                   preserve_diag=preserve_diag,
                   multiplier=0)
        inpt1 = inpt1_dev.cpu().numpy()

        assert np.sum(tri_fn(inpt1)) == 0

        if preserve_diag:
            inpt2_dev = inpt1_dev
            inpt2_dev.copy_(torch.from_numpy(inpt2))
            zero_triang(inpt2_dev, upper=upper)
            inpt2 = inpt2_dev.cpu().numpy()
            np.testing.assert_allclose(inpt1, inpt2)
Example #10
    def test_up(self, mat, order, dtype, device):
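        """copy_triang(upper=True) should rebuild the full symmetric matrix from its upper triangle; the diagonal survives a second copy with upper=False."""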
        mat = fix_mat(mat, order=order, dtype=dtype, numpy=True)
        mat_up = mat.copy(order="K")
        # Lower triangle of mat_up is 0
        mat_up[np.tril_indices(self.t, -1)] = 0
        # Create device matrix
        mat_up = torch.from_numpy(mat_up)
        mat_up_dev = move_tensor(mat_up, device)

        copy_triang(mat_up_dev, upper=True)
        mat_up = mat_up_dev.cpu().numpy()

        assert np.sum(mat_up == 0) == 0
        np.testing.assert_array_equal(np.triu(mat), np.triu(mat_up))
        np.testing.assert_array_equal(np.tril(mat_up), np.triu(mat_up).T)
        np.testing.assert_array_equal(np.diag(mat), np.diag(mat_up))

        # Reset and try with `upper=False`
        mat_up[np.tril_indices(self.t, -1)] = 0
        mat_up_dev.copy_(torch.from_numpy(mat_up))

        copy_triang(mat_up_dev, upper=False)  # Only the diagonal will be set.

        mat_up = mat_up_dev.cpu().numpy()
        np.testing.assert_array_equal(np.diag(mat), np.diag(mat_up))
Example #11
    def test_low(self, mat, order, dtype, device):
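        """copy_triang(upper=False) should rebuild the full symmetric matrix from its lower triangle; the diagonal survives a second copy with upper=True."""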
        mat = fix_mat(mat, order=order, dtype=dtype, numpy=True)
        mat_low = mat.copy(order="K")
        # Upper triangle of mat_low is 0
        mat_low[np.triu_indices(self.t, 1)] = 0

        # Create device matrix
        mat_low = torch.from_numpy(mat_low)
        mat_low_dev = move_tensor(mat_low, device)

        # Run copy
        copy_triang(mat_low_dev, upper=False)

        # Make checks on CPU
        mat_low = mat_low_dev.cpu().numpy()
        assert np.sum(mat_low == 0) == 0
        np.testing.assert_array_equal(np.tril(mat), np.tril(mat_low))
        np.testing.assert_array_equal(np.triu(mat_low), np.tril(mat_low).T)
        np.testing.assert_array_equal(np.diag(mat), np.diag(mat_low))

        # Reset and try with `upper=True`
        mat_low[np.triu_indices(self.t, 1)] = 0
        mat_low_dev.copy_(torch.from_numpy(mat_low))

        copy_triang(mat_low_dev, upper=True)  # Only the diagonal will be set

        mat_low = mat_low_dev.cpu().numpy()
        np.testing.assert_array_equal(np.diag(mat), np.diag(mat_low))
Example #12
    def test_simple(self, mat, kernel, gram, cpu, dtype, order):
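        """The preconditioner invariants (TT, AT, T, prec) should hold for the given dtype, order and device options."""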
        opt = dataclasses.replace(self.basic_opt,
                                  use_cpu=cpu,
                                  cpu_preconditioner=cpu)
        rtol = self.rtol[dtype]

        mat = fix_mat(mat, dtype=dtype, order=order, copy=True)
        gram = fix_mat(gram, dtype=dtype, order=order, copy=True)

        la = 100
        prec = FalkonPreconditioner(la, kernel, opt)
        prec.init(mat)
        assert_invariant_on_TT(prec, gram, tol=rtol)
        assert_invariant_on_AT(prec, gram, la, tol=rtol)
        assert_invariant_on_T(prec, gram, tol=rtol * 10)
        assert_invariant_on_prec(prec, N, gram, la, tol=rtol * 10)
Example #13
    def test_trsm(self, mat, vec, solution, alpha, dtype, order_v, order_A,
                  device):
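        """trsm should return a fresh tensor holding the solution, leaving vec untouched and preserving device, stride and dtype."""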
        mat = move_tensor(fix_mat(mat, dtype, order_A, copy=True, numpy=False),
                          device=device)
        vec = move_tensor(fix_mat(vec, dtype, order_v, copy=True, numpy=False),
                          device=device)

        sol_vec, lower, trans = solution
        out = trsm(vec, mat, alpha, lower=int(lower), transpose=int(trans))

        assert out.data_ptr() != vec.data_ptr(), "Vec was overwritten."
        assert out.device == vec.device, "Output device is incorrect."
        assert out.stride() == vec.stride(), "Stride was modified."
        assert out.dtype == vec.dtype, "Dtype was modified."
        np.testing.assert_allclose(sol_vec,
                                   out.cpu().numpy(),
                                   rtol=self.rtol[dtype])
Example #14
    def test_all_combos(self, mat, vec, order, device, upper, side):
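        """vec_mul_triang should match the NumPy reference for every side/triangle combination, and for 2D-column, 2D-row and 1D multipliers."""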
        exp_output = self.exp_vec_mul_triang(mat, vec, upper, side)

        vec = fix_mat(vec,
                      order=order,
                      dtype=np.float64,
                      numpy=False,
                      device=device)
        mat2 = fix_mat(mat,
                       order=order,
                       dtype=np.float64,
                       numpy=False,
                       device=device,
                       copy=True)
        out = vec_mul_triang(mat2, upper=upper, side=side,
                             multipliers=vec).cpu().numpy()
        np.testing.assert_allclose(exp_output.numpy(), out)
        assert out.flags["%s_CONTIGUOUS" % order], "Output is not %s-contiguous" % order

        # Test with different vec orderings
        vec = vec.reshape(1, -1)
        mat2 = fix_mat(mat,
                       order=order,
                       dtype=np.float64,
                       numpy=False,
                       device=device,
                       copy=True)
        out = vec_mul_triang(mat2, upper=upper, side=side,
                             multipliers=vec).cpu().numpy()
        np.testing.assert_allclose(exp_output.numpy(),
                                   out,
                                   err_msg="Vec row ordering failed")
        vec = vec.reshape(-1)
        mat2 = fix_mat(mat,
                       order=order,
                       dtype=np.float64,
                       numpy=False,
                       device=device,
                       copy=True)
        out = vec_mul_triang(mat2, upper=upper, side=side,
                             multipliers=vec).cpu().numpy()
        np.testing.assert_allclose(exp_output.numpy(),
                                   out,
                                   err_msg="Vec 1D ordering failed")
Example #15
    def test_fmmv(self, s_A, s_B, v, Adt, Bdt, vo, vdt, kernel, s_expected_fmmv, cpu):
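        """kernel.mmv with sparse A and B should match the expected result, with and without a preallocated output buffer."""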
        A = fix_sparse_mat(s_A[0], dtype=Adt)
        B = fix_sparse_mat(s_B[0], dtype=Bdt)
        v = fix_mat(v, dtype=vdt, order=vo, copy=True)

        opt = dataclasses.replace(self.basic_options, use_cpu=cpu)
        rtol = choose_on_dtype(A.dtype)

        # Test normal
        _run_fmmv_test(kernel.mmv, s_expected_fmmv, (A, B, v), out=None, rtol=rtol, opt=opt)
        # Test with out
        out = torch.empty(A.shape[0], v.shape[1], dtype=A.dtype)
        _run_fmmv_test(kernel.mmv, s_expected_fmmv, (A, B, v), out=out, rtol=rtol, opt=opt)
Example #16
    def test_rect(self, rect, order, dtype):
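        """cuda_transpose on a rectangular matrix should reproduce the NumPy transpose, strides included."""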
        from falkon.la_helpers.cuda_la_helpers import cuda_transpose
        mat = fix_mat(rect, order=order, dtype=dtype, copy=True, numpy=True)
        exp_mat_out = np.copy(mat.T, order=order)

        mat = move_tensor(torch.from_numpy(mat), "cuda:0")
        mat_out = move_tensor(torch.from_numpy(exp_mat_out), "cuda:0")
        mat_out.fill_(0.0)

        cuda_transpose(input=mat, output=mat_out)

        mat_out = move_tensor(mat_out, "cpu").numpy()
        assert mat_out.strides == exp_mat_out.strides
        np.testing.assert_allclose(exp_mat_out, mat_out)
Example #17
    def test_up(self, mat, order, dtype):
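        """CPU arrays: copy_triang(upper=True) should rebuild the full matrix from its upper triangle; the diagonal survives upper=False."""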
        mat = fix_mat(mat, order=order, dtype=dtype, numpy=True)
        mat_up = mat.copy(order="K")
        # Lower triangle of mat_up is 0
        mat_up[np.tril_indices(self.t, -1)] = 0
        copy_triang(mat_up, upper=True)

        assert np.sum(mat_up == 0) == 0
        np.testing.assert_array_equal(np.triu(mat), np.triu(mat_up))
        np.testing.assert_array_equal(np.tril(mat_up), np.triu(mat_up).T)
        np.testing.assert_array_equal(np.diag(mat), np.diag(mat_up))

        # Reset and try with `upper=False`
        mat_up[np.tril_indices(self.t, -1)] = 0
        copy_triang(mat_up, upper=False)  # Only the diagonal will be set.
        np.testing.assert_array_equal(np.diag(mat), np.diag(mat_up))
Example #18
    def test_lower(self, mat, exp_lower, clean, overwrite, order, dtype):
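        """potrf(upper=False) should reproduce the expected lower Cholesky factor, honoring the clean and overwrite flags."""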
        mat = fix_mat(mat, order=order, dtype=dtype, copy=False, numpy=True)
        inpt = mat.copy(order="K")

        our_chol = potrf(inpt, upper=False, clean=clean, overwrite=overwrite)
        if overwrite:
            assert inpt.ctypes.data == our_chol.ctypes.data, "Overwriting failed"

        if clean:
            np.testing.assert_allclose(exp_lower,
                                       our_chol,
                                       rtol=self.rtol[dtype])
            assert np.triu(our_chol, 1).sum() == 0
        else:
            np.testing.assert_allclose(exp_lower,
                                       np.tril(our_chol),
                                       rtol=self.rtol[dtype])
            np.testing.assert_allclose(np.triu(mat, 1), np.triu(our_chol, 1))
Example #19
def test_cpu_gpu_equality(mat, kernel, gram):
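    """The preconditioners computed on CPU and on GPU should agree on fC, dA and dT."""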
    la = 12.3

    mat = fix_mat(mat, dtype=np.float64, order="F", copy=True)

    opt = FalkonOptions(compute_arch_speed=False,
                        use_cpu=False,
                        cpu_preconditioner=False)
    prec_gpu = FalkonPreconditioner(la, kernel, opt)
    prec_gpu.init(mat)

    opt = dataclasses.replace(opt, use_cpu=True, cpu_preconditioner=True)
    prec_cpu = FalkonPreconditioner(la, kernel, opt)
    prec_cpu.init(mat)

    np.testing.assert_allclose(prec_cpu.fC,
                               prec_gpu.fC,
                               rtol=1e-10,
                               atol=1e-10)
    np.testing.assert_allclose(prec_cpu.dA, prec_gpu.dA, rtol=1e-10)
    np.testing.assert_allclose(prec_cpu.dT, prec_gpu.dT, rtol=1e-10)
Example #20
    def test_upper(self, mat, vec, order):
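        """vec_mul_triang on the upper triangle should match hand-computed results for both side=0 and side=1."""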
        mat = fix_mat(mat, order=order, dtype=mat.dtype, numpy=True, copy=True)

        out = vec_mul_triang(mat.copy(order="K"),
                             upper=True,
                             side=0,
                             multipliers=vec)
        exp = np.array([[0, 0, 0], [2, 2, 4], [6, 6, 4]], dtype=np.float32)
        np.testing.assert_allclose(exp, out)
        assert out.flags["%s_CONTIGUOUS" % order], "Output is not %s-contiguous" % order

        out = vec_mul_triang(mat.copy(order="K"),
                             upper=True,
                             side=1,
                             multipliers=vec)
        exp = np.array([[0, 1, 0.5], [2, 2, 2], [6, 6, 4]], dtype=np.float32)
        np.testing.assert_allclose(exp, out)
        assert out.flags["%s_CONTIGUOUS" % order], "Output is not %s-contiguous" % order
Example #21
    def test_zero(self, mat, upper, preserve_diag, order):
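        """mul_triang with multiplier=0 on CPU arrays should zero the selected triangle; with preserve_diag it should match zero_triang."""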
        inpt1 = fix_mat(mat,
                        dtype=mat.dtype,
                        order=order,
                        copy=True,
                        numpy=True)
        inpt2 = inpt1.copy(order="K")

        k = 1 if preserve_diag else 0
        if upper:
            tri_fn = partial(np.triu, k=k)
        else:
            tri_fn = partial(np.tril, k=-k)

        mul_triang(inpt1,
                   upper=upper,
                   preserve_diag=preserve_diag,
                   multiplier=0)
        assert np.sum(tri_fn(inpt1)) == 0

        if preserve_diag:
            zero_triang(inpt2, upper=upper)
            np.testing.assert_allclose(inpt1, inpt2)
Example #22
 def convert(dtype, order=None, sparse=False):
     if sparse:
         return s_B[0].to(dtype=numpy_to_torch_type(dtype))
     return fix_mat(B, dtype=dtype, order=order)
Example #23
 def convert(dtype, order):
     return fix_mat(w, dtype=dtype, order=order)
Example #24
 def getter(order, dtype, device="cpu"):
     return fix_mat(matrix,
                    dtype=dtype,
                    order=order,
                    copy=True,
                    device=device)
Example #25
 def getter(order, dtype):
     return fix_mat(matrix, dtype=dtype, order=order, copy=True)