Example #1
def test_transform_correctness(thread, transform_type, inverse, i32_conversion, constant_memory):

    if not transform_supported(thread.device_params, transform_type):
        pytest.skip()

    batch_shape = (128,)

    if transform_type == 'FFT':
        transform = fft512(use_constant_memory=constant_memory)
        transform_ref = tr_fft.fft_transform_ref
    else:
        transform = ntt1024(use_constant_memory=constant_memory)
        transform_ref = tr_ntt.ntt_transform_ref

    comp = Transform(
        transform, batch_shape,
        inverse=inverse, i32_conversion=i32_conversion, transforms_per_block=1,
        ).compile(thread)

    # Random input matching the shape and dtype the compiled computation expects
    a = get_test_array(comp.parameter.input.shape, comp.parameter.input.dtype)

    a_dev = thread.to_device(a)
    res_dev = thread.empty_like(comp.parameter.output)

    # Run the GPU transform and pull the result back to the host
    comp(res_dev, a_dev)
    res_test = res_dev.get()

    # CPU reference implementation of the same transform
    res_ref = transform_ref(a, inverse=inverse, i32_conversion=i32_conversion)

    if numpy.issubdtype(res_dev.dtype, numpy.integer):
        assert (res_test == res_ref).all()
    else:
        assert numpy.allclose(res_test, res_ref)
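
The fixtures used above (`thread`, `transform_type`, `inverse`, `i32_conversion`, `constant_memory`) are supplied by the test suite's own configuration, which is not shown here. Below is a minimal sketch of how the same test could be driven with plain pytest parametrization; the value sets are assumptions for illustration only, not the project's actual fixtures:

import pytest

# Hypothetical parametrization; the real suite presumably provides these as fixtures.
@pytest.mark.parametrize('transform_type', ['FFT', 'NTT'])
@pytest.mark.parametrize('inverse', [False, True])
@pytest.mark.parametrize('i32_conversion', [False, True])
@pytest.mark.parametrize('constant_memory', [False, True])
def test_transform_correctness(thread, transform_type, inverse, i32_conversion, constant_memory):
    ...  # body as in Example #1 above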
Example #2
def test_transforms_per_block_performance(thread, transform_type,
                                          heavy_performance_load,
                                          transforms_per_block):

    if not transform_supported(thread.device_params, transform_type):
        pytest.skip()

    max_tpb = max_supported_transforms_per_block(thread.device_params,
                                                 transform_type)
    if transforms_per_block > max_tpb:
        pytest.skip()

    size = 4096 if heavy_performance_load else 64

    rng = numpy.random.RandomState()
    secret_key, cloud_key = make_key_pair(thread,
                                          rng,
                                          transform_type=transform_type)

    perf_params = PerformanceParameters(
        secret_key.params,
        single_kernel_bootstrap=False,
        transforms_per_block=transforms_per_block)

    results = check_performance(thread, (secret_key, cloud_key),
                                perf_params,
                                shape=size)
    print()
    print(check_performance_str(results))
Example #3
def test_constant_mem_performance(
        thread, transform_type, single_kernel_bootstrap, heavy_performance_load,
        use_constant_memory):

    if not transform_supported(thread.device_params, transform_type):
        pytest.skip()

    # We want to test the effect of using constant memory on the bootstrap calculation.
    # A single-kernel bootstrap uses the `use_constant_memory_multi_iter` option,
    # and a multi-kernel bootstrap uses the `use_constant_memory_single_iter` option.
    kwds = dict(single_kernel_bootstrap=single_kernel_bootstrap)
    if single_kernel_bootstrap:
        kwds.update(dict(use_constant_memory_multi_iter=use_constant_memory))
    else:
        kwds.update(dict(use_constant_memory_single_iter=use_constant_memory))

    size = 4096 if heavy_performance_load else 64

    rng = numpy.random.RandomState()
    secret_key, cloud_key = make_key_pair(thread, rng, transform_type=transform_type)

    # TODO: instead of creating a whole key and then checking if the parameters are supported,
    # we can just create a parameter object separately.
    if (single_kernel_bootstrap
            and not single_kernel_bootstrap_supported(secret_key.params, thread.device_params)):
        pytest.skip()

    perf_params = PerformanceParameters(secret_key.params, **kwds)

    results = check_performance(thread, (secret_key, cloud_key), perf_params, shape=size)
    print()
    print(check_performance_str(results))
Example #4
def test_single_kernel_bs_performance(
        thread, transform_type, single_kernel_bootstrap,
        test_function_name, heavy_performance_load):

    if not transform_supported(thread.device_params, transform_type):
        pytest.skip()

    test_function = dict(
        NAND=(gate_nand, nand_ref, 2),
        MUX=(gate_mux, mux_ref, 3),
        uint_min=(uint_min, uint_min_ref, 2),
        )[test_function_name]

    if test_function_name == 'uint_min':
        shape = (128, 32) if heavy_performance_load else (4, 16)
    else:
        shape = 4096 if heavy_performance_load else 64

    rng = numpy.random.RandomState()
    secret_key, cloud_key = make_key_pair(thread, rng, transform_type=transform_type)

    # TODO: instead of creating a whole key and then checking if the parameters are supported,
    # we can just create a parameter object separately.
    if (single_kernel_bootstrap
            and not single_kernel_bootstrap_supported(secret_key.params, thread.device_params)):
        pytest.skip()

    perf_params = PerformanceParameters(
        secret_key.params, single_kernel_bootstrap=single_kernel_bootstrap)

    results = check_performance(
        thread, (secret_key, cloud_key), perf_params, shape=shape, test_function=test_function)
    print()
    print(check_performance_str(results))
Example #5
def test_polynomial_multiplication(thread, transform_type):
    if not transform_supported(thread.device_params, transform_type):
        pytest.skip()

    batch_shape = (10,)

    if transform_type == 'FFT':
        transform = fft512()
        transform_ref = tr_fft.fft_transform_ref
        tr_mul_ref = tr_fft.fft_transformed_mul_ref
    else:
        transform = ntt1024()
        transform_ref = tr_ntt.ntt_transform_ref
        tr_mul_ref = tr_ntt.ntt_transformed_mul_ref

    tr_forward = Transform(
        transform, batch_shape,
        inverse=False, i32_conversion=True, transforms_per_block=1,
        ).compile(thread)
    tr_inverse = Transform(
        transform, batch_shape,
        inverse=True, i32_conversion=True, transforms_per_block=1,
        ).compile(thread)

    a = numpy.random.randint(-2**31, 2**31, size=batch_shape + (1024,), dtype=numpy.int32)
    b = numpy.random.randint(-1000, 1000, size=batch_shape + (1024,), dtype=numpy.int32)

    a_dev = thread.to_device(a)
    b_dev = thread.to_device(b)
    a_tr_dev = thread.empty_like(tr_forward.parameter.output)
    b_tr_dev = thread.empty_like(tr_forward.parameter.output)
    res_dev = thread.empty_like(tr_inverse.parameter.output)

    # Forward-transform both operands on the GPU, multiply them pointwise in the
    # transformed domain with the CPU reference, then inverse-transform the product
    tr_forward(a_tr_dev, a_dev)
    tr_forward(b_tr_dev, b_dev)
    res_tr = tr_mul_ref(a_tr_dev.get(), b_tr_dev.get())
    res_tr_dev = thread.to_device(res_tr)
    tr_inverse(res_dev, res_tr_dev)

    res_test = res_dev.get()
    res_ref = poly_mul_ref(a, b)

    assert (res_test == res_ref).all()
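
`poly_mul_ref` above is the plain CPU reference that the transform-based multiplication is checked against. As a rough illustration, and assuming the reference computes a negacyclic product (multiplication modulo X^1024 + 1, the TFHE polynomial ring) with int32 wraparound, it could look like the sketch below; this is an assumption for illustration, not the project's actual implementation:

import numpy

def poly_mul_ref_sketch(a, b):
    # Assumed reference: negacyclic convolution in Z[X]/(X^N + 1),
    # with the result wrapped back into int32 range.
    # int64 accumulation is enough for the value ranges used in Example #5.
    N = a.shape[-1]
    res = numpy.empty(a.shape, numpy.int32)
    for idx in numpy.ndindex(*a.shape[:-1]):
        full = numpy.convolve(a[idx].astype(numpy.int64), b[idx].astype(numpy.int64))
        wrapped = full[:N].copy()
        wrapped[:N - 1] -= full[N:]  # reduce using X^N == -1
        res[idx] = wrapped.astype(numpy.int32)  # modular cast back to int32
    return res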
Example #6
def test_fft_performance(thread, transforms_per_block, constant_memory, heavy_performance_load):

    if not transform_supported(thread.device_params, 'FFT'):
        pytest.skip()

    if transforms_per_block > max_supported_transforms_per_block(thread.device_params, 'FFT'):
        pytest.skip()

    is_cuda = thread.api.get_id() == cuda_id()

    batch_shape = (2**14,)
    a = get_test_array(batch_shape + (512,), numpy.complex128)

    kernel_repetitions = 100 if heavy_performance_load else 5

    a_dev = thread.to_device(a)
    res_dev = thread.empty_like(a_dev)

    res_ref = tr_fft.fft_transform_ref(a)

    transform = fft512(use_constant_memory=constant_memory)

    fft_comp = Transform(
        transform, batch_shape, transforms_per_block=transforms_per_block,
        ).compile(thread)
    fft_comp_repeated = Transform(
        transform, batch_shape, transforms_per_block=transforms_per_block,
        kernel_repetitions=kernel_repetitions).compile(thread)

    # Quick check of correctness
    fft_comp(res_dev, a_dev)
    res_test = res_dev.get()
    assert numpy.allclose(res_test, res_ref)

    # Test performance
    times, times_str = get_times(thread, fft_comp_repeated, res_dev, a_dev)
    print("\n{backend}, {trnum} per block, test --- {times}".format(
        times=times_str,
        backend='cuda' if is_cuda else 'ocl ',
        trnum=transforms_per_block))
Example #7
def test_ntt_performance(thread, transforms_per_block, constant_memory, heavy_performance_load):

    if not transform_supported(thread.device_params, 'NTT'):
        pytest.skip()

    if transforms_per_block > max_supported_transforms_per_block(thread.device_params, 'NTT'):
        pytest.skip()

    is_cuda = thread.api.get_id() == cuda_id()

    methods = list(itertools.product(
        ['cuda_asm', 'c'], # base method
        ['cuda_asm', 'c_from_asm', 'c'], # mul method
        ['cuda_asm', 'c_from_asm', 'c'] # lsh method
        ))

    if not is_cuda:
        # filter out all usage of CUDA asm if we're on OpenCL
        methods = [ms for ms in methods if 'cuda_asm' not in ms]

    batch_shape = (2**14,)
    a = get_test_array(batch_shape + (1024,), "ff_number")

    kernel_repetitions = 100 if heavy_performance_load else 5

    a_dev = thread.to_device(a)
    res_dev = thread.empty_like(a_dev)

    # TODO: compute a reference NTT when it's fast enough on CPU
    #res_ref = tr_ntt.ntt_transform_ref(a)

    print()
    min_times = []
    for base_method, mul_method, lsh_method in methods:

        transform = ntt1024(
            base_method=base_method, mul_method=mul_method, lsh_method=lsh_method,
            use_constant_memory=constant_memory)

        ntt_comp = Transform(
            transform, batch_shape, transforms_per_block=transforms_per_block,
            ).compile(thread)
        ntt_comp_repeated = Transform(
            transform, batch_shape, transforms_per_block=transforms_per_block,
            kernel_repetitions=kernel_repetitions).compile(thread)

        # TODO: compute a reference NTT when it's fast enough on CPU
        # Quick check of correctness
        #ntt_comp(res_dev, a_dev)
        #res_test = res_dev.get()
        #assert (res_test == res_ref).all()

        # Test performance
        times, times_str = get_times(thread, ntt_comp_repeated, res_dev, a_dev)
        print("  base: {bm}, mul: {mm}, lsh: {lm}".format(
            bm=base_method, mm=mul_method, lm=lsh_method))
        print("  {backend}, {trnum} per block, test --- {times}".format(
            times=times_str,
            backend='cuda' if is_cuda else 'ocl ',
            trnum=transforms_per_block))

        min_times.append((times.min(), base_method, mul_method, lsh_method))

    best = min(min_times, key=lambda t: t[0])
    time_best, base_method, mul_method, lsh_method = best
    print("Best time: {tb:.4f} for [base: {bm}, mul: {mm}, lsh: {lm}]".format(
        tb=time_best, bm=base_method, mm=mul_method, lm=lsh_method
        ))
Example #8
def test_transform_type(thread, transform_type):
    if not transform_supported(thread.device_params, transform_type):
        pytest.skip()
    rng = numpy.random.RandomState()
    key_pair = make_key_pair(thread, rng, transform_type=transform_type)
    check_gate(thread, key_pair, 2, gate_nand, nand_ref)
Example #9
def test_transform_type(thread, transform_type):
    if not transform_supported(thread.device_params, transform_type):
        pytest.skip()
    rng = DeterministicRNG()
    key_pair = make_key_pair(thread, rng, transform_type=transform_type)
    check_gate(thread, key_pair, 2, gate_nand, nand_ref)
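
In both variants, `check_gate` presumably encrypts random input bits with the secret key, evaluates the homomorphic gate under the cloud key, decrypts the result, and compares it with the plaintext reference (here `nand_ref`, with 2 being the gate's argument count). A plaintext NAND reference could be as small as the following sketch (an assumption for illustration, not the project's actual helper):

import numpy

def nand_ref_sketch(a, b):
    # Plaintext NAND over boolean numpy arrays.
    return numpy.logical_not(numpy.logical_and(a, b))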