示例#1
0
 def test_sparse():
     """Code generated by a ``ListLbGenerator`` must mention ``num_cells``.

     The kernel, the setter AST and the getter AST are generated one after
     another and each resulting code string is checked.
     """
     from lbmpy.creationfunctions import create_lb_collision_rule
     from pystencils import get_code_str

     generator = ListLbGenerator(create_lb_collision_rule())
     # Same order as before: kernel first, then setter, then getter.
     for factory_name in ('kernel', 'setter_ast', 'getter_ast'):
         generated = get_code_str(getattr(generator, factory_name)())
         assert 'num_cells' in generated
示例#2
0
def test_sqrt_of_integer():
    """Regression test for a bug where sqrt(3) was classified as an integer.

    The assignments ``tmp = sqrt(3); f[0] = tmp`` are compiled twice: once
    for a field declared without an explicit data type and run on a float64
    array, and once for a float32 field with ``data_type="float32"``. In both
    runs the stored value has to lie strictly between 1.7 and 1.8; for the
    single precision kernel the printed literal is checked as well.
    """
    # --- field without explicit type, executed on a float64 array ---
    field_default = ps.fields("f: [1D]")
    root = sp.symbols("tmp")
    eqs_default = [
        ps.Assignment(root, sp.sqrt(3)),
        ps.Assignment(field_default[0], root),
    ]
    values_f64 = np.array([1], dtype=np.float64)
    kernel_f64 = ps.create_kernel(eqs_default).compile()
    kernel_f64(f=values_f64)
    assert 1.7 < values_f64[0] < 1.8

    # --- float32 field, kernel created with a float32 config ---
    field_f32 = ps.fields("f: float32[1D]")
    root = sp.symbols("tmp")
    eqs_f32 = [
        ps.Assignment(root, sp.sqrt(3)),
        ps.Assignment(field_f32[0], root),
    ]
    values_f32 = np.array([1], dtype=np.float32)
    single_precision = ps.CreateKernelConfig(data_type="float32")
    kernel_f32 = ps.create_kernel(eqs_f32, config=single_precision).compile()
    kernel_f32(f=values_f32)

    generated = ps.get_code_str(kernel_f32.ast)
    # 1.7320508075688772935 --> rounding to ...773 is actually correct.
    # This was wrong before !282
    assert "1.7320508075688773f" in generated
    assert 1.7 < values_f32[0] < 1.8
示例#3
0
def test_product(default_assignment_simplifications):
    """Store ``Product(k, (k, 1, 10))`` in an int64 field and verify the result.

    Args:
        default_assignment_simplifications: forwarded to
            ``ps.CreateKernelConfig``. When it is exactly ``False`` the
            generated code must contain ``'int64_t product'``.

    After running the compiled kernel on a zero-initialised int64 array,
    every entry must be close to the symbolically evaluated product.
    """
    index = ps.TypedSymbol('k', create_type('int64'))

    product_expr = sympy.Product(index, (index, 1, 10))
    expected_value = product_expr.doit()

    print(product_expr)
    print(expected_value)

    field = ps.fields('x: int64[1d]')
    collection = ps.AssignmentCollection({field.center(): product_expr})

    kernel_config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)

    kernel_ast = ps.create_kernel(collection, config=kernel_config)
    generated = ps.get_code_str(kernel_ast)
    compiled = kernel_ast.compile()

    print(generated)
    if default_assignment_simplifications is False:
        assert 'int64_t product' in generated

    data = np.zeros((10, ), np.int64)
    compiled(x=data)

    reference = int(expected_value) * np.ones_like(data)
    assert np.allclose(data, reference)
示例#4
0
def test_sum_use_float(default_assignment_simplifications):
    """Store ``Sum(k, (k, 1, 100))`` in a float32 field and verify the result.

    Args:
        default_assignment_simplifications: forwarded to
            ``ps.CreateKernelConfig``. When it is exactly ``False`` the
            generated code must contain ``'float sum'``.

    The kernel is created with ``data_type`` float32 and run on a
    zero-initialised float32 array. Every entry must then be close to the
    symbolically evaluated sum.
    """
    series = sympy.Sum(sp.abc.k, (sp.abc.k, 1, 100))
    expected_value = series.doit()

    print(series)
    print(expected_value)

    field = ps.fields('x: float32[1d]')
    collection = ps.AssignmentCollection({field.center(): series})

    kernel_config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications,
        data_type=create_type('float32'))
    kernel_ast = ps.create_kernel(collection, config=kernel_config)
    generated = ps.get_code_str(kernel_ast)
    compiled = kernel_ast.compile()

    print(generated)
    if default_assignment_simplifications is False:
        assert 'float sum' in generated

    data = np.zeros((10, ), np.float32)
    compiled(x=data)

    reference = int(expected_value) * np.ones_like(data)
    assert np.allclose(data, reference)
示例#5
0
def generate_shared_object(output_folder=None,
                           source_files=None,
                           show_code=False,
                           framework_module_class=TorchModule,
                           generate_code_only=False,
                           update_repo_files=False):
    """Generate and compile the ``pyronn_torch_cpp`` extension module.

    The ``helper_headers`` directory and the CUDA sources are copied into a
    ``pyronn_torch_cpp`` sub-directory of the configured object cache (the
    sources are renamed from ``*.cu.cc`` to ``*.cu``). A module wrapping
    ``FUNCTIONS.values()`` is then created with ``framework_module_class``
    and compiled together with the copied sources. The resulting shared
    object is copied to ``<output_folder>/pyronn_torch_cpp.so``.

    Args:
        output_folder: Destination directory. If falsy, ``generated_files``
            two levels above this file's directory is used. It is created
            before the shared object is copied if it does not exist yet.
        source_files: Paths of the ``*.cu.cc`` sources. If falsy, every
            ``*.cu.cc`` file in ``PYRO-NN-Layers`` next to this file is used.
        show_code: If true, display the generated code with
            ``pystencils.show_code``.
        framework_module_class: Callable invoked as
            ``framework_module_class(module_name, functions)``.
        generate_code_only: If true, return the module right after creating
            it (and optionally showing its code), without compiling.
        update_repo_files: If true, additionally copy the helper headers and
            the renamed sources into ``output_folder`` and write the
            generated code to ``<output_folder>/pyronn_torch.cpp``.

    Returns:
        The module object when ``generate_code_only`` is true, otherwise the
        value returned by ``module.compile(...)``.
    """
    object_cache = get_cache_config()['object_cache']

    module_name = 'pyronn_torch_cpp'
    layers_dir = join(dirname(__file__), 'PYRO-NN-Layers')
    headers_dir = join(layers_dir, 'helper_headers')
    build_dir = join(object_cache, module_name)

    if not output_folder:
        output_folder = join(dirname(__file__), '..', '..', 'generated_files')

    if not source_files:
        source_files = glob(join(layers_dir, '*.cu.cc'))

    # Refresh the helper headers in the build directory (and in the repo copy).
    makedirs(build_dir, exist_ok=True)
    rmtree(join(build_dir, 'helper_headers'), ignore_errors=True)
    copytree(headers_dir, join(build_dir, 'helper_headers'))
    if update_repo_files:
        rmtree(join(output_folder, 'helper_headers'), ignore_errors=True)
        copytree(headers_dir, join(output_folder, 'helper_headers'))

    cuda_sources = []
    for source in source_files:
        # Torch only accepts *.cu as CUDA
        cuda_name = basename(source).replace('.cu.cc', '.cu')
        cached_copy = join(build_dir, cuda_name)
        copyfile(source, cached_copy)
        cuda_sources.append(cached_copy)
        if update_repo_files:
            copyfile(source, join(output_folder, cuda_name))

    module = framework_module_class(module_name, FUNCTIONS.values())

    if show_code:
        pystencils.show_code(module, custom_backend=FrameworkIntegrationPrinter())

    if generate_code_only:
        return module

    extension = module.compile(extra_source_files=cuda_sources,
                               extra_cuda_flags=['-arch=sm_35'],
                               with_cuda=True,
                               compile_module_name=module_name)

    # Without update_repo_files nothing has created output_folder so far, and
    # copyfile does not create missing parent directories.
    makedirs(output_folder, exist_ok=True)
    copyfile(module.compiled_file, join(output_folder, module_name + '.so'))

    if update_repo_files:
        with open(join(output_folder, 'pyronn_torch.cpp'), 'w') as f:
            f.write(pystencils.get_code_str(module, custom_backend=FrameworkIntegrationPrinter()))

    return extension
示例#6
0
def test_integer_comparision():
    """Check the code printed for a Piecewise whose condition is ``dir == 1``.

    The generated kernel code has to contain the exact ternary statement
    asserted below.
    """
    field = ps.fields("f [2D]")
    direction = sp.Symbol("dir")

    centre = field[0, 0]
    selection = sp.Piecewise((0, sp.Equality(direction, 1)), (centre, True))
    update = ps.Assignment(centre, selection)

    generated = ps.get_code_str(ps.create_kernel(update))

    expected = "_data_f_00[_stride_f_1*ctr_1] = ((((dir) == (1))) ? (0.0): (_data_f_00[_stride_f_1*ctr_1]));"
    assert expected in generated
示例#7
0
def test_abs():
    """``Abs`` of an int64-cast field value must not be printed as ``fabs(``.

    The kernel is created for ``ps.Target.GPU``; only the generated code
    string is inspected, the kernel is not compiled.
    """
    target_field, source_field, _ = ps.fields('x, y, z:  float64[2d]')

    int_type = create_type('int64')
    integer_value = cast_func(source_field[0, 0], int_type)

    collection = ps.AssignmentCollection(
        {target_field[0, 0]: sympy.Abs(integer_value)})

    gpu_config = ps.CreateKernelConfig(target=ps.Target.GPU)
    generated = ps.get_code_str(ps.create_kernel(collection, config=gpu_config))
    print(generated)
    assert 'fabs(' not in generated
示例#8
0
def test_split_inner_loop():
    """Check how the ``split_groups`` hint changes the number of loops.

    Eight field components plus one scalar are assigned. With the hint set,
    the default kernel must contain five ``for`` occurrences and the GPU
    kernel none; without the hint the default kernel must contain two.
    """
    dst = ps.fields('dst(8): double[2D]')
    s = sp.symbols('s_:8')
    x = sp.symbols('x')

    components = [dst[0, 0](i) for i in range(8)]
    subexpressions = []
    main = [Assignment(component, value) for component, value in zip(components, s)]
    main.append(Assignment(x, sum(s)))

    hinted = AssignmentCollection(main, subexpressions)
    hinted.simplification_hints['split_groups'] = [
        components[0:2],
        components[2:4],
        components[4:6],
        components[6:8] + [x],
    ]

    # four inner loops as indicated in split groups (4 elements) plus one outer loop
    cpu_code = ps.get_code_str(ps.create_kernel(hinted))
    assert cpu_code.count('for') == 5

    # on GPUs it wouldn't be good to use loop splitting
    gpu_code = ps.get_code_str(ps.create_kernel(hinted, target=ps.Target.GPU))
    assert gpu_code.count('for') == 0

    # without the hint: one inner loop and one outer loop
    plain = AssignmentCollection(main, subexpressions)
    plain_code = ps.get_code_str(ps.create_kernel(plain))
    assert plain_code.count('for') == 2
示例#9
0
def test_creation(method_enum, double_precision):
    """Make sure only variables of the requested precision are created.

    Args:
        method_enum: passed as ``method`` to ``LBMConfig``.
        double_precision: if true the kernel uses ``"float64"`` and the code
            must contain ``double`` but not ``float``; otherwise ``"float32"``
            is used and the expectation is reversed.
    """
    data_type = "float64" if double_precision else "float32"
    lb_function = create_lb_function(
        lbm_config=LBMConfig(method=method_enum, relaxation_rate=1.5),
        config=ps.CreateKernelConfig(data_type=data_type))
    generated = ps.get_code_str(lb_function)

    if double_precision:
        forbidden, required = 'float', 'double'
    else:
        forbidden, required = 'double', 'float'
    assert forbidden not in generated
    assert required in generated
示例#10
0
def test_complex_numbers_64(assignment, target):
    """Create, print and compile a kernel for ``assignment`` with data type double.

    The generated code must not contain ``"Not supported"`` and compiling the
    AST must yield a non-``None`` kernel. For the GPU target the compile step
    is skipped when ``pycuda`` cannot be imported.
    """
    kernel_ast = pystencils.create_kernel(assignment, target=target, data_type='double')
    generated = pystencils.get_code_str(kernel_ast)

    print(generated)
    assert "Not supported" not in generated

    if target == pystencils.Target.GPU:
        pytest.importorskip('pycuda')

    compiled = kernel_ast.compile()
    assert compiled is not None
示例#11
0
def test_scenario(method_enum, double_precision):
    """Run one step of a lid driven cavity and check the precision of its kernel code.

    Args:
        method_enum: passed as ``method`` to ``LBMConfig``.
        double_precision: if true the kernel uses ``"double"`` and the code
            must contain ``double`` but not ``float``; otherwise ``"float32"``
            is used and the expectation is reversed.
    """
    data_type = "double" if double_precision else "float32"
    scenario = create_lid_driven_cavity(
        (16, 16, 8),
        lbm_config=LBMConfig(method=method_enum, relaxation_rate=1.5),
        config=ps.CreateKernelConfig(data_type=data_type))
    scenario.run(1)
    generated = ps.get_code_str(scenario.ast)

    if double_precision:
        forbidden, required = 'float', 'double'
    else:
        forbidden, required = 'double', 'float'
    assert forbidden not in generated
    assert required in generated
示例#12
0
def test_evaluate_constant_terms(target, simplification):
    """The constant ``cos(1)`` must only survive in the code when simplification is off.

    Args:
        target: kernel target; for ``ps.Target.GPU`` the test is skipped if
            ``pycuda`` cannot be imported.
        simplification: forwarded as ``default_assignment_simplifications``.
    """
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")
    src, dst = ps.fields('src, dst:  float32[2d]')

    # Triggers Sympy's cos optimization
    rhs = -sp.cos(1) + dst[0, 0]
    collection = ps.AssignmentCollection({src[0, 0]: rhs})

    kernel_config = ps.CreateKernelConfig(
        target=target, default_assignment_simplifications=simplification)
    generated = ps.get_code_str(ps.create_kernel(collection, config=kernel_config))

    contains_cos = 'cos(' in generated
    if simplification:
        assert not contains_cos
    else:
        assert contains_cos
    print(generated)
示例#13
0
def test_aligned_and_nt_stores(instruction_set=instruction_set, openmp=False):
    """Run a vectorized averaging kernel with aligned, non-temporal stores.

    Array ``g`` is filled with 1.0 and ``f`` with 0.0 (ghost layers
    included); the kernel writes the average of the four direct neighbours of
    ``g`` into ``f``. Depending on ``instruction_set``, certain entries are
    expected in ``ast.instruction_set``, and every such entry that exists
    must show up (text before the first ``{``) in the generated code.
    Finally the sum over the CPU array ``f`` must equal the number of
    domain cells.

    Args:
        instruction_set: name of the vector instruction set to use.
        openmp: enables ``cpu_openmp`` and switches the array alignment
            from ``True`` to ``'cacheline'``.
    """
    domain_size = (24, 24)
    # create a datahandling object
    dh = ps.create_data_handling(domain_size,
                                 periodicity=(True, True),
                                 parallel=False,
                                 default_target=Target.CPU)

    # fields
    alignment = 'cacheline' if openmp else True
    g = dh.add_array("g", values_per_cell=1, alignment=alignment)
    dh.fill("g", 1.0, ghost_layers=True)
    f = dh.add_array("f", values_per_cell=1, alignment=alignment)
    dh.fill("f", 0.0, ghost_layers=True)

    vectorize_info = dict(instruction_set=instruction_set,
                          assume_aligned=True,
                          nontemporal=True,
                          assume_inner_stride_one=True)
    neighbour_average = 0.25 * (g[-1, 0] + g[1, 0] + g[0, -1] + g[0, 1])
    update_rule = [ps.Assignment(f.center(), neighbour_average)]

    kernel_config = ps.CreateKernelConfig(target=dh.default_target,
                                          cpu_vectorize_info=vectorize_info,
                                          cpu_openmp=openmp)
    ast = ps.create_kernel(update_rule, config=kernel_config)

    if instruction_set == 'sse' or instruction_set.startswith('avx'):
        assert 'stream' in ast.instruction_set
        assert 'streamFence' in ast.instruction_set
    if instruction_set in ('neon', 'vsx') or instruction_set.startswith('sve'):
        assert 'cachelineZero' in ast.instruction_set
    if instruction_set == 'vsx':
        assert 'storeAAndFlushCacheline' in ast.instruction_set

    special_stores = ('stream', 'streamFence', 'cachelineZero',
                      'storeAAndFlushCacheline', 'flushCacheline')
    for name in special_stores:
        if name not in ast.instruction_set:
            continue
        # only the part in front of the first '{' placeholder is searched for
        intrinsic_prefix = ast.instruction_set[name].split('{')[0]
        assert intrinsic_prefix in ps.get_code_str(ast)

    kernel = ast.compile()
    dh.run_kernel(kernel)
    np.testing.assert_equal(np.sum(dh.cpu_arrays['f']), np.prod(domain_size))
示例#14
0
def test_type_interference():
    """Check the C types printed for symbols whose types have to be inferred.

    ``a`` and ``b`` are given explicit casts (float64 and uint16), ``e`` is
    the integer literal 11 and ``f`` is built from ``b`` via ``c``. The
    generated code must declare ``double a``, ``uint16_t b``, ``uint16_t f``
    and ``int64_t e``.
    """
    x = pystencils.fields('x:  float32[3d]')
    # insertion order of the dict is kept exactly as before
    collection = pystencils.AssignmentCollection({
        a: cast_func(10, create_type('float64')),
        b: cast_func(10, create_type('uint16')),
        e: 11,
        c: b,
        f: c + b,
        d: c + b + x.center + e,
        x.center: c + b + x.center
    })

    kernel_ast = pystencils.create_kernel(collection)
    generated = str(pystencils.get_code_str(kernel_ast))

    for declaration in ('double a', 'uint16_t b', 'uint16_t f', 'int64_t e'):
        assert declaration in generated
示例#15
0
def test_sympy_optimizations(target, simplification):
    """``exp(x) - 1`` must be printed as ``expm1(`` exactly when simplification is on.

    Args:
        target: kernel target; for ``ps.Target.GPU`` the test is skipped if
            ``pycuda`` cannot be imported.
        simplification: forwarded as ``default_assignment_simplifications``.
    """
    if target == ps.Target.GPU:
        pytest.importorskip("pycuda")
    src, dst = ps.fields('src, dst:  float32[2d]')

    # Triggers Sympy's expm1 optimization.
    # Sympy's expm1 optimization is tedious to use and its behaviour is highly
    # dependent on the sympy version. In some cases the exp expression has to
    # be encapsulated in brackets or multiplied with 1 or 1.0 for sympy to
    # work properly ...
    rhs = 1.0 * (sp.exp(dst[0, 0]) - 1)
    collection = ps.AssignmentCollection({src[0, 0]: rhs})

    kernel_config = ps.CreateKernelConfig(
        target=target, default_assignment_simplifications=simplification)
    generated = ps.get_code_str(ps.create_kernel(collection, config=kernel_config))

    uses_expm1 = 'expm1(' in generated
    if simplification:
        assert uses_expm1
    else:
        assert not uses_expm1
示例#16
0
def test_sympy_assignment(default_assignment_simplifications):
    """Check log optimisations in generated code and ``SympyAssignment.replace``.

    Args:
        default_assignment_simplifications: forwarded to
            ``ps.CreateKernelConfig``. ``log1p`` must appear in the code
            exactly when this is truthy; ``log2`` must never appear.

    Afterwards both sides of the assignment are swapped out with ``replace``
    and the new ``lhs`` / ``rhs`` are verified.
    """
    rhs = sp.log(x + 3) / sp.log(2) + sp.log(x**2 + 1)
    assignment = SympyAssignment(dst[0, 0](0), rhs)

    kernel_config = ps.CreateKernelConfig(
        default_assignment_simplifications=default_assignment_simplifications)
    generated = ps.get_code_str(ps.create_kernel([assignment], config=kernel_config))

    if default_assignment_simplifications:
        assert 'log1p' in generated
    else:
        # no optimisations are applied, so the optimised version of log is absent
        assert 'log1p' not in generated
    # with simplifications the constant term is evaluated directly,
    # without them no log2 is introduced in the first place
    assert 'log2' not in generated

    assignment.replace(assignment.lhs, dst[0, 0](1))
    assignment.replace(assignment.rhs, sp.log(2))

    assert assignment.lhs == dst[0, 0](1)
    assert assignment.rhs == sp.log(2)
示例#17
0
def test_issue40(*_):
    """https://i10git.cs.fau.de/pycodegen/pystencils/-/issues/40

    An avx512-vectorized float64 kernel that multiplies ``rho`` by the
    rational 4/9 must not contain ``epi32`` in its generated code.
    """
    vectorize_info = dict(instruction_set="avx512",
                          assume_aligned=False,
                          nontemporal=False,
                          assume_inner_stride_one=True)

    src = ps.fields("src(1): double[2D]", layout='fzyx')
    rho = sp.Symbol('rho')
    equations = [
        ps.Assignment(rho, 1.0),
        ps.Assignment(src[0, 0](0), sp.Rational(4, 9) * rho),
    ]

    kernel_config = ps.CreateKernelConfig(target=Target.CPU,
                                          cpu_vectorize_info=vectorize_info,
                                          data_type='float64')
    generated = ps.get_code_str(ps.create_kernel(equations, config=kernel_config))
    assert 'epi32' not in generated
示例#18
0
 def code(self):
     """Return ``pystencils.get_code_str`` applied to ``self.ast``."""
     generated = pystencils.get_code_str(self.ast)
     return generated