Exemplo n.º 1
0
def generate_loopy_kernel(slate_expr, tsfc_parameters=None):
    """Compile a Slate expression into a single loopy-based PyOP2 kernel.

    The expression is converted to GEM and then to loopy, merged with the
    kernels managed by a ``LocalLoopyKernelBuilder``, and rendered to
    source code that is handed to ``op2.Kernel`` as a string.

    :arg slate_expr: the Slate expression to compile.  It must not be
        defined on more than one domain.
    :arg tsfc_parameters: mapping of TSFC parameters.  The signature keeps
        ``None`` as default, but a mapping providing ``"scalar_type"`` is
        required because the scalar type is read from it.
    :returns: a one-element tuple containing a ``SplitKernel`` whose index
        has ``None`` in each of the ``slate_expr.rank`` slots.
    :raises NotImplementedError: if the expression has more than one domain.
    :raises TypeError: if ``tsfc_parameters`` is ``None``.
    """
    cpu_time = time.time()
    if len(slate_expr.ufl_domains()) > 1:
        raise NotImplementedError("Multiple domains not implemented.")

    # The scalar type is looked up in this mapping below, so the ``None``
    # default can never succeed.  Fail before doing any work, with the same
    # exception type that subscripting ``None`` would produce, but with a
    # message that names the actual problem.
    if tsfc_parameters is None:
        raise TypeError("tsfc_parameters must be a mapping providing "
                        "'scalar_type', not None.")

    Citations().register("Gibson2018")

    # Create a loopy builder for the Slate expression,
    # e.g. contains the loopy kernels coming from TSFC
    gem_expr, var2terminal = slate_to_gem(slate_expr)

    scalar_type = tsfc_parameters["scalar_type"]
    slate_loopy, output_arg = gem_to_loopy(gem_expr, var2terminal, scalar_type)

    builder = LocalLoopyKernelBuilder(expression=slate_expr,
                                      tsfc_parameters=tsfc_parameters)

    loopy_merged = merge_loopy(slate_loopy, output_arg, builder, var2terminal)
    # Register the lookups for the inverse and solve functions.
    loopy_merged = loopy.register_function_id_to_in_knl_callable_mapper(
        loopy_merged, inv_fn_lookup)
    loopy_merged = loopy.register_function_id_to_in_knl_callable_mapper(
        loopy_merged, solve_fn_lookup)

    # WORKAROUND: Generate code directly from the loopy kernel here,
    # then attach code as a c-string to the op2kernel
    code = loopy.generate_code_v2(loopy_merged).device_code()
    # Mark the generated entry point as ``static`` in the emitted source.
    code = code.replace(f'void {loopy_merged.name}',
                        f'static void {loopy_merged.name}')
    loopykernel = op2.Kernel(code,
                             loopy_merged.name,
                             include_dirs=BLASLAPACK_INCLUDE.split(),
                             ldargs=BLASLAPACK_LIB.split())

    kinfo = KernelInfo(
        kernel=loopykernel,
        # slate can only do things as contributions to the cell integrals
        integral_type="cell",
        oriented=builder.bag.needs_cell_orientations,
        subdomain_id="otherwise",
        domain_number=0,
        coefficient_map=tuple(range(len(slate_expr.coefficients()))),
        needs_cell_facets=builder.bag.needs_cell_facets,
        pass_layer_arg=builder.bag.needs_mesh_layers,
        needs_cell_sizes=builder.bag.needs_cell_sizes)

    # Cache the resulting kernel
    # Slate kernels are never split, so indicate that with None in the index slot.
    idx = (None,) * slate_expr.rank
    logger.info(GREEN % "compile_slate_expression finished in %g seconds.",
                time.time() - cpu_time)
    return (SplitKernel(idx, kinfo), )
Exemplo n.º 2
0
def generate_loopy_kernel(slate_expr, tsfc_parameters=None):
    """Compile a Slate expression into a single loopy-based PyOP2 kernel.

    The expression is converted to GEM and then to loopy, merged with the
    kernels managed by a ``LocalLoopyKernelBuilder`` under the name
    ``"slate_wrapper"``, and the merged loopy object is passed directly to
    ``op2.Kernel``.

    :arg slate_expr: the Slate expression to compile.  It must not be
        defined on more than one domain.
    :arg tsfc_parameters: mapping of TSFC parameters.  The signature keeps
        ``None`` as default, but a mapping providing ``"scalar_type"`` is
        required because the scalar type is read from it.
    :returns: a one-element tuple containing a ``SplitKernel`` whose index
        has ``None`` in each of the ``slate_expr.rank`` slots.
    :raises NotImplementedError: if the expression has more than one domain.
    :raises TypeError: if ``tsfc_parameters`` is ``None``.
    """
    cpu_time = time.time()
    if len(slate_expr.ufl_domains()) > 1:
        raise NotImplementedError("Multiple domains not implemented.")

    # The scalar type is looked up in this mapping below, so the ``None``
    # default can never succeed.  Fail before doing any work, with the same
    # exception type that subscripting ``None`` would produce, but with a
    # message that names the actual problem.
    if tsfc_parameters is None:
        raise TypeError("tsfc_parameters must be a mapping providing "
                        "'scalar_type', not None.")

    Citations().register("Gibson2018")

    # Create a loopy builder for the Slate expression,
    # e.g. contains the loopy kernels coming from TSFC
    gem_expr, var2terminal = slate_to_gem(slate_expr)

    scalar_type = tsfc_parameters["scalar_type"]
    slate_loopy, output_arg = gem_to_loopy(gem_expr, var2terminal, scalar_type)

    builder = LocalLoopyKernelBuilder(expression=slate_expr,
                                      tsfc_parameters=tsfc_parameters)

    name = "slate_wrapper"
    loopy_merged = merge_loopy(slate_loopy, output_arg, builder, var2terminal,
                               name)
    # Register the inverse and solve callables with the merged loopy object.
    loopy_merged = loopy.register_callable(loopy_merged, INVCallable.name,
                                           INVCallable())
    loopy_merged = loopy.register_callable(loopy_merged, SolveCallable.name,
                                           SolveCallable())

    loopykernel = op2.Kernel(loopy_merged,
                             name,
                             include_dirs=BLASLAPACK_INCLUDE.split(),
                             ldargs=BLASLAPACK_LIB.split())

    kinfo = KernelInfo(
        kernel=loopykernel,
        # slate can only do things as contributions to the cell integrals
        integral_type="cell",
        oriented=builder.bag.needs_cell_orientations,
        subdomain_id="otherwise",
        domain_number=0,
        coefficient_map=tuple(range(len(slate_expr.coefficients()))),
        needs_cell_facets=builder.bag.needs_cell_facets,
        pass_layer_arg=builder.bag.needs_mesh_layers,
        needs_cell_sizes=builder.bag.needs_cell_sizes)

    # Cache the resulting kernel
    # Slate kernels are never split, so indicate that with None in the index slot.
    idx = (None,) * slate_expr.rank
    logger.info(GREEN % "compile_slate_expression finished in %g seconds.",
                time.time() - cpu_time)
    return (SplitKernel(idx, kinfo), )
Exemplo n.º 3
0
def generate_kernel(slate_expr, tsfc_parameters=None):
    """Assemble the kernel for a Slate expression from generated statements.

    A ``LocalKernelBuilder`` is created for the expression, the statements
    making up the kernel body are collected in order (terminal temporaries,
    tensor assembly calls, optional coefficient temporaries, auxiliary
    expressions), and the kernel information is produced from them by
    ``generate_kernel_ast``.

    :arg slate_expr: the Slate expression to compile.
    :arg tsfc_parameters: forwarded unchanged to ``LocalKernelBuilder``.
    :returns: a one-element tuple containing a ``SplitKernel`` whose index
        has ``0`` in each of the ``slate_expr.rank`` slots.
    :raises NotImplementedError: if the expression is mixed, or if it has
        more than one domain.
    """
    started = time.time()

    # TODO: Get PyOP2 to write into mixed dats
    if slate_expr.is_mixed:
        raise NotImplementedError("Compiling mixed slate expressions")

    domains = slate_expr.ufl_domains()
    if len(domains) > 1:
        raise NotImplementedError("Multiple domains not implemented.")

    Citations().register("Gibson2018")

    # Builder for the Slate expression
    builder = LocalKernelBuilder(expression=slate_expr,
                                 tsfc_parameters=tsfc_parameters)

    # Temporaries declared so far; shared by the helper calls below, so
    # their order is kept exactly as is.
    temps = {}
    body = []

    # Terminal tensor temporaries, followed by the tensor assembly calls
    body.extend(terminal_temporaries(builder, temps))
    body.extend(tensor_assembly_calls(builder))

    # Coefficient temporaries are only needed when the builder has any
    if builder.coefficient_vecs:
        body.extend(coefficient_temporaries(builder, temps))

    # Auxiliary temporaries/expressions (if necessary)
    body.extend(auxiliary_expressions(builder, temps))

    # Kernel information with the complete AST
    kinfo = generate_kernel_ast(builder, body, temps)

    # Cache the resulting kernel
    index = (0,) * slate_expr.rank
    logger.info(GREEN % "compile_slate_expression finished in %g seconds.",
                time.time() - started)
    return (SplitKernel(index, kinfo), )