def generate_c_naive_from_accumlate_node(pipe, polyrep, node, body,
                                         cparam_map):
    """
    Emit the accumulation statement for a user node of the isl AST.

    The poly part is recovered from the id of the first argument of the
    node's user expression. Its ``expr.expression`` and
    ``expr.accumulate_ref`` are converted to C and the statement
    ``ref = ref + expression`` is added to `body`. When the poly part has
    a predicate, the statement is wrapped in an if-block on that predicate.

    pipe       -- forwarded to the C expression/condition generators
    polyrep    -- not used by this function
    node       -- isl AST user node
    body       -- block that receives the generated statements
    cparam_map -- map from pipeline parameters to C variables
    """
    poly_part = isl_get_id_user(node.user_get_expr().get_op_arg(0).get_id())
    reduction_vars = poly_part.comp.func.reductionVariables

    # FIXME: this has to be changed similar to Expression Node
    cvar_map = cvariables_from_variables_and_sched(node, reduction_vars,
                                                   poly_part.sched)

    value = generate_c_expr(pipe, poly_part.expr.expression,
                            cparam_map, cvar_map)

    # Statements the expression generator wants emitted ahead of the
    # accumulation are collected here.
    prologue = []
    target = generate_c_expr(pipe, poly_part.expr.accumulate_ref,
                             cparam_map, cvar_map,
                             prologue_stmts=prologue)
    accumulate = genc.CAssign(target, target + value)

    for stmt in prologue:
        body.add(stmt)

    # Unpredicated part: the accumulation goes straight into the body.
    if not poly_part.pred:
        body.add(accumulate)
        return

    guard = genc.CIfThen(generate_c_cond(pipe, poly_part.pred,
                                         cparam_map, cvar_map))
    with guard.if_block as ifblock:
        ifblock.add(accumulate)
    body.add(guard)
def generate_reduction_scan_loops(pipe, group, comp, pipe_body, cparam_map):
    """
    generates code for Reduction class

    A perfect loop nest over the reduction domain of ``comp.func`` is
    created inside `pipe_body`, and every definition of the function is
    turned into an accumulation ``ref = ref + expr`` in the innermost
    loop body.

    pipe       -- forwarded to the C expression/condition generators
    group      -- group used to create the loop variables
    comp       -- computation object; only ``comp.func`` is read
    pipe_body  -- block that receives the outermost loop
    cparam_map -- map from pipeline parameters to C variables

    Definitions that are neither a Reduce nor a Case wrapping a Reduce
    are skipped without generating code.
    """
    func = comp.func

    # Compute Reduction points in lexicographic order of reduction domain
    cvar_map = create_loop_variables(group, func.reductionVariables)

    # Generate loops. lbody is the body of the innermost loop.
    lbody = create_perfect_nested_loop(pipe, group, pipe_body,
                                       func.reductionVariables,
                                       func.reductionDomain,
                                       cparam_map, cvar_map)

    # Convert function definition into a C expression and add it to loop body
    for case in func.defn:
        if isinstance(case, Reduce):
            case_expr = generate_c_expr(pipe, case.expression,
                                        cparam_map, cvar_map)
            ref_args = case.accumulate_ref.arguments
            # The accumulation target is a reference to this function at
            # the Reduce's accumulate arguments (same as the Case branch).
            accum_ref = generate_c_expr(pipe, func(*ref_args),
                                        cparam_map, cvar_map)
            assign = genc.CAssign(accum_ref, accum_ref + case_expr)
            lbody.add(assign, False)
        elif isinstance(case, Case):
            c_cond = generate_c_cond(pipe, case.condition,
                                     cparam_map, cvar_map)
            reduction = case.expression
            if isinstance(reduction, Reduce):
                # Both the accumulated value and the accumulate reference
                # belong to the Reduce wrapped by the Case, not to the
                # Case itself.
                cond_expr = generate_c_expr(pipe, reduction.expression,
                                            cparam_map, cvar_map)
                ref_args = reduction.accumulate_ref.arguments
                accum_ref = generate_c_expr(pipe, func(*ref_args),
                                            cparam_map, cvar_map)
                assign = genc.CAssign(accum_ref, accum_ref + cond_expr)

                cif = genc.CIfThen(c_cond)
                with cif.if_block as ifblock:
                    ifblock.add(assign)
                # Attach the guarded accumulation to the loop body;
                # without this the conditional is built and then dropped.
                lbody.add(cif, False)
def generate_function_scan_loops(pipe, group, comp, pipe_body, cparam_map):
    """
    generates code for Function class

    A perfect loop nest over the domain of ``comp.func`` is created inside
    `pipe_body`, and every definition of the function is turned into an
    assignment ``func(variables) = expr`` in the innermost loop body.
    Definitions given as a Case are wrapped in an if-block on the case
    condition.

    pipe       -- forwarded to the C expression/condition generators
    group      -- group used to create the loop variables
    comp       -- computation object; only ``comp.func`` is read
    pipe_body  -- block that receives the outermost loop
    cparam_map -- map from pipeline parameters to C variables

    Definitions that are neither an AbstractExpression nor a Case wrapping
    one are skipped without generating code.
    """
    func = comp.func

    # Compute function points in lexicographic order of domain
    cvar_map = create_loop_variables(group, func.variables)

    # Generate loops. lbody is the body of the innermost loop.
    lbody = create_perfect_nested_loop(pipe, group, pipe_body,
                                       func.variables, func.domain,
                                       cparam_map, cvar_map)

    arglist = func.variables

    # Convert function definition into a C expression and add it to
    # loop body
    for case in func.defn:
        if isinstance(case, AbstractExpression):
            case_expr = generate_c_expr(pipe, case, cparam_map, cvar_map)
            array_ref = generate_c_expr(pipe, func(*arglist),
                                        cparam_map, cvar_map)
            assign = genc.CAssign(array_ref, case_expr)
            lbody.add(assign, False)
        elif isinstance(case, Case):
            c_cond = generate_c_cond(pipe, case.condition,
                                     cparam_map, cvar_map)
            case_expr = generate_c_expr(pipe, case.expression,
                                        cparam_map, cvar_map)
            cif = genc.CIfThen(c_cond)

            if isinstance(case.expression, AbstractExpression):
                array_ref = generate_c_expr(pipe, func(*arglist),
                                            cparam_map, cvar_map)
                assign = genc.CAssign(array_ref, case_expr)
                # FIXME: aliased referencing works, but direct call to
                # add method fails with assertion on block._is_open()
                with cif.if_block as ifblock:
                    ifblock.add(assign)
                # Attach the guarded assignment to the loop body; without
                # this the conditional is built and then dropped.
                lbody.add(cif, False)
def create_perfect_nested_loop(pipe, group, pipe_body, variables, domains,
                               cparam_map, cvar_map):
    """
    Build one C for-loop per variable, each nested inside the previous one.

    The loop for ``variables[i]`` runs its C variable (``cvar_map`` entry)
    from ``domains[i].lowerBound`` while it is ``<=``
    ``domains[i].upperBound``, incrementing by 1. The outermost loop is
    added to `pipe_body`.

    Returns the body of the innermost loop, or `pipe_body` itself when
    `variables` is empty. `group` is not used here.
    """
    innermost = pipe_body
    for depth, variable in enumerate(variables):
        loop_var = cvar_map[variable]
        interval = domains[depth]

        # Convert lb and ub expressions to C expressions
        lower = generate_c_expr(pipe, interval.lowerBound,
                                cparam_map, cvar_map)
        upper = generate_c_expr(pipe, interval.upperBound,
                                cparam_map, cvar_map)

        for_loop = genc.CFor(genc.CDeclaration(loop_var.typ, loop_var, lower),
                             genc.CCond(loop_var, '<=', upper),
                             genc.CAssign(loop_var, loop_var + 1))

        innermost.add(for_loop, False)
        # The next variable's loop nests inside this one.
        innermost = for_loop.body

    return innermost
def generate_code_for_pipeline(pipeline,
                               is_extern_c_func=False,
                               are_io_void_ptrs=False):
    """
    Build the C module for a whole pipeline.

    The module gets the include/macro section and one function named
    ``pipeline_<pipeline.name>`` whose arguments are, in order, the group
    parameters, the inputs and the outputs (each sorted by name). The
    function body contains the code generated for every scheduled group,
    followed by (or interleaved with, under the 'early_free' option) the
    deallocation of the arrays listed in ``pipeline.free_arrays``.

    pipeline         -- pipeline object to generate code for
    is_extern_c_func -- forwarded to the C function declaration
    are_io_void_ptrs -- when true, inputs/outputs are taken as void
                        arguments named ``<name>_void_arg`` and cast to
                        their element type at the top of the body

    Returns the ``genc.CModule`` that was built.
    """
    g_schedule = pipeline.group_schedule
    sorted_groups = sort_scheduled_objs(g_schedule)

    # Create a top level module for the pipeline
    m = genc.CModule('Pipeline')

    # 1. Add header files which are required by the pipeline
    with m.includes as inc_block:
        inc_block.add(genc.CInclude('stdio.h'))
        inc_block.add(genc.CInclude('stdlib.h'))
        inc_block.add(genc.CInclude('malloc.h'))
        inc_block.add(genc.CInclude('cmath'))
        inc_block.add(genc.CInclude('string.h'))
        if 'pool_alloc' in pipeline.options:
            inc_block.add(genc.CInclude('simple_pool_allocator.h'))
        inc_block.add(genc.CMacroDecl(genc.c_macro_min))
        inc_block.add(genc.CMacroDecl(genc.c_macro_max))
        inc_block.add(genc.CMacroDecl(genc.c_macro_floord))

    # 2. Add function blocks
    with m.funcs as func_block:
        # Maps from pipeline parameters and functions to c variables and
        # arrays. These maps are sent to each group for code generation.
        # They are updated during the code generation of each group to
        # include liveout functions of the group.
        cparam_map = {}

        # Dictionary with all the pipeline arguments
        pipeline_args = OrderedDict()

        # 2.1. Collect all the parameters of the groups, remove
        # duplicates, sort them by name and add them as pipeline function
        # arguments.
        params = []
        for g in sorted_groups:
            params.extend(g.getParameters())
        params = sorted(set(params), key=lambda x: x.name)

        for param in params:
            cvar_type = genc.TypeMap.convert(param.typ)
            cvar = genc.CVariable(cvar_type, param.name)
            # Bind parameters to C variables
            cparam_map[param] = cvar
            pipeline_args[cvar] = cvar.typ

        # 2.2. collect inputs
        inputs = sorted(pipeline.inputs, key=lambda x: x.name)
        for img in inputs:
            if are_io_void_ptrs:
                img_type = genc.c_void
                cptr = genc.CPointer(img_type, 1)
                cvar = genc.CVariable(cptr, img.name + '_void_arg')
            else:
                img_type = genc.TypeMap.convert(img.typ)
                cptr = genc.CPointer(img_type, 1)
                cvar = genc.CVariable(cptr, img.name)
            pipeline_args[cvar] = cvar.typ

        # 2.3. collect outputs
        outputs = sorted(pipeline.outputs, key=lambda x: x.name)
        pass_by_type = genc.CReference
        for out in outputs:
            if are_io_void_ptrs:
                out_typ = genc.c_void
                cptr = pass_by_type(out_typ, 1)
                cvar = genc.CVariable(cptr, out.name + '_void_arg')
            else:
                out_typ = genc.TypeMap.convert(out.typ)
                cptr = pass_by_type(out_typ, 1)
                cvar = genc.CVariable(cptr, out.name)
            pipeline_args[cvar] = cvar.typ

        # 2.4. function name and declaration
        cpipe_name = 'pipeline_' + pipeline.name
        cpipe = genc.CFunction(genc.c_void, cpipe_name, pipeline_args)
        cpipe_decl = genc.CFunctionDecl(cpipe, is_extern_c_func,
                                        are_io_void_ptrs)
        cpipe_body = genc.CFunctionBody(cpipe_decl)
        func_block.add(cpipe_body)

        # 2.5. function body
        with cpipe_body.body as pbody:
            # If the code being generated is going to be compiled as shared
            # library, and used by python (through ctypes), the i/o data
            # array pointers will be given as void pointers. These should
            # be casted to their respective types first.

            # 2.5.1. typecast the i/o array ptrs
            if are_io_void_ptrs:
                # Sorted by name so that the order of the emitted casts
                # does not depend on set iteration order.
                inouts = sorted(set(inputs) | set(outputs),
                                key=lambda x: x.name)
                for inout in inouts:
                    # actual input to be used
                    var_type = genc.TypeMap.convert(inout.typ)
                    var_ptr = genc.CPointer(var_type, 1)
                    var = genc.CVariable(var_type, inout.name)
                    var_decl = genc.CDeclaration(var_ptr, var)
                    pbody.add(var_decl)

                    # dummy void * input/output taken as argument
                    dummy_type = genc.TypeMap.convert(inout.typ)
                    dummy_ptr = genc.CPointer(dummy_type, 1)
                    dummy_cast = genc.CCast(dummy_ptr,
                                            inout.name + '_void_arg')
                    var_assign = genc.CAssign(var, dummy_cast)
                    pbody.add(var_assign)

            # Boolean to check if the memory allocations should be done
            # using malloc or the custom pool allocator
            pooled = 'pool_alloc' in pipeline.options
            early_free = 'early_free' in pipeline.options

            # arrays allocated
            alloc_arrays = []

            # output arrays - not to be de/allocated
            out_comps = [pipeline.func_map[func] for func in pipeline.outputs]
            out_arrays = [comp.array for comp in out_comps]

            # 3. generate code for each group, deallocate arrays not going
            # to be used by any group further
            free_list = []
            for g in sorted_groups:
                generate_code_for_group(pipeline, g, pbody,
                                        out_arrays, alloc_arrays,
                                        cparam_map, outputs)
                if early_free:
                    # deallocate arrays now
                    for array in pipeline.free_arrays[g]:
                        array.deallocate(pbody, pooled)
                else:
                    free_list += pipeline.free_arrays[g]

            if not early_free:
                # Drop duplicates but keep first-seen order, so that the
                # emitted deallocations come out in a stable order.
                for array in OrderedDict.fromkeys(free_list):
                    array.deallocate(pbody, pooled)

    return m
def generate_c_naive_from_isl_ast(pipe, polyrep, node, body,
                                  cparam_map, pooled, perfect_loopnest,
                                  indent=0):
    """
    Recursively translate an isl AST node into C statements added to `body`.

    block nodes -- every child is translated into the same `body`.
    for nodes   -- a C for-loop is emitted, preceded by an OpenMP pragma
                   when the dimension is parallel and an ``ivdep`` pragma
                   when it is a vector dimension; scratchpad arrays used
                   by the user nodes of the loop are declared inside the
                   loop body at the selected loop level.
    if nodes    -- an if / if-else is emitted with translated branches.
    user nodes  -- dispatched on the type of the poly part's expression to
                   the accumulate or the expression node generator.

    pipe             -- pipeline object; ``pipe.options`` is consulted
    polyrep          -- forwarded to the schedule queries and generators
    node             -- isl AST node to translate
    body             -- block that receives the generated statements
    cparam_map       -- map from pipeline parameters to C variables
    pooled           -- forwarded to ``array.deallocate``
    perfect_loopnest -- for-nodes forming the perfect loop nest, outermost
                        first (may be empty)
    indent           -- recursion depth; only passed along

    Raises AssertionError for a user node whose expression is neither a
    Reduce nor an AbstractExpression.
    """
    node_type = node.get_type()

    if node_type == isl._isl.ast_node_type.block:
        children = node.block_get_children()
        for i in range(children.n_ast_node()):
            generate_c_naive_from_isl_ast(pipe, polyrep,
                                          children.get_ast_node(i),
                                          body, cparam_map, pooled,
                                          perfect_loopnest, indent + 1)

    elif node_type == isl._isl.ast_node_type.for_:
        # Convert lb and ub expressions to C expressions. Statements
        # needed ahead of the loop are collected in a prologue list and
        # emitted before it.
        prologue = []
        cond = isl_cond_to_cgen(node.for_get_cond(), prologue)
        var = isl_expr_to_cgen(node.for_get_iterator())
        var_inc = isl_expr_to_cgen(node.for_get_inc())
        incr = genc.CAssign(var, var + var_inc)
        for s in prologue:
            body.add(s)

        prologue = []
        init = isl_expr_to_cgen(node.for_get_init(), prologue)
        for s in prologue:
            body.add(s)

        var_decl = genc.CDeclaration(var.typ, var, init)
        loop = genc.CFor(var_decl, cond, incr)

        # Check if the loop is a parallel or a vector dimension by
        # examining the loop body.
        user_nodes = get_user_nodes_in_body(node.for_get_body())
        dim_parallel = is_sched_dim_parallel(polyrep, user_nodes, var.name)
        dim_vector = is_sched_dim_vector(polyrep, user_nodes, var.name)
        arrays = get_arrays_for_user_nodes(pipe, polyrep, user_nodes)

        # number of loops in the perfectly nested loop
        n_ploops = len(perfect_loopnest)

        if dim_parallel:
            omp_par_str = "omp parallel for schedule(static)"
            # The collapse clause goes only on the outermost loop of a
            # perfect nest with more than one loop.
            if n_ploops > 1 and node == perfect_loopnest[0]:
                omp_par_str += " collapse(" + str(n_ploops) + ")"
            body.add(genc.CPragma(omp_par_str))

        if dim_vector:
            body.add(genc.CPragma("ivdep"))

        body.add(loop)

        # Assuming only one parallel dimension and a whole lot
        # of other things.
        freelist = []

        # check if this loop is at the right level to allocate
        # thread-local scratchpads
        flat_scratch = 'flatten_scratchpad' in pipe.options
        if n_ploops >= 1:
            # innermost perfect loop
            scratchpad_loop = not (node != perfect_loopnest[n_ploops - 1])
        else:
            scratchpad_loop = dim_parallel

        if scratchpad_loop:
            with loop.body as lbody:
                for array in arrays:
                    # add a comment line with a list of comps using this
                    # array.
                    comment = add_users_as_comment(pipe, array)
                    lbody.add(comment)
                    # NOTE: the `or True` forces the array-declaration
                    # path for every array; the pointer + contiguous
                    # allocation path below is currently unreachable.
                    if array.is_constant_size() or True:
                        array_decl = genc.CArrayDecl(array, flat_scratch)
                        lbody.add(array_decl)
                    else:
                        array_ptr = genc.CPointer(array.typ, 1)
                        array_decl = genc.CDeclaration(array_ptr, array)
                        lbody.add(array_decl)
                        array.allocate_contiguous(lbody)
                        freelist.append(array)

        with loop.body as lbody:
            generate_c_naive_from_isl_ast(pipe, polyrep,
                                          node.for_get_body(),
                                          lbody, cparam_map, pooled,
                                          perfect_loopnest, indent + 1)
            # Deallocate storage
            for array in freelist:
                array.deallocate(lbody, pooled)

    elif node_type == isl._isl.ast_node_type.if_:
        if_cond = isl_cond_to_cgen(node.if_get_cond())
        if node.if_has_else():
            cif_else = genc.CIfThenElse(if_cond)
            with cif_else.if_block as ifblock:
                generate_c_naive_from_isl_ast(pipe, polyrep,
                                              node.if_get_then(),
                                              ifblock, cparam_map, pooled,
                                              perfect_loopnest, indent + 1)
            with cif_else.else_block as elseblock:
                generate_c_naive_from_isl_ast(pipe, polyrep,
                                              node.if_get_else(),
                                              elseblock, cparam_map, pooled,
                                              perfect_loopnest, indent + 1)
            body.add(cif_else)
        else:
            cif = genc.CIfThen(if_cond)
            with cif.if_block as ifblock:
                generate_c_naive_from_isl_ast(pipe, polyrep,
                                              node.if_get_then(),
                                              ifblock, cparam_map, pooled,
                                              perfect_loopnest, indent + 1)
            body.add(cif)

    elif node_type == isl._isl.ast_node_type.user:
        # The first argument is the computation object.
        # Retrieving the polyPart.
        part_id = node.user_get_expr().get_op_arg(0).get_id()
        poly_part = isl_get_id_user(part_id)
        if isinstance(poly_part.expr, Reduce):
            generate_c_naive_from_accumlate_node(pipe, polyrep, node, body,
                                                 cparam_map)
        elif isinstance(poly_part.expr, AbstractExpression):
            generate_c_naive_from_expression_node(pipe, polyrep, node, body,
                                                  cparam_map)
        else:
            # Raised explicitly (rather than via `assert`) so the check
            # survives `python -O` and the message is actually reported.
            raise AssertionError("Invalid pipeline stage type:" +
                                 str(poly_part.expr))
def generate_c_naive_from_expression_node(pipe, polyrep, node, body,
                                          cparam_map):
    """
    Emit the assignment statement for an expression user node.

    The poly part is recovered from the id of the first argument of the
    node's user expression. One array index is generated per function
    variable: ``variable - domain lower bound`` by default, or the
    ``_Mul_<name>`` schedule dimension when the dimension has tile info
    other than 'none' and the comp marks it as scratch. The statement
    ``array(indices) = expr`` is added to `body`, wrapped in an if-block
    when the poly part has a predicate.

    pipe       -- forwarded to the C expression/condition generators
    polyrep    -- not used by this function
    node       -- isl AST user node
    body       -- block that receives the generated statements
    cparam_map -- map from pipeline parameters to C variables
    """
    poly_part = isl_get_id_user(node.user_get_expr().get_op_arg(0).get_id())
    func = poly_part.comp.func
    variables = func.variables

    # Get the mapping to the array
    array = poly_part.comp.array
    scratch = poly_part.comp.scratch

    # Dimensions whose tile info is present and not 'none'.
    tile_info = poly_part.dim_tile_info
    acc_scratch = [dim in tile_info and tile_info[dim][0] != 'none'
                   for dim in range(len(variables))]

    cvar_map = cvariables_from_variables_and_sched(node, variables,
                                                   poly_part.sched)

    arglist = []
    scratch_map = {}
    for dim, variable in enumerate(variables):
        acc_expr = variable - func.domain[dim].lowerBound
        if acc_scratch[dim]:
            mul_name = '_Mul_' + variable.name
            mul_pos = poly_part.sched.find_dim_by_name(isl._isl.dim_type.in_,
                                                       mul_name)
            mul_var = Variable(Int, mul_name)
            # Argument 0 of the user expression is the computation id, so
            # schedule input dimension k is read from argument k + 1.
            cvar_map[mul_var] = isl_expr_to_cgen(
                node.user_get_expr().get_op_arg(mul_pos + 1))
            scratch_map[variable] = mul_var
            if scratch[dim]:
                acc_expr = mul_var
        arglist.append(generate_c_expr(pipe, acc_expr, cparam_map,
                                       cvar_map, scratch_map))

    # Statements the expression generator wants emitted ahead of the
    # assignment are collected here.
    prologue = []
    expr = generate_c_expr(pipe, poly_part.expr, cparam_map, cvar_map,
                           scratch_map, prologue_stmts=prologue)
    assign = genc.CAssign(array(*arglist), expr)

    for stmt in prologue:
        body.add(stmt)

    if poly_part.pred:
        ccond = generate_c_cond(pipe, poly_part.pred, cparam_map, cvar_map,
                                scratch_map)
        cif = genc.CIfThen(ccond)
        with cif.if_block as ifblock:
            ifblock.add(assign)
        body.add(cif)
    else:
        body.add(assign)