def visit_AugAssign(self, node):
    """Rewrite an augmented assignment, using AVX intrinsics when vectorizing.

    The right-hand side is always visited first.  With ``self.vectorize``
    off, the target is visited too and the node is returned unchanged
    otherwise.  With it on, the cases are tried in this order:

    * target mentions ``self.loop_var``: return a
      ``simd_macros.mm256_store_ps`` of ``target <op> value``;
    * ``+=`` of a multiplication: return ``target = _mm256_fmadd_ps(l, r, target)``;
    * ``+=`` of a function call: return ``target = _mm256_add_ps(value, target)``;
    * target is an array reference: raise ``NotImplementedError``;
    * anything else: visit the target and return the node.
    """
    node.value = self.visit(node.value)

    if not self.vectorize:
        node.target = self.visit(node.target)
        return node

    if util.contains_symbol(node.target, self.loop_var):
        combined = C.BinaryOp(self.visit(node.target), node.op, node.value)
        return simd_macros.mm256_store_ps(node.target, combined)

    is_accumulate = isinstance(node.op, C.Op.Add)

    if (is_accumulate and isinstance(node.value, C.BinaryOp)
            and isinstance(node.value.op, C.Op.Mul)):
        # A disabled variant once wrapped the value in a "vsum" call when the
        # target was not a plain SymbolRef; only the fused form remains.
        fused = C.FunctionCall(
            C.SymbolRef("_mm256_fmadd_ps"),
            [node.value.left, node.value.right, node.target])
        return C.Assign(node.target, fused)

    if is_accumulate and isinstance(node.value, C.FunctionCall):
        # TODO: Verify it's a vector intrinsic
        summed = C.FunctionCall(
            C.SymbolRef("_mm256_add_ps"), [node.value, node.target])
        return C.Assign(node.target, summed)

    if (isinstance(node.target, C.BinaryOp)
            and isinstance(node.target.op, C.Op.ArrayRef)):
        raise NotImplementedError()

    node.target = self.visit(node.target)
    return node
def test_recursive_fold(self):
    """``c = (2 + -2) + b`` should print the same as ``c = b`` after folding."""
    cancelling_pair = C.Add(C.Constant(2), C.Constant(-2))
    unfolded = C.Assign(
        C.SymbolRef("c"),
        C.Add(cancelling_pair, C.SymbolRef("b")))

    folded = ConstantFold().visit(unfolded)

    expected = C.Assign(C.SymbolRef("c"), C.SymbolRef("b"))
    self.assertEqual(str(folded), str(expected))
def transform(self, py_ast, program_cfg):
    """Lower a Python function AST to a single generated C file.

    ``program_cfg`` is unpacked as ``(arg_cfg, tune_cfg)``; only ``arg_cfg``
    is used.  The first top-level definition receives an extra ``retval``
    parameter typed like the first argument, its statements are rewritten by
    a ``MapTransformer`` and wrapped in one ``for i in [0, length)`` loop
    carrying the ``ivdep`` pragma, and the result goes through
    ``DeclarationFiller`` and ``HwachaVectorize``.

    Returns a one-element list holding the ``CFile`` named ``"generated"``.
    """
    arg_cfg, _tune_cfg = program_cfg
    tree = PyBasicConversions().visit(py_ast)
    kernel = tree.body[0]

    kernel.params.append(C.SymbolRef("retval", arg_cfg[0]()))

    # Annotate arguments.  zip() stops at the shorter sequence, so only as
    # many parameters as there are entries in arg_cfg are annotated.
    param_dict = {}
    for param, arg_type in zip(kernel.params, arg_cfg):
        param.type = arg_type()
        param_dict[param.name] = arg_type._dtype_

    length = np.prod(arg_cfg[0]._shape_)
    transformer = MapTransformer("i", param_dict, "retval")
    body = [transformer.visit(stmt) for stmt in kernel.defn]

    loop_init = C.Assign(C.SymbolRef("i", ct.c_int()), C.Constant(0))
    loop_test = C.Lt(C.SymbolRef("i"), C.Constant(length))
    loop_step = C.PostInc(C.SymbolRef("i"))
    kernel.defn = [
        C.For(loop_init, loop_test, loop_step, body=body, pragma="ivdep")
    ]

    tree = DeclarationFiller().visit(tree)
    defns = []  # filled in by HwachaVectorize
    tree = HwachaVectorize(param_dict, defns).visit(tree)

    preamble = [
        StringTemplate("#include <stdlib.h>"),
        StringTemplate("#include <stdint.h>"),
        StringTemplate("#include <assert.h>"),
        StringTemplate('extern "C" void __hwacha_body(void);'),
    ]
    return [CFile("generated", preamble + defns + [tree])]
def gen_loop_nest(self, loopvars, cfg):
    """Build a perfectly nested ``for`` loop, one level per loop variable.

    Level ``k`` counts ``loopvars[k]`` from 0 up to ``cfg.shape[k]``.  The
    shape is also recorded in ``self.loop_shape_map`` under ``loopvars``.

    Returns ``(outermost_loop, innermost_loop)``; they are the same node when
    only one level is generated.
    """
    def counted_loop(var, extent, loop_body):
        return C.For(
            C.Assign(C.SymbolRef(var, ct.c_int()), C.Constant(0)),
            C.Lt(C.SymbolRef(var), C.Constant(extent)),
            C.PostInc(C.SymbolRef(var)),
            loop_body)

    # `open_body` is the list object handed to the most recently built loop;
    # the next level is appended to that same list.
    open_body = []
    outermost = counted_loop(loopvars[0], cfg.shape[0], open_body)
    innermost = outermost

    for var, extent in zip(loopvars[1:], cfg.shape[1:]):
        innermost = counted_loop(var, extent, [])
        open_body.append(innermost)
        open_body = innermost.body

    self.loop_shape_map[loopvars] = cfg.shape
    return outermost, innermost
def visit(self, node):
    """Promote repeated vector loads in a statement list to named registers.

    After the default visit, any node with a ``body`` is rewritten:

    * each load reported by a ``VectorLoadCollector`` is assigned once to a
      fresh register, declared just before the first statement that needs it;
    * later occurrences of that load are replaced by the register;
    * an ``_mm*_store*`` call whose destination matches a promoted load is
      turned into an assignment to the register, and the real store is
      emitted once at the end of the body.

    Raises:
        AssertionError: a deferred store intrinsic is neither an ``epi32``
            nor a ``ps`` variant.
    """
    node = super().visit(node)
    if not hasattr(node, 'body'):
        return node

    new_body = []
    seen = {}    # load key -> (register name, load operand, load intrinsic)
    stores = []  # deferred (destination, register name, store intrinsic)
    collector = VectorLoadCollector()

    for s in node.body:
        collector.visit(s)

        # Declare a register for every load not promoted yet.
        for key in collector.loads:
            if key in seen:
                continue
            reg = self._gen_register()
            load_node, _count, func = collector.loads[key]
            seen[key] = (reg, load_node, func)
            self.sym[reg] = get_simd_type()()
            new_body.append(
                C.Assign(
                    C.SymbolRef(reg, get_simd_type()()),
                    C.FunctionCall(C.SymbolRef(func), [load_node])))

        # A store to a promoted location becomes a register write; the
        # memory store itself is postponed to the end of the body.
        if (isinstance(s, C.FunctionCall) and "_mm" in s.func.name
                and "_store" in s.func.name):
            destination_key = s.args[0].codegen()
            if destination_key in seen:
                reg = seen[destination_key][0]
                stores.append((s.args[0], reg, s.func.name))
                s = C.Assign(C.SymbolRef(reg), s.args[1])

        # Substitute registers for every promoted load in this statement.
        for reg, load_node, func in seen.values():
            replacer = VectorLoadReplacer(
                C.FunctionCall(C.SymbolRef(func), [load_node]).codegen(),
                C.SymbolRef(reg))
            s = replacer.visit(s)
        new_body.append(s)

    for target, reg, name in stores:
        if "epi32" in name:
            new_body.append(store_epi32(target, C.SymbolRef(reg)))
        elif "ps" in name:
            new_body.append(store_ps(target, C.SymbolRef(reg)))
        else:
            # The original `assert (false)` referenced an undefined name and
            # surfaced as NameError; fail with the intended assertion.
            raise AssertionError(
                "unsupported vector store intrinsic: {}".format(name))

    node.body = util.flatten(new_body)
    return node
def visit_For(self, node):
    """Insert software prefetches next to matching vector reads.

    Children are visited first.  Only the loop whose induction variable is
    ``self.enclosing_loop_var`` is rewritten.  Up to ``self.prefetch_count``
    assignments from ``_mm*`` load/set1/broadcast intrinsics whose operand
    satisfies ``check_name(..., self.prefetch_field)`` each produce a
    prefetch call plus an increment of ``prefetch_offset_var``.  Those two
    statements go right after the read, or into
    ``HoistPrefetch.escape_body`` when this loop is not
    ``self.prefetch_dest_loop``.  If anything was emitted, a zero
    initialisation of ``prefetch_offset_var`` is queued on
    ``InitPrefetcher.init_body``.
    """
    node.body = util.flatten([self.visit(child) for child in node.body])
    if node.init.left.name != self.enclosing_loop_var:
        return node

    def reads_vector(stmt):
        if not (isinstance(stmt, C.BinaryOp)
                and isinstance(stmt.op, C.Op.Assign)):
            return False
        if not isinstance(stmt.right, C.FunctionCall):
            return False
        intrinsic = stmt.right.func.name
        if "_mm" not in intrinsic:
            return False
        return ("_load_" in intrinsic or "_set1" in intrinsic
                or "_broadcast" in intrinsic)

    rebuilt = []
    emitted = False
    remaining = self.prefetch_count

    for stmt in node.body:
        rebuilt.append(stmt)
        if not (remaining > 0 and reads_vector(stmt)):
            continue

        operand = stmt.right.args[0]
        ast.dump(operand)  # result is discarded, as in the original code
        if not check_name(operand, self.prefetch_field):
            continue

        new_array_ref = self.rewrite_arg(deepcopy(operand))
        remaining -= 1

        if node.init.left.name != self.prefetch_dest_loop:
            sink = HoistPrefetch.escape_body
        else:
            sink = rebuilt
        emitted = True

        sink.append(
            C.FunctionCall(
                C.SymbolRef(prefetch_symbol_table[self.cacheline_hint]),
                [C.Add(new_array_ref, C.SymbolRef("prefetch_offset_var"))]))
        sink.append(
            C.Assign(
                C.SymbolRef("prefetch_offset_var"),
                C.Add(C.SymbolRef("prefetch_offset_var"),
                      C.Constant(self.prefetch_offset))))

    if emitted:
        InitPrefetcher.init_body.append(
            C.Assign(
                C.SymbolRef("prefetch_offset_var", ctypes.c_int()),
                C.Constant(0)))

    node.body = rebuilt
    return node
def visit_AugAssign(self, node):
    """Vectorize an augmented assignment, transposing the target if needed.

    The right-hand side is visited first.  Then:

    * If the target mentions ``self.loop_var`` but not in its last index,
      the index equal to ``self.loop_var`` is removed from the reference
      chain and re-appended as the last index, the buffer symbol gets a
      ``"_transposed"`` suffix, and the position of the moved index is
      recorded in ``self.transposed_buffers``.  The result is a ``store_ps``
      of ``<visited original target> <op> value``.
    * If the last index already mentions ``self.loop_var``, the result is a
      ``store_ps`` of ``<visited target> <op> value``.
    * ``+=`` of a function call becomes ``target = _mm256_add_ps(value, target)``.
    * Any other array-reference target raises ``NotImplementedError``.
    * Otherwise the target is visited and the node is returned.

    Raises:
        NotImplementedError: a buffer was already transposed at a different
            index position, or the target is an unsupported array reference.
    """
    def store_destination(target):
        # A constant zero index uses the base operand directly; anything
        # else takes a reference to the element.  The original compared
        # `target.value`, an attribute read on the array-reference node
        # instead of on its index; the index constant is what is meant.
        if isinstance(target.right, C.Constant) and target.right.value == 0.0:
            return target.left
        return C.Ref(target)

    node.value = self.visit(node.value)

    if util.contains_symbol(node.target, self.loop_var):
        if not util.contains_symbol(node.target.right, self.loop_var):
            # Keep a visited copy of the untouched target for the value side.
            target = self.visit(deepcopy(node.target))

            # Walk left until the next index down is the loop variable,
            # counting how deep it sits.
            curr_node = node.target
            idx = 1
            while curr_node.left.right.name != self.loop_var:
                curr_node = curr_node.left
                idx += 1
            # Splice that index out and re-append it as the last index.
            curr_node.left = curr_node.left.left
            node.target = C.ArrayRef(node.target, C.SymbolRef(self.loop_var))

            # Continue down to the buffer symbol itself.
            while not isinstance(curr_node, C.SymbolRef):
                curr_node = curr_node.left
            if (curr_node.name in self.transposed_buffers
                    and self.transposed_buffers[curr_node.name] != idx):
                raise NotImplementedError()
            self.transposed_buffers[curr_node.name] = idx
            curr_node.name += "_transposed"

            return store_ps(
                store_destination(node.target),
                C.BinaryOp(target, node.op, node.value))

        # Destination is chosen before the target is visited, matching the
        # original evaluation order.
        destination = store_destination(node.target)
        return store_ps(
            destination,
            C.BinaryOp(self.visit(node.target), node.op, node.value))

    if isinstance(node.op, C.Op.Add) and isinstance(node.value, C.FunctionCall):
        # TODO: Verify it's a vector intrinsic
        return C.Assign(
            node.target,
            C.FunctionCall(C.SymbolRef("_mm256_add_ps"),
                           [node.value, node.target]))

    if (isinstance(node.target, C.BinaryOp)
            and isinstance(node.target.op, C.Op.ArrayRef)):
        raise NotImplementedError(node)

    node.target = self.visit(node.target)
    return node
def visit_For(self, node):
    """Forward stored vector registers to later loads of an aliased location.

    Works on the flattened loop body, statement by statement:

    * An ``_mm*_store*`` call whose destination is a replace-map source is
      recorded in the def-use map.  If the stored value is a symbol, the
      statement is kept.  If it is a function call, the value is first bound
      to a fresh register and the store is re-emitted with that register.
      For any other stored value the statement is not re-emitted.
    * An assignment from an ``_mm*_load*`` call whose operand is a
      replace-map sink is dropped, and ``sym_map[lhs] = register`` recorded,
      when the aliased source has a known register; otherwise it is kept.
    * Everything else is kept.

    The surviving statements are then visited and flattened.
    """
    def is_tracked_store(stmt):
        return (isinstance(stmt, C.FunctionCall)
                and "_mm" in stmt.func.name
                and "_store" in stmt.func.name
                and inReplaceMapSource(stmt.args[0], self.replace_map))

    def is_tracked_load(stmt):
        return (isinstance(stmt, C.BinaryOp)
                and isinstance(stmt.op, C.Op.Assign)
                and isinstance(stmt.left, C.SymbolRef)
                and isinstance(stmt.right, C.FunctionCall)
                and "_mm" in stmt.right.func.name
                and "_load" in stmt.right.func.name
                and inReplaceMapSink(stmt.right.args[0], self.replace_map))

    node.body = util.flatten(list(node.body))
    kept = []

    for stmt in node.body:
        if is_tracked_store(stmt):
            stored_value = stmt.args[1]
            if isinstance(stored_value, C.SymbolRef):
                store_in_du_map(extract_reference(stmt.args))
                self.seen[stored_value.name] = None
                kept.append(stmt)
            elif (isinstance(stored_value, C.FunctionCall)
                  and "_mm" in stmt.func.name):
                tmp = self._gen_register()
                kept.append(
                    C.Assign(C.SymbolRef(tmp, get_simd_type()()),
                             deepcopy(stored_value)))
                kept.append(
                    C.FunctionCall(C.SymbolRef(stmt.func.name),
                                   [stmt.args[0], C.SymbolRef(tmp, None)]))
                # A second, separate call node is built only to derive the
                # reference recorded in the def-use map.
                shadow_call = C.FunctionCall(
                    C.SymbolRef(stmt.func.name),
                    [stmt.args[0], C.SymbolRef(tmp, None)])
                store_in_du_map(extract_reference(shadow_call.args))
                self.seen[tmp] = None
            # NOTE(review): a store of any other value kind falls through
            # without being appended - confirm this is intended.
            continue

        if is_tracked_load(stmt):
            forwarded = False
            source = get_alias(stmt.right.args, self.replace_map)
            if source is not None:
                sym_arr_ref = construct_arr_reference(
                    source, deepcopy(stmt.right.args))
                if in_du_map(sym_arr_ref):
                    reg = get_register(sym_arr_ref)
                    if str(reg.name) in self.seen:
                        sym_map[stmt.left.name] = reg
                        forwarded = True
            if not forwarded:
                kept.append(stmt)
            continue

        kept.append(stmt)

    node.body = util.flatten([self.visit(s) for s in kept])
    return node
def visit(self, node):
    """Broadcast tracked scalar definitions into SIMD registers.

    After the default visit, every node with a ``body`` is rewritten:

    * The first typed assignment to a symbol listed in ``self.variables``
      is kept and followed by a new assignment that broadcasts it into a
      fresh register.  The register is remembered in ``self.defs`` and
      ``self.symbol_table``.
    * Every other statement has the symbols in ``self.defs`` replaced by
      their registers.  For assignments, a plain-symbol right-hand side is
      additionally wrapped in a broadcast when the left-hand side has a
      SIMD type, either on the node or in ``self.symbol_table``.
    """
    node = super().visit(node)
    if not hasattr(node, 'body'):
        return node

    def is_simd(candidate):
        return (isinstance(candidate, get_simd_type(ctypes.c_int()))
                or isinstance(candidate, get_simd_type(ctypes.c_float())))

    def substitute_defs(stmt):
        for name in self.defs:
            stmt = replace_symbol(name, self.defs[name], stmt)
        return stmt

    rewritten = []
    for s in node.body:
        if not (isinstance(s, C.BinaryOp) and isinstance(s.op, C.Op.Assign)):
            rewritten.append(substitute_defs(s))
            continue

        # Anand - needs more work 27th June 2017
        lhs = s.left
        first_definition = (
            isinstance(lhs, C.SymbolRef)
            and (lhs.type is not None)
            and lhs.name in self.variables
            and lhs.name not in self.defs)

        if first_definition:
            y = self._gen_register()
            broadcast_stmt = C.Assign(
                C.SymbolRef(y, get_simd_type(lhs.type)()),
                broadcast_ss(C.SymbolRef(lhs.name, None), lhs.type))
            rewritten.append(s)
            rewritten.append(broadcast_stmt)
            self.defs[lhs.name] = C.SymbolRef(y, None)
            self.symbol_table[y] = get_simd_type(lhs.type)()
            continue

        s = substitute_defs(s)
        if is_simd(s.left.type) and isinstance(s.right, C.SymbolRef):
            s.right = broadcast_ss(
                C.SymbolRef(s.right.name, None), s.left.type)
        elif (isinstance(s.left, C.SymbolRef)
              and s.left.name in self.symbol_table
              and is_simd(self.symbol_table[s.left.name])
              and isinstance(s.right, C.SymbolRef)):
            s.right = broadcast_ss(
                C.SymbolRef(s.right.name, None),
                self.symbol_table[s.left.name])
        rewritten.append(s)

    node.body = util.flatten(rewritten)
    return node
def visit_FunctionDecl(self, node):
    """Instrument each top-level ``for`` loop of a function with timers.

    Loop number ``k`` is preceded by ``times[k] -= omp_get_wtime()`` and
    followed by ``times[k] += omp_get_wtime()``.  The function body is
    prefixed with a declaration of ``*times`` and an assignment from
    ``calloc_doubles(<loop count>)``, and suffixed with one ``printf`` per
    loop reporting ``times[k]``.
    """
    def slot(index):
        return C.ArrayRef(C.SymbolRef('times'), C.Constant(index))

    def wall_clock():
        return C.FunctionCall('omp_get_wtime', [])

    instrumented = []
    loops_seen = 0
    for statement in node.defn:
        if isinstance(statement, (ast.For, C.For)):
            start_timer = C.SubAssign(slot(loops_seen), wall_clock())
            stop_timer = C.AddAssign(slot(loops_seen), wall_clock())
            instrumented += [start_timer, statement, stop_timer]
            loops_seen += 1
        else:
            instrumented.append(statement)

    declare = C.Assign(
        C.SymbolRef("*times", ctypes.c_double()), C.Constant(0))
    allocate = C.Assign(
        C.SymbolRef('times'),
        C.FunctionCall(C.SymbolRef('calloc_doubles'),
                       [C.Constant(loops_seen)]))
    reports = [
        C.FunctionCall(
            C.SymbolRef('printf'),
            [C.String("\ttimes[%d] = %g\\n"), C.Constant(i), slot(i)])
        for i in range(loops_seen)
    ]

    node.defn = [declare, allocate] + instrumented + reports
    return node
def gen_vector_cmp_instruction(dest, src1, src2, type_map, symbol_map):
    """Build ``dest = <cmp intrinsic>(src1, src2, _MM_CMPINT_GT)``.

    The intrinsic is picked from the SIMD type shared by both operands, as
    resolved by ``get_type``: ``m256``/``m512`` select the ``ps`` mask
    compare, ``m256i``/``m512i`` the ``epi32`` one.  Both operand types must
    resolve, and they must be instances of the same supported type;
    otherwise an assertion fails.
    """
    src1_type = get_type(src1, type_map, symbol_map)
    src2_type = get_type(src2, type_map, symbol_map)
    assert src1_type is not None
    assert src2_type is not None

    # (attribute of simd.types, intrinsic name), tried in order.  The type
    # is looked up lazily so later entries are only touched when needed.
    dispatch = (
        ("m256", "_mm256_cmp_ps_mask"),
        ("m512", "_mm512_cmp_ps_mask"),
        ("m256i", "_mm256_cmp_epi32_mask"),
        ("m512i", "_mm512_cmp_epi32_mask"),
    )
    for type_attr, intrinsic in dispatch:
        vector_type = getattr(simd.types, type_attr)
        if (isinstance(src1_type, vector_type)
                and isinstance(src2_type, vector_type)):
            compare = C.FunctionCall(
                C.SymbolRef(intrinsic),
                [src1, src2, C.SymbolRef("_MM_CMPINT_GT", None)])
            return C.Assign(dest, compare)

    assert False
def visit_For(self, node):
    """Wrap the target loop's body with register loads and, for values, stores.

    Children are visited first.  In the loop whose induction variable is
    ``self.target_loop_var``, every ``(var, location)`` pair in
    ``self.seen`` gets a ``float var = location`` at the top of the body.
    When ``self.target`` is ``"value"`` a matching ``location = var`` is
    appended at the bottom; per the original note, "grad" is only read and
    so is never written back.
    """
    node.body = [self.visit(child) for child in node.body]
    if node.init.left.name != self.target_loop_var:
        return node

    loads = [
        C.Assign(C.SymbolRef(var, ctypes.c_float()), location)
        for var, location in self.seen.values()
    ]
    # Repeated insert-at-front leaves the loads in reverse iteration order.
    node.body[:0] = loads[::-1]

    # we only store the value register as "grad" is only read by definition
    if self.target == "value":
        node.body.extend(
            C.Assign(location, C.SymbolRef(var))
            for var, location in self.seen.values())
    return node
def gen_mask_move_instruction(dest, src1, selector, src2, type_map,
                              symbol_map):
    """Build ``dest = <mask_mov intrinsic>(src1, selector, src2)``.

    The intrinsic is picked from the SIMD type shared by ``src1`` and
    ``src2``, as resolved by ``get_type``: ``m256``/``m512`` select the
    ``ps`` variant, ``m256i``/``m512i`` the ``epi32`` one.  Both types must
    resolve, and they must be instances of the same supported type;
    otherwise an assertion fails.
    """
    src1_type = get_type(src1, type_map, symbol_map)
    src2_type = get_type(src2, type_map, symbol_map)
    assert src1_type is not None
    assert src2_type is not None

    # (attribute of simd.types, intrinsic name), tried in order and looked
    # up lazily.
    dispatch = (
        ("m256", "_mm256_mask_mov_ps"),
        ("m512", "_mm512_mask_mov_ps"),
        ("m256i", "_mm256_mask_mov_epi32"),
        ("m512i", "_mm512_mask_mov_epi32"),
    )
    for type_attr, intrinsic in dispatch:
        vector_type = getattr(simd.types, type_attr)
        if (isinstance(src1_type, vector_type)
                and isinstance(src2_type, vector_type)):
            move = C.FunctionCall(
                C.SymbolRef(intrinsic), [src1, selector, src2])
            return C.Assign(dest, move)

    assert False
def block_loop(self, node):
    """Split ``node`` into a blocked outer loop and a bounded inner loop.

    A new outer loop is pushed on the front of ``self.nest``.  Its variable
    is the original loop variable's name doubled (``i`` becomes ``ii``), it
    covers the original range, and it advances by ``self.block_factor``.
    Its body is the placeholder ``[None]``.  ``node`` itself is rewritten in
    place to start at the outer variable and stop at
    ``fmin(outer + block_factor, original_bound)``.  Nothing is returned.
    """
    inner_name = node.init.left.name
    outer_name = inner_name + inner_name
    lower_bound = node.init.right
    upper_bound = node.test.right

    outer_loop = C.For(
        C.Assign(C.SymbolRef(outer_name, node.init.left.type), lower_bound),
        C.Lt(C.SymbolRef(outer_name), upper_bound),
        C.AddAssign(C.SymbolRef(outer_name), C.Constant(self.block_factor)),
        [None])
    self.nest.insert(0, outer_loop)

    node.init.right = C.SymbolRef(outer_name)
    block_end = C.Add(
        C.SymbolRef(outer_name), C.Constant(self.block_factor))
    node.test.right = C.FunctionCall(
        C.SymbolRef("fmin"), [block_end, upper_bound])
def visit_FunctionDef(self, node):
    """Type a Python function's parameters and materialise collected decls.

    ``self.decls`` is reset, the body statements are visited into
    ``node.defn``, and the parameter list is rebuilt as typed ``SymbolRef``
    nodes, skipping ``self``.  Types come from ``self.symbol_table``.
    Afterwards each entry gathered in ``self.decls`` becomes either an extra
    array-pointer parameter (for ``Array`` values) or a constant assignment
    inserted at the front of ``node.body``.
    """
    def array_pointer(array):
        return np.ctypeslib.ndpointer(array.dtype, array.ndim, array.shape)()

    self.decls = {}
    node.defn = [self.visit(stmt) for stmt in node.body]

    new_params = []
    for param in node.args.args:
        # Python 3 argument nodes carry `.arg`; Python 2 Name nodes `.id`.
        _id = param.arg if sys.version_info > (3, 0) else param.id
        if _id == 'self':
            continue
        bound = self.symbol_table[_id]
        if isinstance(bound, Array):
            _type = array_pointer(bound)
        else:
            _type = get_ctype(bound)
        new_params.append(C.SymbolRef(_id, _type))

    for name, value in self.decls.items():
        if isinstance(value, Array):
            c_type = array_pointer(value)
            _address = value.ctypes.data  # read but unused, as in the original
            new_params.append(C.SymbolRef(name, c_type))
            continue

        if value is True or value is False:
            # Booleans are emitted as the C ints 1 / 0.
            literal = int(value)
            c_type = ct.c_int()
        else:
            literal = value
            c_type = get_ctype(value)
        node.body.insert(
            0, C.Assign(C.SymbolRef(name, c_type), C.Constant(literal)))

    node.args.args = new_params
    return node
def visit_RangeDim(self, node):
    """Lower a ``range_dim`` loop over one mapping dimension to a C loop.

    The loop body is visited first.  Three shapes are produced:

    * LRN ensembles shorter than ``latte.config.SIMDWIDTH``: a plain counted
      loop.  For a tiled dimension the ``<var>_outer`` / ``<var>_inner``
      input indices are rewritten to a division / modulo by SIMDWIDTH of
      ``var + _input_offset_<dim+1>_inner``; otherwise the index is scaled
      by the mapping step.
    * Tiled dimension (other ensembles): an empty ``<var>_outer`` loop is
      queued on ``self.tiled_loops`` and the ``<var>_inner`` loop holding
      the body is returned.
    * Anything else: a plain counted loop with the index scaled by the step.

    Raises:
        NotImplementedError: the child loop does not iterate ``range_dim``.
    """
    loop_iter = node.child_for.iter
    ensemble = node.ensemble  # unused; attribute read kept from the original
    ndim = node.mapping.ndim  # unused
    dim = loop_iter.args[1].n
    offset = node.mapping.get_offset(dim)  # unused
    step = node.mapping.get_step(dim)
    length = len(node.mapping.shape[dim])

    if not (isinstance(loop_iter, ast.Call)
            and loop_iter.func.id == "range_dim"):
        raise NotImplementedError()

    loop_var = node.child_for.target.id
    body = [self.visit(s) for s in node.child_for.body]

    def counted_loop(var, bound, loop_body):
        return C.For(
            C.Assign(C.SymbolRef(var, ctypes.c_int()), C.Constant(0)),
            C.Lt(C.SymbolRef(var), C.Constant(bound)),
            C.AddAssign(C.SymbolRef(var), C.Constant(1)),
            loop_body)

    def rewrite(stmts, make_updater):
        # A fresh updater, and fresh replacement nodes, per statement.
        return [make_updater().visit(s) for s in stmts]

    def offset_symbol():
        return C.SymbolRef("_input_offset_{}_inner".format(dim + 1))

    def dim_is_tiled():
        if self.direction == "forward":
            key = "inputs"
        elif self.direction in ["backward", "update_internal"]:
            key = "grad_inputs"
        else:
            return False
        tiling = self.ensemble.tiling_info
        return key in tiling and any(dim == entry[0] for entry in tiling[key])

    def strided_loop():
        scaled = rewrite(
            body,
            lambda: UpdateInputIndices(
                loop_var,
                C.Mul(C.SymbolRef(loop_var), C.Constant(step))))
        return counted_loop(loop_var, length, scaled)

    # FIXME: This check does not cover general cases
    # ANAND-special casing for LRN, needs refactoring
    if (isinstance(self.ensemble, latte.ensemble.LRNEnsemble)
            and length < latte.config.SIMDWIDTH):
        if not dim_is_tiled():
            return strided_loop()

        body = rewrite(
            body,
            lambda: UpdateInputIndices(
                loop_var + "_outer",
                C.Div(
                    C.Add(C.SymbolRef(loop_var), offset_symbol()),
                    C.Constant(latte.config.SIMDWIDTH))))
        body = rewrite(
            body,
            lambda: UpdateInputIndices(
                "_input_offset_{}_inner".format(dim + 1), C.Constant(0)))
        body = rewrite(
            body,
            lambda: UpdateInputIndices(
                loop_var + "_inner",
                C.Mod(
                    C.Add(C.SymbolRef(loop_var), offset_symbol()),
                    C.Constant(latte.config.SIMDWIDTH))))
        return counted_loop(loop_var, length, body)

    if dim_is_tiled():
        outer_loop = counted_loop(
            loop_var + "_outer", length // latte.config.SIMDWIDTH, [])
        self.tiled_loops.append(outer_loop)

        if self.direction == "forward" and length < latte.config.SIMDWIDTH:
            inner_bound = length
        else:
            inner_bound = latte.config.SIMDWIDTH
        return counted_loop(loop_var + "_inner", inner_bound, body)

    return strided_loop()
def visit_For(self, node):
    """Outline a parallel loop into OpenCL kernels and host launch code.

    Only loops flagged ``parallel`` are supported.  If every statement in
    the body is itself a parallel ``for``, each inner loop becomes a 2-D
    kernel (supports depth one nesting with collapse); otherwise the whole
    body becomes one 1-D kernel.  Loop variables are bound to
    ``get_global_id`` inside the kernel.  For every name in
    ``node.reduce_vars`` a reduction loop from ``_gen_reduce_for_loop`` is
    appended.

    Returns the list of host-side statements replacing the loop.

    Raises:
        NotImplementedError: the loop is not marked parallel.
    """
    if not (hasattr(node, 'parallel') and node.parallel):
        raise NotImplementedError(node)

    outer_var = node.init.left.name
    outer_len = node.test.right
    host_code = []

    def emit_kernel(body, index_vars):
        """Build one kernel from `body`; return its generated name."""
        kernel_args = set()
        kernel_name = self._gen_unique_kernel_name()
        params = self.collect_args_and_insert_casts(kernel_args, body)
        # Each binding is pushed on the front, so the last index variable
        # ends up first in the kernel body.
        for dimension, var in enumerate(index_vars):
            body.insert(0, C.Assign(
                C.SymbolRef(var, ctypes.c_int()),
                C.FunctionCall(C.SymbolRef("get_global_id"),
                               [C.Constant(dimension)])))
        kernel_src = C.FunctionDecl(
            None, C.SymbolRef(kernel_name), params, body)
        kernel_src.set_kernel()
        self.build_kernel(kernel_src, kernel_name, kernel_args)
        return kernel_name

    fully_nested = all(
        isinstance(s, C.For) and hasattr(s, 'parallel') and s.parallel
        for s in node.body)

    if fully_nested:
        for inner in node.body:
            inner_var = inner.init.left.name
            inner_len = inner.test.right
            kernel_name = emit_kernel(inner.body, (outer_var, inner_var))
            host_code.append(StringTemplate(
                """ size_t global_size_{kernel_name}[2] = {{{looplen1}, {looplen2}}}; clEnqueueNDRangeKernel(queue, {kernel_name}, 2, NULL, global_size_{kernel_name}, NULL, 0, NULL, NULL); clFinish(queue); """.format(
                    kernel_name=kernel_name,
                    looplen1=outer_len,
                    looplen2=inner_len)))
    else:
        kernel_name = emit_kernel(node.body, (outer_var,))
        host_code.append(StringTemplate(
            """ size_t global_size_{kernel_name}[1] = {{{looplen1}}}; clEnqueueNDRangeKernel(queue, {kernel_name}, 1, NULL, global_size_{kernel_name}, NULL, 0, NULL, NULL); clFinish(queue); """.format(
                kernel_name=kernel_name,
                looplen1=outer_len)))

    if hasattr(node, 'reduce_vars') and len(node.reduce_vars) > 0:
        for var in node.reduce_vars:
            size = np.prod(self.buffers[var].shape[1:])
            host_code.append(self._gen_reduce_for_loop(node, var, size))

    return host_code
def visit_Return(self, node):
    """Turn ``return expr`` into ``retval[loopvar] = expr``.

    The returned expression is visited and stored back on ``node.value``
    before the assignment is built.  The array and index names come from
    ``self.retval_name`` and ``self.loopvar``.
    """
    node.value = self.visit(node.value)
    destination = C.ArrayRef(
        C.SymbolRef(self.retval_name),
        C.SymbolRef(self.loopvar))
    return C.Assign(destination, node.value)
def visit_Return(self, node):
    """Turn ``return expr`` into an assignment to the target element.

    The left-hand side is a ``StringTemplate`` built from
    ``self.target.get_element()``; the right-hand side is the visited
    return expression.
    """
    returned = self.visit(node.value)
    destination = StringTemplate(self.target.get_element())
    return C.Assign(destination, returned)
def visit_For(self, node):
    """Collect unrolled copies of a loop body without jamming them.

    The visited statements stay in ``node.body``.  For each replica
    ``i`` in ``1 .. factor-1`` a copy of every non-loop statement, with
    ``self.target_var`` replaced by the unrolled index, is accumulated in
    the class-level ``UnrollStatementsNoJam.new_body[i]``.  Nested ``for``
    statements wrap what their own visit accumulated in a fresh loop with
    the same variable, bound and increment.

    The unrolled index is ``target_var + i`` for ``unroll_type == 0`` and
    ``factor * target_var + i`` for ``unroll_type == 1``.

    Raises:
        AssertionError: ``unroll_type`` is neither 0 nor 1 while replicating
            a non-loop statement.
    """
    def unrolled_index(offset):
        if self.unroll_type == 0:
            return C.Add(C.SymbolRef(self.target_var), C.Constant(offset))
        return C.Add(
            C.Mul(C.Constant(self.factor), C.SymbolRef(self.target_var)),
            C.Constant(offset))

    # Start every replica with an empty statement list.  The class
    # attribute is always re-read because nested visits rebind it.
    for j in range(1, self.factor):
        UnrollStatementsNoJam.new_body[j] = []

    newbody = []
    for s in node.body:
        # Snapshot taken before visiting, so a nested loop's contribution
        # can be appended to the state that preceded it.
        temp = deepcopy(UnrollStatementsNoJam.new_body)
        t = self.visit(s)
        stmt2 = deepcopy(t)
        stmt = deepcopy(t)

        # NOTE(review): the replica-0 rewrite below works on a copy and its
        # result is never used - the untouched `t` is what gets appended.
        # Left as-is; confirm whether `s` was meant to be appended.
        s = util.replace_symbol(self.target_var, unrolled_index(0), stmt)
        newbody.append(t)

        if not isinstance(t, C.For):
            for i in range(1, self.factor):
                stmt = deepcopy(stmt2)
                if self.unroll_type not in (0, 1):
                    # The original `assert (false)` referenced an undefined
                    # name and surfaced as NameError.
                    raise AssertionError(
                        "unsupported unroll_type: {!r}".format(
                            self.unroll_type))
                replica = util.replace_symbol(
                    self.target_var, unrolled_index(i), stmt)
                UnrollStatementsNoJam.new_body.setdefault(i, []).append(
                    replica)
        else:
            var = t.init.left.name
            for j in range(1, self.factor):
                temp[j].append(
                    C.For(
                        C.Assign(C.SymbolRef(var, ctypes.c_int()),
                                 C.Constant(0)),
                        C.Lt(C.SymbolRef(var),
                             C.Constant(t.test.right.value)),
                        C.AddAssign(C.SymbolRef(var),
                                    C.Constant(t.incr.value.value)),
                        UnrollStatementsNoJam.new_body[j]))
            UnrollStatementsNoJam.new_body = deepcopy(temp)

    node.body = newbody
    return node
def gen_for(loopvar, start, end, body, pragma=""):
    """Return ``for (int loopvar = start; loopvar < end; loopvar++) body``.

    ``pragma`` is passed through to ``C.For`` unchanged.
    """
    init = C.Assign(C.SymbolRef(loopvar, ctypes.c_int()), C.Constant(start))
    test = C.Lt(C.SymbolRef(loopvar), C.Constant(end))
    step = C.PostInc(C.SymbolRef(loopvar))
    return C.For(init, test, step, body, pragma)
def visit_Call(self, node):
    """Lower a call: fold ``len``, inline table functions, or just recurse.

    * ``len(x)`` becomes a constant holding the length of ``x`` as evaluated
      in the symbol table.
    * A callee found in the symbol table is specialized for the evaluated
      arguments.  Its generated C function body is returned as a ``C.Block``
      with ``param = arg`` assignments pushed on the front, and the other
      definitions of the generated file are added to ``self.files``.
    * Any other call only has its arguments visited.
    """
    callee = node.func
    if isinstance(callee, ast.Name) and callee.id == 'len':
        measured = self.eval_in_table(node.args[0])
        return C.Constant(len(measured))

    if not self.table_contains(callee):
        node.args = [self.visit(arg) for arg in node.args]
        return node

    fn = self.eval_in_table(callee)
    params = []  # Python-side values used to pick the specialization
    args = []    # C expressions passed to the inlined body

    for arg in node.args:
        if isinstance(arg, ast.Subscript):
            container = self.eval_in_table(arg.value)
            subscript = arg.slice.value
            if isinstance(subscript, ast.Tuple):
                index = self.eval_with_loop(subscript.elts)
            else:
                index = self.eval_with_loop([subscript])
            params.append(container[index])
            lowered = self.visit(arg)
            if isinstance(container[index], Array):
                lowered = C.Ref(lowered)
            args.append(lowered)
        elif isinstance(arg, ast.Attribute):
            params.append(self.eval_in_table(arg))
        else:
            lowered = self.visit(arg)
            if isinstance(lowered, C.SymbolRef):
                params.append(self.decls[lowered.name])
                args.append(lowered)
            elif isinstance(lowered, ast.Tuple):
                params.append(tuple(
                    self.eval_in_table(elt)
                    if isinstance(elt, C.SymbolRef) else elt
                    for elt in lowered.elts))

    if hasattr(fn, 'specialized_dispatch'):
        # Dispatch sees every parameter; the specializer only the first
        # `num_args` of them when that count is set.
        trimmed = params[:fn.num_args] if fn.num_args else params
        fn = fn.fn(*params)
        params = trimmed

    specializer = fn._specializer
    cfg = specializer.get_program_config(params, {})
    dir_name = specializer.config_to_dirname(cfg)
    result = specializer.get_transform_result(cfg, dir_name, cache=False)

    cfile = result[0]
    func = cfile.find(C.FunctionDecl, name=cfile.name)
    cfile.body = [item for item in cfile.body if item is not func]
    self.files.extend(cfile.body)

    block = C.Block()
    block.body = func.defn
    for actual, formal in zip(args, func.params):
        block.body.insert(0, C.Assign(formal, actual))
    return block