def create_prange_closure(env, prange_node, body, target):
    """
    Outline the body of a prange loop into a function definition and
    record the result on `prange_node`.

    The variables used in `body` are classified with a
    VariableFindingVisitor: every assigned variable that is not a
    reduction becomes a private. The loop target is removed from the
    reductions (if present) and added to the privates.

    The outlined function takes a single argument, '__numba_privates',
    typed as a reference to an (initially empty) struct type.

    On return, `prange_node` carries the attributes `func_env`,
    `privates_struct_type`, `privates`, `reductions` and `func_def`.
    """
    finder = VariableFindingVisitor()
    finder.visitlist(body)

    # Assigned variables that are not reductions are private. Shared
    # variables are left to the closure support.
    privates = set(finder.assigned).difference(finder.reductions)
    reductions = finder.reductions

    # The loop target must not be treated as a reduction variable.
    target_is_reduction = (isinstance(target, ast.Name) and
                           target.id in reductions)
    if target_is_reduction:
        reductions.pop(target.id)
    privates.add(target.id)

    struct_type = numba.struct([])
    struct_param = ast.Name('__numba_privates', ast.Param())

    # Build the outlined function around a deep copy of the loop body.
    outlined_name = templating.temp_name("prange_body")
    outlined_args = ast.arguments(args=[struct_param], vararg=None,
                                  kwarg=None, defaults=[])
    func_def = ast.FunctionDef(name=outlined_name,
                               args=outlined_args,
                               body=copy.deepcopy(body),
                               decorator_list=[])

    # Set up the (partial) environment for the outlined prange body.
    func_signature = void(struct_type.ref())
    func_env = env.translation.make_partial_env(
        func_def,
        func_signature=func_signature,
        need_closure_wrapper=False,
        locals={'__numba_privates': struct_type.ref()},
    )

    # Publish everything on the prange node.
    prange_node.func_env = func_env
    prange_node.privates_struct_type = struct_type
    prange_node.privates = privates
    prange_node.reductions = reductions
    prange_node.func_def = func_def
def create_prange_closure(env, prange_node, body, target):
    """
    Turn the body of a prange loop into an outlined function definition
    and attach the results to `prange_node`.

    A VariableFindingVisitor is run over `body`. Assigned variables that
    are not reductions are collected as privates; the loop target is
    dropped from the reductions when present and added to the privates.

    The generated function has one parameter, '__numba_privates', whose
    type is a reference to a struct type created empty here.

    Sets `func_env`, `privates_struct_type`, `privates`, `reductions`
    and `func_def` on `prange_node`.
    """
    visitor = VariableFindingVisitor()
    visitor.visitlist(body)

    # Privates are the assigned variables minus the reductions; shared
    # variables are handled by the closure support.
    privates = set(visitor.assigned).difference(visitor.reductions)
    reductions = visitor.reductions

    # Make sure the target variable is not kept as a reduction.
    if isinstance(target, ast.Name):
        if target.id in reductions:
            reductions.pop(target.id)
    privates.add(target.id)

    privates_type = numba.struct([])
    privates_param = ast.Name('__numba_privates', ast.Param())

    # The outlined function wraps a deep copy of the original body.
    body_name = templating.temp_name("prange_body")
    body_args = ast.arguments(args=[privates_param], vararg=None,
                              kwarg=None, defaults=[])
    func_def = ast.FunctionDef(name=body_name,
                               args=body_args,
                               body=copy.deepcopy(body),
                               decorator_list=[])

    # Create the partial environment for the outlined closure.
    signature = void(privates_type.ref())
    func_env = env.translation.make_partial_env(
        func_def,
        func_signature=signature,
        need_closure_wrapper=False,
        locals={'__numba_privates': privates_type.ref()},
    )

    # Update the prange node.
    prange_node.func_env = func_env
    prange_node.privates_struct_type = privates_type
    prange_node.privates = privates
    prange_node.reductions = reductions
    prange_node.func_def = func_def
def register_array_expression(self, node, lhs=None):
    """
    Replace an array expression by a call to a natively compiled
    minivect kernel.

    A scalar ufunc kernel is built from the expression and compiled with
    numba, wrapped in a strided minivect kernel, linked through
    `self.env.llvm_context`, and finally called through a NativeCallNode.

    Returns the NativeCallNode when `lhs` is given (`a[:] = b[:] * c[:]`).
    Without `lhs` (`b[:] * c[:]`) an output array node is created and an
    ExpressionNode is returned that runs the call and evaluates to that
    array.

    Raises error.NumbaError if the right hand side has more dimensions
    than the left hand side, or if no `lhs` is given in a nopython
    context.
    """
    super(ArrayExpressionRewriteNative, self).register_array_expression(
        node, lhs)

    is_expr = lhs is None
    if lhs:
        lhs_type = lhs.type
    else:
        lhs_type = node.type

    if node.type.is_array and node.type.ndim > lhs_type.ndim:
        # TODO: this is valid in NumPy if the leading dimensions of the
        # TODO: RHS have extent 1
        raise error.NumbaError(
            node, "Right hand side must have a "
                  "dimensionality <= %d" % lhs_type.ndim)

    # Build the scalar ufunc kernel and compile it with numba.
    ufunc_ast, signature, ufunc_builder = get_py_ufunc_ast(
        self.env, lhs, node)
    ast.fix_missing_locations(ufunc_ast)
    func_env, (_, _, _) = pipeline.run_pipeline2(
        self.env,
        None,
        ufunc_ast,
        signature,
        function_globals=self.env.crnt.function_globals,
        wrap=False,
        link=False,
        nopython=True,
    )
    llvm_module = func_env.llvm_module

    # Every operand is needed more than once (broadcasting, data pointer,
    # strides), so wrap each one in a cloneable node first.
    cloneables = [nodes.CloneableNode(operand)
                  for operand in ufunc_builder.operands]

    if lhs is None:
        broadcast_operands = list(cloneables)
    else:
        lhs = nodes.CloneableNode(lhs)
        broadcast_operands = [lhs] + cloneables
        lhs = lhs.clone

    shape = slicenodes.BroadcastNode(lhs_type, broadcast_operands)
    operands = [cloneable.clone for cloneable in cloneables]

    if lhs is None:
        if self.nopython:
            raise error.NumbaError(
                node, "Cannot allocate new memory in nopython context")
        # TODO: determine best output order at runtime
        shape = shape.cloneable
        lhs = nodes.ArrayNewEmptyNode(lhs_type, shape.clone,
                                      lhs_type.is_f_contig).cloneable

    # Build the minivect wrapper kernel inside the module that holds the
    # compiled scalar kernel.
    context = NumbaStaticArgsContext()
    context.llvm_module = llvm_module

    builder = context.astbuilder
    kernel_vars = [builder.variable(name_node.type, "op%d" % i)
                   for i, name_node in enumerate([lhs] + operands)]
    miniargs = [builder.funcarg(kernel_var) for kernel_var in kernel_vars]
    kernel_body = miniutils.build_kernel_call(
        func_env.lfunc.name, signature, miniargs, builder)

    minikernel = builder.function_from_numpy(
        temp_name("array_expression"), kernel_body, miniargs)
    [lminikernel] = context.run_simple(minikernel,
                                       specializers.StridedSpecializer)
    del func_env

    assert lminikernel.module is llvm_module
    lminikernel = self.env.llvm_context.link(lminikernel)

    # Build the call to the minivect kernel: the shape comes first, then
    # a (data, strides) pair per array operand, then all scalar operands.
    args = [shape]
    scalar_args = []
    for operand in [lhs] + operands:
        if not operand.type.is_array:
            scalar_args.append(operand)
            continue

        data_p = nodes.CoercionNode(self.array_attr(operand, 'data'),
                                    operand.type.dtype.pointer())
        if not isinstance(operand, nodes.CloneNode):
            operand = nodes.CloneNode(operand)
        strides_p = self.array_attr(operand, 'strides')
        args.extend((data_p, strides_p))

    args.extend(scalar_args)
    result = nodes.NativeCallNode(minikernel.type, args, lminikernel)

    # Use native slicing in array expressions
    slicenodes.mark_nopython(ast.Suite(body=result.args))

    if is_expr:
        # b[:] * c[:], return new array as expression
        return nodes.ExpressionNode(stmts=[result], expr=lhs.clone)

    # a[:] = b[:] * c[:]
    return result
def register_array_expression(self, node, lhs=None):
    """
    Replace an array expression by a call to a natively compiled
    minivect kernel.

    A scalar ufunc kernel is built from the expression and compiled with
    numba; the resulting LLVM function is kept alive together with
    `self.func`. A strided minivect kernel wrapping it is generated into
    `self.env.llvm_context.module` and called through a NativeCallNode.

    Returns the NativeCallNode when `lhs` is given (`a[:] = b[:] * c[:]`).
    Without `lhs` (`b[:] * c[:]`) an output array node is created and an
    ExpressionNode is returned that runs the call and evaluates to that
    array.

    Raises error.NumbaError if the right hand side has more dimensions
    than the left hand side, or if no `lhs` is given in a nopython
    context.
    """
    super(ArrayExpressionRewriteNative, self).register_array_expression(
        node, lhs)

    is_expr = lhs is None
    if lhs:
        lhs_type = lhs.type
    else:
        lhs_type = node.type

    if node.type.is_array and node.type.ndim > lhs_type.ndim:
        # TODO: this is valid in NumPy if the leading dimensions of the
        # TODO: RHS have extent 1
        raise error.NumbaError(
            node, "Right hand side must have a "
                  "dimensionality <= %d" % lhs_type.ndim)

    # Build the scalar ufunc kernel and compile it with numba.
    ufunc_ast, signature, ufunc_builder = self.get_py_ufunc_ast(lhs, node)
    signature.struct_by_reference = True
    ast.fix_missing_locations(ufunc_ast)
    func_env, (_, _, _) = pipeline.run_pipeline2(
        self.env,
        None,
        ufunc_ast,
        signature,
        function_globals={},
    )

    # Manual linking: tie the compiled kernel to self.func via keep_alive.
    lfunc = func_env.lfunc
    raw_operands = ufunc_builder.operands
    functions.keep_alive(self.func, lfunc)

    # Every operand is needed more than once (broadcasting, data pointer,
    # strides), so wrap each one in a cloneable node first.
    cloneables = [nodes.CloneableNode(operand) for operand in raw_operands]

    if lhs is None:
        broadcast_operands = list(cloneables)
    else:
        lhs = nodes.CloneableNode(lhs)
        broadcast_operands = [lhs] + cloneables
        lhs = lhs.clone

    shape = slicenodes.BroadcastNode(lhs_type, broadcast_operands)
    operands = [cloneable.clone for cloneable in cloneables]

    if lhs is None:
        if self.nopython:
            raise error.NumbaError(
                node, "Cannot allocate new memory in nopython context")
        # TODO: determine best output order at runtime
        shape = shape.cloneable
        lhs = nodes.ArrayNewEmptyNode(lhs_type, shape.clone,
                                      lhs_type.is_f_contig).cloneable

    # Build the minivect wrapper kernel.
    context = NumbaproStaticArgsContext()
    context.llvm_module = self.env.llvm_context.module
    context.optimize_broadcasting = False

    builder = context.astbuilder
    kernel_vars = [builder.variable(name_node.type, "op%d" % i)
                   for i, name_node in enumerate([lhs] + operands)]
    miniargs = [builder.funcarg(kernel_var) for kernel_var in kernel_vars]
    kernel_body = miniutils.build_kernel_call(
        lfunc.name, signature, miniargs, builder)

    minikernel = builder.function_from_numpy(
        templating.temp_name("array_expression"), kernel_body, miniargs)
    # Only the LLVM kernel is used below; the second result is ignored.
    lminikernel, _ctypes_kernel = context.run_simple(
        minikernel, specializers.StridedSpecializer)

    # Build the call to the minivect kernel: the shape comes first, then
    # a (data, strides) pair per array operand, then all scalar operands.
    args = [shape]
    scalar_args = []
    for operand in [lhs] + operands:
        if not operand.type.is_array:
            scalar_args.append(operand)
            continue

        data_p = nodes.CoercionNode(self.array_attr(operand, 'data'),
                                    operand.type.dtype.pointer())
        if not isinstance(operand, nodes.CloneNode):
            operand = nodes.CloneNode(operand)
        strides_p = self.array_attr(operand, 'strides')
        args.extend((data_p, strides_p))

    args.extend(scalar_args)
    result = nodes.NativeCallNode(minikernel.type, args, lminikernel)

    # Use native slicing in array expressions
    slicenodes.mark_nopython(ast.Suite(body=result.args))

    if is_expr:
        # b[:] * c[:], return new array as expression
        return nodes.ExpressionNode(stmts=[result], expr=lhs.clone)

    # a[:] = b[:] * c[:]
    return result