class Flags(utils.ConfigOptions):
    # These options are all false by default, but the defaults are
    # different with the @jit decorator (see targets.options.TargetOptions).
    OPTIONS = {
        # Enable loop-lifting
        'enable_looplift': False,
        # Enable pyobject mode (in general)
        'enable_pyobject': False,
        # Enable pyobject mode inside lifted loops
        'enable_pyobject_looplift': False,
        # Enable SSA
        'enable_ssa': True,
        # Force pyobject mode inside the whole function
        'force_pyobject': False,
        # Release GIL inside the native function
        'release_gil': False,
        'no_compile': False,
        'debuginfo': False,
        'boundscheck': False,
        'forceinline': False,
        'no_cpython_wrapper': False,
        'no_cfunc_wrapper': False,
        # Enable automatic parallel optimization, can be fine-tuned by taking
        # a dictionary of sub-options instead of a boolean, see parfor.py for
        # detail.
        'auto_parallel': cpu.ParallelOptions(False),
        'nrt': False,
        'no_rewrites': False,
        'error_model': 'python',
        'fastmath': cpu.FastMathOptions(False),
        'noalias': False,
        'inline': cpu.InlineOptions('never'),
    }
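# Minimal usage sketch for the ConfigOptions-based Flags above (assumption:
# utils.ConfigOptions exposes a .set(name, value=True) method, as used by the
# compile_parallel helper further below). Not a verbatim library excerpt.
flags = Flags()
flags.set('nrt')                                       # boolean flag -> True
flags.set('auto_parallel', cpu.ParallelOptions(True))  # valued flag
flags.set('error_model', 'numpy')                      # string-valued flag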
def _run_parfor(cls, test_func, args, swap_map=None):
    # TODO: refactor this with get_optimized_numba_ir() where this is
    # copied from
    typingctx = typing.Context()
    targetctx = cpu.CPUContext(typingctx)
    test_ir = compiler.run_frontend(test_func)
    options = cpu.ParallelOptions(True)

    tp = MyPipeline(typingctx, targetctx, args, test_ir)

    with cpu_target.nested_context(typingctx, targetctx):
        typingctx.refresh()
        targetctx.refresh()

        inline_pass = inline_closurecall.InlineClosureCallPass(
            tp.state.func_ir, options, typed=True
        )
        inline_pass.run()

        rewrites.rewrite_registry.apply("before-inference", tp.state)

        untyped_passes.ReconstructSSA().run_pass(tp.state)

        (
            tp.state.typemap,
            tp.state.return_type,
            tp.state.calltypes,
            _,
        ) = typed_passes.type_inference_stage(
            tp.state.typingctx, tp.state.func_ir, tp.state.args, None
        )

        typed_passes.PreLowerStripPhis().run_pass(tp.state)

        diagnostics = numba.parfors.parfor.ParforDiagnostics()

        preparfor_pass = numba.parfors.parfor.PreParforPass(
            tp.state.func_ir,
            tp.state.typemap,
            tp.state.calltypes,
            tp.state.typingctx,
            options,
            swapped=diagnostics.replaced_fns,
            replace_functions_map=swap_map,
        )
        preparfor_pass.run()

        rewrites.rewrite_registry.apply("after-inference", tp.state)

    return tp, options, diagnostics, preparfor_pass
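# Hypothetical call-site sketch for _run_parfor (assumptions: it is a
# classmethod on a parfor test class, here named SomeParforTest, and
# MyPipeline is defined in the same test module). The returned diagnostics
# and pre-parfor pass can then be inspected in assertions.
from numba.core import types

def example_sum(a):
    return a.sum()

tp, options, diagnostics, preparfor_pass = SomeParforTest._run_parfor(
    example_sum, (types.float64[::1],)
)
assert options.enabled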
def test_no_copy_usm_shared(capfd):
    a = usmarray.ones(10, dtype=np.int64)
    b = np.ones(10, dtype=np.int64)
    # f = njit(fn)
    flags = compiler.Flags()
    flags.no_compile = True
    flags.no_cpython_wrapper = True
    flags.nrt = False
    flags.auto_parallel = cpu.ParallelOptions(True)

    typingctx = cpu_target.typing_context
    targetctx = cpu_target.target_context
    args = typingctx.resolve_argument_type(a)

    try:
        device = dpctl.SyclDevice("opencl:gpu:0")
    except ValueError:
        pytest.skip("Device not found")

    with dppy.offload_to_sycl_device(device):
        cres = compiler.compile_extra(
            typingctx=typingctx,
            targetctx=targetctx,
            func=fn,
            args=tuple([args]),
            return_type=args,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )
        assert "DPCTLQueue_Memcpy" not in cres.library.get_llvm_str()

        args = typingctx.resolve_argument_type(b)
        cres = compiler.compile_extra(
            typingctx=typingctx,
            targetctx=targetctx,
            func=fn,
            args=tuple([args]),
            return_type=args,
            flags=flags,
            locals={},
            pipeline_class=DPPYCompiler,
        )
        assert "DPCTLQueue_Memcpy" in cres.library.get_llvm_str()
def run_frontend(func, inline_closures=False):
    """
    Run the compiler frontend over the given Python function, and return
    the function's canonical Numba IR.

    If `inline_closures` is truthy, closure inlining is also run.
    """
    # XXX make this a dedicated Pipeline?
    func_id = bytecode.FunctionIdentity.from_function(func)
    interp = interpreter.Interpreter(func_id)
    bc = bytecode.ByteCode(func_id=func_id)
    func_ir = interp.interpret(bc)
    if inline_closures:
        inline_pass = InlineClosureCallPass(func_ir, cpu.ParallelOptions(False),
                                            {}, False)
        inline_pass.run()
    post_proc = postproc.PostProcessor(func_ir)
    post_proc.run()
    return func_ir
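# Usage sketch for run_frontend (assumption: it is importable from
# numba.core.compiler; the exact module path may vary between Numba versions).
from numba.core import compiler

def clamp(x, lo, hi):
    return min(max(x, lo), hi)

func_ir = compiler.run_frontend(clamp)
func_ir.dump()              # print the untyped Numba IR, block by block
print(func_ir.arg_names)    # ('x', 'lo', 'hi')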
def compile_parallel(self, func, arg_types):
    fast_pflags = Flags()
    fast_pflags.set('auto_parallel', cpu.ParallelOptions(True))
    fast_pflags.set('nrt')
    fast_pflags.set('fastmath', cpu.FastMathOptions(True))
    return compile_isolated(func, arg_types, flags=fast_pflags).entry_point
def assert_prune(self, func, args_tys, prune, *args, **kwargs):
    # This checks that the expected pruned branches have indeed been pruned.
    # func is a python function to assess
    # args_tys is the numba types arguments tuple
    # prune arg is a list, one entry per branch. The value in the entry is
    # encoded as follows:
    # True: using constant inference only, the True branch will be pruned
    # False: using constant inference only, the False branch will be pruned
    # None: under no circumstances should this branch be pruned
    # *args: the argument instances to pass to the function to check
    # execution is still valid post transform
    # **kwargs:
    #   - flags: compiler.Flags instance to pass to `compile_isolated`,
    #     permits use of e.g. object mode

    func_ir = compile_to_ir(func)
    before = func_ir.copy()
    if self._DEBUG:
        print("=" * 80)
        print("before inline")
        func_ir.dump()

    # run closure inlining to ensure that nonlocals in closures are visible
    inline_pass = InlineClosureCallPass(
        func_ir, cpu.ParallelOptions(False),
    )
    inline_pass.run()

    # Remove all Dels, and re-run postproc
    post_proc = postproc.PostProcessor(func_ir)
    post_proc.run()

    rewrite_semantic_constants(func_ir, args_tys)
    if self._DEBUG:
        print("=" * 80)
        print("before prune")
        func_ir.dump()

    dead_branch_prune(func_ir, args_tys)

    after = func_ir
    if self._DEBUG:
        print("after prune")
        func_ir.dump()

    before_branches = self.find_branches(before)
    self.assertEqual(len(before_branches), len(prune))

    # what is expected to be pruned
    expect_removed = []
    for idx, prune in enumerate(prune):
        branch = before_branches[idx]
        if prune is True:
            expect_removed.append(branch.truebr)
        elif prune is False:
            expect_removed.append(branch.falsebr)
        elif prune is None:
            pass  # nothing should be removed!
        elif prune == 'both':
            expect_removed.append(branch.falsebr)
            expect_removed.append(branch.truebr)
        else:
            assert 0, "unreachable"

    # compare labels
    original_labels = set([_ for _ in before.blocks.keys()])
    new_labels = set([_ for _ in after.blocks.keys()])
    # assert that the new labels are precisely the original less the
    # expected pruned labels
    try:
        self.assertEqual(new_labels, original_labels - set(expect_removed))
    except AssertionError as e:
        print("new_labels", sorted(new_labels))
        print("original_labels", sorted(original_labels))
        print("expect_removed", sorted(expect_removed))
        raise e

    supplied_flags = kwargs.pop('flags', False)
    compiler_kws = {'flags': supplied_flags} if supplied_flags else {}
    cres = compile_isolated(func, args_tys, **compiler_kws)
    if args is None:
        res = cres.entry_point()
        expected = func()
    else:
        res = cres.entry_point(*args)
        expected = func(*args)
    self.assertEqual(res, expected)
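# Hypothetical usage sketch for assert_prune (assumptions: it lives on a
# unittest.TestCase subclass, and `impl` plus the argument types below are
# made up for illustration). With `y` typed as NoneType, `y is None` is known
# to be True at compile time, so the False branch should be pruned -- encoded
# as [False] -- and the compiled function is then re-executed with (10, None)
# and compared against pure Python.
from numba.core import types

def impl(x, y=None):
    if y is None:
        return x + 7
    return x + y

self.assert_prune(impl, (types.int64, types.NoneType('none')), [False],
                  10, None)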
def compile_parallel(self, func, arg_types):
    fast_pflags = Flags()
    fast_pflags.auto_parallel = cpu.ParallelOptions(True)
    fast_pflags.nrt = True
    fast_pflags.fastmath = cpu.FastMathOptions(True)
    return compile_isolated(func, arg_types, flags=fast_pflags).entry_point
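# Usage sketch for compile_parallel (assumptions: `self` is an instance of the
# helper class this method belongs to, and `work` is a made-up function). The
# returned entry point is the raw compiled function, bypassing the dispatcher,
# with parfors, NRT and fastmath enabled via the flags set above.
import numpy as np
from numba.core import types

def work(a):
    return np.sin(a) + np.cos(a)

cfunc = self.compile_parallel(work, (types.float64[::1],))
out = cfunc(np.linspace(0.0, 1.0, 1024))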
class Flags(TargetConfig):
    enable_looplift = Option(
        type=bool,
        default=False,
        doc="Enable loop-lifting",
    )
    enable_pyobject = Option(
        type=bool,
        default=False,
        doc="Enable pyobject mode (in general)",
    )
    enable_pyobject_looplift = Option(
        type=bool,
        default=False,
        doc="Enable pyobject mode inside lifted loops",
    )
    enable_ssa = Option(
        type=bool,
        default=True,
        doc="Enable SSA",
    )
    force_pyobject = Option(
        type=bool,
        default=False,
        doc="Force pyobject mode inside the whole function",
    )
    release_gil = Option(
        type=bool,
        default=False,
        doc="Release GIL inside the native function",
    )
    no_compile = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    debuginfo = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    boundscheck = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    forceinline = Option(
        type=bool,
        default=False,
        doc="Force inlining of the function. Overrides _dbg_optnone.",
    )
    no_cpython_wrapper = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_cfunc_wrapper = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    auto_parallel = Option(
        type=cpu.ParallelOptions,
        default=cpu.ParallelOptions(False),
        doc="""Enable automatic parallel optimization, can be fine-tuned by
taking a dictionary of sub-options instead of a boolean, see parfor.py for
detail""",
    )
    nrt = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_rewrites = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    error_model = Option(
        type=str,
        default="python",
        doc="TODO",
    )
    fastmath = Option(
        type=cpu.FastMathOptions,
        default=cpu.FastMathOptions(False),
        doc="TODO",
    )
    noalias = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    inline = Option(
        type=cpu.InlineOptions,
        default=cpu.InlineOptions("never"),
        doc="TODO",
    )
    # Defines a new target option for tracking the "target backend".
    # This will be the XYZ in @jit(_target=XYZ).
    target_backend = Option(
        type=str,
        default="cpu",  # if not set, default to CPU
        doc="backend",
    )
    dbg_extend_lifetimes = Option(
        type=bool,
        default=False,
        doc=("Extend variable lifetime for debugging. "
             "This automatically turns on with debug=True."),
    )
    dbg_optnone = Option(
        type=bool,
        default=False,
        doc=("Disable optimization for debug. "
             "Equivalent to adding optnone attribute in the LLVM Function."),
    )
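# Minimal usage sketch for the TargetConfig-based Flags (assumption: Option /
# TargetConfig come from numba.core.targetconfig and behave as typed
# attributes whose value is the declared default until assigned).
flags = Flags()
assert flags.enable_ssa is True                  # unset -> default
flags.nrt = True
flags.auto_parallel = cpu.ParallelOptions(True)
flags.target_backend = "cpu"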
class Flags(TargetConfig):
    enable_looplift = Option(
        type=bool,
        default=False,
        doc="Enable loop-lifting",
    )
    enable_pyobject = Option(
        type=bool,
        default=False,
        doc="Enable pyobject mode (in general)",
    )
    enable_pyobject_looplift = Option(
        type=bool,
        default=False,
        doc="Enable pyobject mode inside lifted loops",
    )
    enable_ssa = Option(
        type=bool,
        default=True,
        doc="Enable SSA",
    )
    force_pyobject = Option(
        type=bool,
        default=False,
        doc="Force pyobject mode inside the whole function",
    )
    release_gil = Option(
        type=bool,
        default=False,
        doc="Release GIL inside the native function",
    )
    no_compile = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    debuginfo = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    boundscheck = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    forceinline = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_cpython_wrapper = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_cfunc_wrapper = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    auto_parallel = Option(
        type=cpu.ParallelOptions,
        default=cpu.ParallelOptions(False),
        doc="""Enable automatic parallel optimization, can be fine-tuned by
taking a dictionary of sub-options instead of a boolean, see parfor.py for
detail""",
    )
    nrt = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    no_rewrites = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    error_model = Option(
        type=str,
        default="python",
        doc="TODO",
    )
    fastmath = Option(
        type=cpu.FastMathOptions,
        default=cpu.FastMathOptions(False),
        doc="TODO",
    )
    noalias = Option(
        type=bool,
        default=False,
        doc="TODO",
    )
    inline = Option(
        type=cpu.InlineOptions,
        default=cpu.InlineOptions("never"),
        doc="TODO",
    )