Exemplo n.º 1
0
 def test_nonempty_list_create_no_jit(self):
     """``List`` built from a non-empty iterable with the JIT disabled.

     Regression test for Issue #6001:
     https://github.com/numba/numba/issues/6001
     """
     with override_config('DISABLE_JIT', True), forbid_codegen():
         created = List([1, 2, 3])
         # The result must be a builtin list holding the same items.
         self.assertEqual(type(created), list)
         self.assertEqual(created, [1, 2, 3])
Exemplo n.º 2
0
    def run_compile(self, fnlist, parallelism='threading'):
        """Run ``fnlist`` through the implementation named by ``parallelism``.

        The value returned by ``temp_directory`` is stored on
        ``self._cache_dir`` and used as the ``CACHE_DIR`` config while the
        implementation runs.

        ``parallelism`` is one of 'threading', 'multiprocessing_fork',
        'multiprocessing_forkserver', 'multiprocessing_spawn',
        'multiprocessing_default' or 'random'; any other value raises
        ``ValueError``.
        """
        self._cache_dir = temp_directory(self.__class__.__name__)
        with override_config('CACHE_DIR', self._cache_dir):
            if parallelism == 'random':
                # Run each candidate once, in shuffled order.  The fork
                # based implementations are only added when _HAVE_OS_FORK
                # is set.
                candidates = [thread_impl, spawn_proc_impl]
                if _HAVE_OS_FORK:
                    candidates.extend([fork_proc_impl, forkserver_proc_impl])

                random.shuffle(candidates)
                for run in candidates:
                    run(fnlist)
                return

            # Implementation names are only looked up in the branch that is
            # actually taken.
            if parallelism == 'threading':
                chosen = thread_impl
            elif parallelism == 'multiprocessing_fork':
                chosen = fork_proc_impl
            elif parallelism == 'multiprocessing_forkserver':
                chosen = forkserver_proc_impl
            elif parallelism == 'multiprocessing_spawn':
                chosen = spawn_proc_impl
            elif parallelism == 'multiprocessing_default':
                chosen = default_proc_impl
            else:
                raise ValueError(
                    'Unknown parallelism supplied %s' % parallelism)
            chosen(fnlist)
    def test_usage(self):
        """Exercise the basic API of the recorded LLVM pass timings."""
        @njit
        def foo(n):
            c = 0
            for i in range(n):
                c += i
            return c

        with override_config('LLVM_PASS_TIMINGS', True):
            foo(10)

        metadata = foo.get_metadata(foo.signatures[0])
        collection = metadata['llvm_pass_timings']
        # The metadata entry is a PassTimingsCollection
        self.assertIsInstance(collection, lpt.PassTimingsCollection)
        # __str__ mentions the full optimization module passes
        rendered = str(collection)
        self.assertIn("Module passes (full optimization)", rendered)
        # At least one record must have been collected
        self.assertGreater(len(collection), 0)
        # __getitem__ hands back NamedTimings entries
        final_entry = collection[-1]
        self.assertIsInstance(final_entry, lpt.NamedTimings)
        # NamedTimings carries a name and the processed timings
        self.assertIsInstance(final_entry.name, str)
        self.assertIsInstance(final_entry.timings, lpt.ProcessedPassTimings)
Exemplo n.º 4
0
        def get_ir(extend_lifetimes):
            """Compile ``foo`` and return the IR stored by ``PreserveIR``.

            ``extend_lifetimes`` is the value used for the
            ``EXTEND_VARIABLE_LIFETIMES`` config during compilation.
            """
            class IRPreservingCompiler(CompilerBase):
                def define_pipelines(self):
                    # Default nopython pipeline with PreserveIR inserted
                    # right after IRLegalization.
                    pipeline = DefaultPassBuilder.define_nopython_pipeline(
                        self.state)
                    pipeline.add_pass_after(PreserveIR, IRLegalization)
                    pipeline.finalize()
                    return [pipeline]

            @njit(pipeline_class=IRPreservingCompiler)
            def foo():
                a = 10
                b = 20
                c = a + b
                # a and b are now unused, standard behaviour is ir.Del for them here
                d = c / c
                return d

            with override_config('EXTEND_VARIABLE_LIFETIMES',
                                 extend_lifetimes):
                foo()
                compiled = foo.overloads[foo.signatures[0]]
                return compiled.metadata['preserved_ir']
    def test_analyze(self):
        """Check the analysis helpers of the recorded LLVM pass timings.

        Covers ``get_total_time()``, ``summary()`` and the ordering of the
        records returned by ``list_longest_first()``.
        """
        @njit
        def foo(n):
            c = 0
            for i in range(n):
                for j in range(i):
                    c += j
            return c

        with override_config('LLVM_PASS_TIMINGS', True):
            foo(10)

        md = foo.get_metadata(foo.signatures[0])
        timings_collection = md['llvm_pass_timings']
        # Check: get_total_time()
        self.assertIsInstance(timings_collection.get_total_time(), float)
        # Check: summary()
        self.assertIsInstance(timings_collection.summary(), str)
        # Check: list_longest_first() ordering
        longest_first = timings_collection.list_longest_first()
        self.assertEqual(len(longest_first), len(timings_collection))
        last = longest_first[0].timings.get_total_time()
        for rec in longest_first[1:]:
            cur = rec.timings.get_total_time()
            self.assertGreaterEqual(last, cur)
            # Advance the reference so every record is compared with its
            # immediate predecessor rather than only with the first record.
            last = cur
Exemplo n.º 6
0
def omitted_child_test_wrapper(result_queue, cache_dir, second_call):
    """Call a cached jitted function with its default argument omitted.

    Runs with ``CACHE_DIR`` set to ``cache_dir`` and puts a
    ``(success, output)`` tuple on ``result_queue``: ``output`` is the
    function's return value on success, or the formatted traceback when
    anything was raised.  ``second_call`` selects the expected cache
    statistics: one hit and no miss when true, one miss and no hit
    otherwise.
    """
    with override_config("CACHE_DIR", cache_dir):
        @njit(cache=True)
        def test(num=1000):
            return num

        try:
            output = test()
            sig = test.signatures[0]
            if second_call:
                # A second call is expected to be served from the cache.
                assert test._cache_hits[sig] == 1, \
                    "Cache did not hit as expected"
                assert test._cache_misses[sig] == 0, \
                    "Cache has an unexpected miss"
            else:
                # A first call is expected to miss the cache.
                assert test._cache_misses[sig] == 1, \
                    "Cache did not miss as expected"
                assert test._cache_hits[sig] == 0, \
                    "Cache has an unexpected hit"
        # Catch anything raised so it can be propagated
        except BaseException:
            output = traceback.format_exc()
            success = False
        else:
            success = True
        result_queue.put((success, output))
Exemplo n.º 7
0
    def test_jit_debug_simulator(self):
        """``cuda.jit`` must accept ``debug=True`` with the simulator enabled.

        Regression test for Issue #6615.
        """
        def f(x):
            pass

        with override_config('ENABLE_CUDASIM', 1):
            # Applying the decorator is the whole test: it must not raise.
            cuda.jit(debug=True)(f)
Exemplo n.º 8
0
    def test_py_func_with_kwargs(self):
        """``py_func`` must be the original function when DISABLE_JIT is set."""
        def method(x):
            return x

        with override_config('DISABLE_JIT', True):
            wrapped = jit(nopython=True)(method)

        self.assertEqual(wrapped.py_func, method)
Exemplo n.º 9
0
    def test_decorated_function_with_kwargs(self):
        """``jit(nopython=True)`` under DISABLE_JIT gives a ``_DisableJitWrapper``."""
        def method(x):
            return x

        with override_config('DISABLE_JIT', True):
            wrapper = jit(nopython=True)(method)

        self.assertIsInstance(wrapper, _DisableJitWrapper)
        self.assertIsNotNone(wrapper.py_func)
        # The wrapper must still be callable and return the input.
        self.assertEqual(10, wrapper(10))
Exemplo n.º 10
0
    def test_decorated_function_with_kwargs(self):
        """``jit(nopython=True)`` under DISABLE_JIT gives a ``DisableJitWrapper``."""
        def method(x):
            return x

        with override_config('DISABLE_JIT', True):
            wrapper = jit(nopython=True)(method)

        self.assertIsInstance(wrapper, DisableJitWrapper)
        self.assertIsNotNone(wrapper.py_func)
        # The wrapper must still be callable and return the input.
        self.assertEqual(10, wrapper(10))
Exemplo n.º 11
0
    def test_decorated_function_with_kwargs(self):
        """``jit(nopython=True)`` under DISABLE_JIT returns the function itself."""
        def method(x):
            return x

        with override_config('DISABLE_JIT', True):
            returned = jit(nopython=True)(method)

        self.assertEqual(returned, method)
        # Both names must behave as the plain Python function.
        self.assertEqual(10, method(10))
        self.assertEqual(10, returned(10))
Exemplo n.º 12
0
    def test_decorated_function_with_kwargs(self):
        """With DISABLE_JIT set, ``jit(nopython=True)(f)`` compares equal to ``f``."""
        def method(x):
            return x

        with override_config('DISABLE_JIT', True):
            decorate = jit(nopython=True)
            result = decorate(method)

        self.assertEqual(result, method)
        # Calling either object gives the same answer.
        self.assertEqual(10, method(10))
        self.assertEqual(10, result(10))
Exemplo n.º 13
0
    def test_jitclass(self):
        """Create and use a jitclass with DISABLE_JIT set and codegen forbidden."""
        with override_config('DISABLE_JIT', True), forbid_codegen():
            decorate = jitclass(simple_class_spec)
            SimpleJITClass = decorate(SimpleClass)

            instance = SimpleJITClass()
            self.assertPreciseEqual(instance.h, 5)

            # A jitted consumer of the instance must work as well.
            user = jit(nopython=True)(simple_class_user)
            self.assertPreciseEqual(user(instance), 5)
Exemplo n.º 14
0
    def test_efficient_launch_configuration(self):
        """A ``[256, 256]`` launch must not record any warning."""
        @cuda.jit
        def kernel():
            pass

        with override_config('CUDA_LOW_OCCUPANCY_WARNINGS', 1), \
                warnings.catch_warnings(record=True) as caught:
            kernel[256, 256]()

        self.assertEqual(len(caught), 0)
Exemplo n.º 15
0
    def test_llvm_inliner_flag_conflict(self):
        """Check ``forceinline`` is honoured when DEBUGINFO_DEFAULT is set.

        ``bar`` will be marked as 'alwaysinline', but when DEBUGINFO_DEFAULT
        is set functions are marked as 'noinline', which results in a
        conflict.  ``baz`` will be marked as 'noinline' as a result of
        DEBUGINFO_DEFAULT.
        """
        @njit(forceinline=True)
        def bar(x):
            return math.sin(x)

        @njit(forceinline=False)
        def baz(x):
            return math.cos(x)

        @njit
        def foo(x):
            a = bar(x)
            b = baz(x)
            return a, b

        # Compilation and execution must succeed with the config set.
        with override_config('DEBUGINFO_DEFAULT', 1):
            result = foo(np.pi)

        self.assertPreciseEqual(result, foo.py_func(np.pi))

        # Locate the definition of foo in the parsed LLVM IR.
        llvm_text = foo.inspect_llvm(foo.signatures[0])
        module = llvm.parse_assembly(llvm_text)
        foo_name = foo.overloads[foo.signatures[0]].fndesc.mangled_name
        matching = [fn for fn in module.functions if fn.name == foo_name]
        self.assertEqual(len(matching), 1)
        foo_def = matching[0]

        # Collect the text of every call instruction in foo.
        call_stmts = [
            str(instr).strip()
            for block in foo_def.blocks
            for instr in block.instructions
            if instr.opcode == 'call'
        ]

        # Two specific calls must be present in the IR:
        # 1. a call to the llvm.sin.f64 intrinsic, this is from the inlined bar
        # 2. a call to the baz function, this is from the noinline baz
        baz_name = baz.overloads[baz.signatures[0]].fndesc.mangled_name
        found_sin = any(
            re.match('.*llvm.sin.f64.*', text) for text in call_stmts)
        found_baz = any(
            re.match(f'.*{baz_name}.*', text) for text in call_stmts)

        self.assertTrue(found_sin)
        self.assertTrue(found_baz)
Exemplo n.º 16
0
    def test_decorated_function(self):
        """With DISABLE_JIT set, bare ``jit(f)`` compares equal to ``f``."""
        def method(x):
            return x

        with override_config("DISABLE_JIT", True):
            returned = jit(method)

        self.assertEqual(returned, method)
        # Both names must behave as the plain Python function.
        self.assertEqual(10, method(10))
        self.assertEqual(10, returned(10))
Exemplo n.º 17
0
def run_caching_overload_method(q, cache_dir):
    """
    Used by TestOverloadMethodCaching.test_caching_overload_method

    Takes one argument from ``q``, runs the cached jitted
    ``cache_overload_method_usecase`` on it with ``CACHE_DIR`` set to
    ``cache_dir``, puts the result back on ``q`` and then checks the cache
    stats via ``_assert_cache_stats(cfunc, 1, 0)``.
    """
    with override_config('CACHE_DIR', cache_dir):
        value = q.get()
        compiled = jit(nopython=True, cache=True)(
            cache_overload_method_usecase)
        q.put(compiled(value))
        # Check cache stat
        _assert_cache_stats(compiled, 1, 0)
Exemplo n.º 18
0
 def test_inner_function(self):
     """The inner function must not appear in the dumped assembly."""
     with override_config('DUMP_ASSEMBLY', True), \
             captured_stdout() as stream:
         compiled = jit((types.int32,), nopython=True)(outer_simple)
     self.assertPreciseEqual(compiled(1), 4)
     # Check the inner function was elided from the output (which also
     # guarantees it was inlined into the outer function).
     dumped = stream.getvalue()
     module_name = __name__
     self.assert_has_pattern('%s.outer_simple' % module_name, dumped)
     self.assert_not_has_pattern('%s.inner' % module_name, dumped)
Exemplo n.º 19
0
 def test_dump_ir_generator(self):
     """Check the DUMP_IR output of the simple generator, incl. generator info."""
     with override_config('DUMP_IR', True):
         dumped = self.compile_simple_gen()
     self.check_debug_output(dumped, ['ir'])
     gen_header = '--GENERATOR INFO: %s' % self.func_name
     self.assertIn(gen_header, dumped)
     # The state variables and per-yield-point liveness must be reported.
     expected_gen_info = textwrap.dedent("""
         generator state variables: ['x', 'y']
         yield point #1: live variables = ['y'], weak live variables = ['x']
         yield point #2: live variables = [], weak live variables = ['y']
         """)
     self.assertIn(expected_gen_info, dumped)
Exemplo n.º 20
0
    def test_inefficient_launch_configuration(self):
        """A ``[1, 1]`` launch must record a ``NumbaPerformanceWarning``."""
        @cuda.jit
        def kernel():
            pass

        with override_config('CUDA_LOW_OCCUPANCY_WARNINGS', 1), \
                warnings.catch_warnings(record=True) as caught:
            kernel[1, 1]()

        first = caught[0]
        self.assertEqual(first.category, NumbaPerformanceWarning)
        # The message must mention the grid size and the SM count.
        self.assertIn('Grid size', str(first.message))
        self.assertIn('2 * SM count', str(first.message))
Exemplo n.º 21
0
    def test_bound_function_error_string(self):
        """Check the typing error text for a failing bound function.

        See PR #5952.
        """
        def foo(x):
            x.max(-1)  # axis not supported

        compile_foo = njit("void(int64[:,:])")
        with override_config('DEVELOPER_MODE', 1):
            with self.assertRaises(errors.TypingError) as raises:
                compile_foo(foo)

        message = str(raises.exception)
        self.assertIn("AssertionError()", message)
        self.assertIn("BoundFunction(array.max for array(int64, 2d, A))",
                      message)
Exemplo n.º 22
0
    def test_nowarn_on_device_array(self):
        """Passing a device array to a kernel must not record any warning."""
        @cuda.jit
        def foo(r, x):
            r[0] = x + 1

        N = 10
        d_result = cuda.device_array(N, dtype=np.float32)

        with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1), \
                warnings.catch_warnings(record=True) as caught:
            foo[1, N](d_result, N)

        self.assertEqual(len(caught), 0)
Exemplo n.º 23
0
 def test_multiple_inner_functions(self):
     """Inner functions must not appear in the dumped assembly.

     Same as the single inner function case but with multiple inner
     functions, and multiple calls to the same inner function (inner()).
     This checks that linking in the same library/module twice doesn't
     produce linker errors.
     """
     with override_config('DUMP_ASSEMBLY', True), \
             captured_stdout() as stream:
         compiled = jit((types.int32,), nopython=True)(outer_multiple)
     self.assertPreciseEqual(compiled(1), 6)
     dumped = stream.getvalue()
     module_name = __name__
     self.assert_has_pattern('%s.outer_multiple' % module_name, dumped)
     self.assert_not_has_pattern('%s.more' % module_name, dumped)
     self.assert_not_has_pattern('%s.inner' % module_name, dumped)
Exemplo n.º 24
0
    def test_devicearray_strict_strides(self):
        """With strict strides checking the array is not reported contiguous.

        From the reproducer in Issue #6824.
        """
        with override_config('NPY_RELAXED_STRIDES_CHECKING', 0):
            # Construct a device array that is not contiguous because
            # the strides for the first axis (800) are not equal to
            # the strides * size (10 * 8 = 80) for the previous axis.
            shape, strides = (1, 10), (800, 8)
            arr = devicearray.DeviceNDArray(shape, strides, np.float64)

            # Ensure we don't believe the array to be contiguous because
            # strides checking is strict.
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertFalse(arr.flags[flag])
Exemplo n.º 25
0
    def test_c_f_contiguity_matches_numpy(self):
        """Device array contiguity flags must agree with the host array's.

        From the reproducer in Issue #4943.
        """
        shapes = ((1, 4), (4, 1))
        orders = ('C', 'F')
        with override_config('NPY_RELAXED_STRIDES_CHECKING', 1):
            # Every shape is tried with every memory order.
            for shape in shapes:
                for order in orders:
                    host = np.ndarray(shape, order=order)
                    device = cuda.to_device(host)
                    self.assertEqual(host.flags['C_CONTIGUOUS'],
                                     device.flags['C_CONTIGUOUS'])
                    self.assertEqual(host.flags['F_CONTIGUOUS'],
                                     device.flags['F_CONTIGUOUS'])
Exemplo n.º 26
0
    def test_environment_override(self):
        """DEBUGINFO_DEFAULT applies unless ``debug`` is given explicitly."""
        int32_sig = (types.int32,)
        with override_config('DEBUGINFO_DEFAULT', 1):
            # No debug kwarg: the configured default is expected
            @jit(nopython=True)
            def foo(x):
                return x
            self._check(foo, sig=int32_sig, expect=True)

            # Explicit debug=False: the user's choice is expected
            @jit(nopython=True, debug=False)
            def bar(x):
                return x
            self._check(bar, sig=int32_sig, expect=False)
Exemplo n.º 27
0
    def run_compile(self, fnlist):
        """Call randomly chosen functions from ``fnlist`` from four threads.

        Each thread makes ten calls.  The value returned by
        ``temp_directory`` is stored on ``self._cache_dir`` and used as the
        ``CACHE_DIR`` config until all threads have been joined.
        """
        def chooser():
            for _ in range(10):
                random.choice(fnlist)()

        self._cache_dir = temp_directory(self.__class__.__name__)
        with override_config('CACHE_DIR', self._cache_dir):
            workers = [threading.Thread(target=chooser) for _ in range(4)]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
Exemplo n.º 28
0
    def test_devicearray_relaxed_strides(self):
        """With relaxed strides checking the array is reported contiguous.

        From the reproducer in Issue #6824.
        """
        with override_config('NPY_RELAXED_STRIDES_CHECKING', 1):
            # Construct a device array that is contiguous even though
            # the strides for the first axis (800) are not equal to
            # the strides * size (10 * 8 = 80) for the previous axis,
            # because the first axis size is 1.
            shape, strides = (1, 10), (800, 8)
            arr = devicearray.DeviceNDArray(shape, strides, np.float64)

            # Ensure we still believe the array to be contiguous because
            # strides checking is relaxed.
            for flag in ('C_CONTIGUOUS', 'F_CONTIGUOUS'):
                self.assertTrue(arr.flags[flag])
Exemplo n.º 29
0
    def test_environment_override(self):
        """CUDA_DEBUGINFO_DEFAULT applies unless ``debug`` is given explicitly."""
        array_sig = (types.int32[:], )
        with override_config("CUDA_DEBUGINFO_DEFAULT", 1):
            # No debug kwarg: the configured default is expected
            @cuda.jit
            def foo(x):
                x[0] = 1

            self._check(foo, sig=array_sig, expect=True)

            # Explicit debug=False: the user's choice is expected
            @cuda.jit(debug=False)
            def bar(x):
                x[0] = 1

            self._check(bar, sig=array_sig, expect=False)
Exemplo n.º 30
0
    def test_warn_on_host_array(self):
        """Passing a host array to a kernel records a performance warning."""
        @cuda.jit
        def foo(r, x):
            r[0] = x + 1

        N = 10
        host_result = np.zeros(N, dtype=np.float32)
        with override_config('CUDA_WARN_ON_IMPLICIT_COPY', 1), \
                warnings.catch_warnings(record=True) as caught:
            foo[1, N](host_result, N)

        first = caught[0]
        self.assertEqual(first.category, NumbaPerformanceWarning)
        # The message must describe the copy overhead of the host array.
        self.assertIn('Host array used in CUDA kernel will incur',
                      str(first.message))
        self.assertIn('copy overhead', str(first.message))
Exemplo n.º 31
0
    def test_environment_override(self):
        """An explicit ``debug=False`` wins over CUDA_DEBUGINFO_DEFAULT."""
        int32_array_sig = (types.int32[:],)
        with override_config('CUDA_DEBUGINFO_DEFAULT', 1):
            # Kernel relying on the configured default
            @cuda.jit
            def foo(x):
                x[0] = 1

            self._check(foo, sig=int32_array_sig, expect=True)

            # Kernel overriding the configured default
            @cuda.jit(debug=False)
            def bar(x):
                x[0] = 1

            self._check(bar, sig=int32_array_sig, expect=False)
    def test_consume_sync_disabled(self):
        """No stream synchronize when CUDA_ARRAY_INTERFACE_SYNC is False."""
        # Create a foreign array with a stream
        stream = cuda.stream()
        foreign = ForeignArray(cuda.device_array(10, stream=stream))

        # Disable sync for the duration of the check only; the config
        # override is scoped to this block, whatever value the test suite
        # is otherwise run with.
        with override_config('CUDA_ARRAY_INTERFACE_SYNC', False):
            stream_cls = cuda.cudadrv.driver.Stream
            with patch.object(stream_cls, 'synchronize',
                              return_value=None) as mock_sync:
                cuda.as_cuda_array(foreign)

            # Ensure the synchronize method of a stream was not called
            mock_sync.assert_not_called()
Exemplo n.º 33
0
    def test_omitted_arg(self):
        """Check the debug info emitted for an omitted argument.

        See issue 7726.  The function is compiled with its only argument
        omitted; the last type in its ``DISubroutineType`` list must then be
        an anonymous struct whose element list is empty.
        """
        @njit(debug=True)
        def foo(missing=None):
            pass

        # check that it will actually compile (verifies DI emission is ok)
        with override_config('DEBUGINFO_DEFAULT', 1):
            foo()

        metadata = self._get_metadata(foo, sig=(types.Omitted(None), ))
        metadata_definition_map = self._get_metadata_map(metadata)

        # Find DISubroutineType
        tmp_disubr = []
        for md in metadata:
            if "DISubroutineType" in md:
                tmp_disubr.append(md)
        self.assertEqual(len(tmp_disubr), 1)
        disubr = tmp_disubr.pop()

        disubr_matched = re.match(r'.*!DISubroutineType\(types: ([!0-9]+)\)$',
                                  disubr)
        self.assertIsNotNone(disubr_matched)
        disubr_groups = disubr_matched.groups()
        self.assertEqual(len(disubr_groups), 1)
        disubr_meta = disubr_groups[0]

        # Find the types in the DISubroutineType arg list
        disubr_types = metadata_definition_map[disubr_meta]
        disubr_types_matched = re.match(r'!{(.*)}', disubr_types)
        # Guard the match that is dereferenced next, so a mismatch is
        # reported as an assertion failure instead of an AttributeError.
        self.assertIsNotNone(disubr_types_matched)
        disubr_types_groups = disubr_types_matched.groups()
        self.assertEqual(len(disubr_types_groups), 1)

        # fetch out and assert the last argument type, should be an
        # anonymous struct with no elements
        md_fn_arg = [x.strip() for x in disubr_types_groups[0].split(',')][-1]
        arg_ty = metadata_definition_map[md_fn_arg]
        expected_arg_ty = (r'^.*!DICompositeType\(tag: DW_TAG_structure_type, '
                           r'name: "Anonymous struct \({}\)", elements: '
                           r'(![0-9]+), identifier: "{}"\)')
        self.assertRegex(arg_ty, expected_arg_ty)
        md_base_ty = re.match(expected_arg_ty, arg_ty).groups()[0]
        base_ty = metadata_definition_map[md_base_ty]
        # expect ir.LiteralStructType([])
        self.assertEqual(base_ty, ('!{}'))
Exemplo n.º 34
0
 def test_vectorize(self):
     """Only the ``fastmath=True`` ufunc may contain fast-flagged LLVM ops."""
     def foo(x):
         return x + math.sin(x)
     fast_ufunc = vectorize(fastmath=True)(foo)
     slow_ufunc = vectorize(foo)
     data = np.random.random(8).astype(np.float32)
     # capture the optimized llvm to check for fast flag
     with override_config('DUMP_OPTIMIZED', True):
         with captured_stdout() as slow_stream:
             expected = slow_ufunc(data)
         slow_ir = slow_stream.getvalue()
         with captured_stdout() as fast_stream:
             actual = fast_ufunc(data)
         fast_ir = fast_stream.getvalue()
     np.testing.assert_almost_equal(expected, actual)
     self.assertIn('fadd fast', fast_ir)
     self.assertIn('call fast', fast_ir)
     self.assertNotIn('fadd fast', slow_ir)
     self.assertNotIn('call fast', slow_ir)
Exemplo n.º 35
0
 def test_guvectorize(self):
     """Only the ``fastmath=True`` gufunc may contain fast-flagged LLVM ops."""
     def foo(x, out):
         out[0] = x + math.sin(x)
     data = np.random.random(8).astype(np.float32)
     with override_config('DUMP_OPTIMIZED', True):
         arg_types = ['(float32, float32[:])']
         layout = '()->()'
         # The optimized LLVM is printed while the gufuncs are built.
         with captured_stdout() as fast_stream:
             fast_gufunc = guvectorize(arg_types, layout, fastmath=True)(foo)
         fast_ir = fast_stream.getvalue()
         with captured_stdout() as slow_stream:
             slow_gufunc = guvectorize(arg_types, layout)(foo)
         slow_ir = slow_stream.getvalue()
     expected = slow_gufunc(data)
     actual = fast_gufunc(data)
     np.testing.assert_almost_equal(expected, actual)
     self.assertIn('fadd fast', fast_ir)
     self.assertIn('call fast', fast_ir)
     self.assertNotIn('fadd fast', slow_ir)
     self.assertNotIn('call fast', slow_ir)
Exemplo n.º 36
0
 def test_dump_assembly(self):
     """Compile with DUMP_ASSEMBLY set and check for the 'assembly' output."""
     expected_sections = ['assembly']
     with override_config('DUMP_ASSEMBLY', True):
         dumped = self.compile_simple_cuda()
     self.check_debug_output(dumped, expected_sections)
Exemplo n.º 37
0
 def test_dump_llvm(self):
     """Compile with DUMP_LLVM set and check for the 'llvm' output."""
     expected_sections = ['llvm']
     with override_config('DUMP_LLVM', True):
         dumped = self.compile_simple_cuda()
     self.check_debug_output(dumped, expected_sections)
Exemplo n.º 38
0
 def test_dump_cfg(self):
     """Compile with DUMP_CFG set and check for the 'cfg' output."""
     expected_sections = ['cfg']
     with override_config('DUMP_CFG', True):
         dumped = self.compile_simple_cuda()
     self.check_debug_output(dumped, expected_sections)
Exemplo n.º 39
0
 def test_dump_ir(self):
     """Compile with DUMP_IR set and check for the 'ir' output."""
     expected_sections = ['ir']
     with override_config('DUMP_IR', True):
         dumped = self.compile_simple_cuda()
     self.check_debug_output(dumped, expected_sections)
Exemplo n.º 40
0
 def test_py_func_with_kwargs(self):
     """``py_func`` must be the original function when DISABLE_JIT is set."""
     def method(x):
         return x

     with override_config('DISABLE_JIT', True):
         wrapped = jit(nopython=True)(method)
     self.assertEqual(wrapped.py_func, method)
Exemplo n.º 41
0
 def test_dump_bytecode(self):
     """Compile with DUMP_BYTECODE set and check for the 'bytecode' output."""
     expected_sections = ['bytecode']
     with override_config('DUMP_BYTECODE', True):
         dumped = self.compile_simple_cuda()
     self.check_debug_output(dumped, expected_sections)