def _test_closing_offline_cache_for_a_kernel(curr_arch, kernel, args, result):
    count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

    ti.init(arch=curr_arch,
            enable_fallback=False,
            offline_cache=False,
            offline_cache_file_path=tmp_offline_cache_file_path())
    res1 = kernel(*args)
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files()

    ti.init(arch=curr_arch,
            enable_fallback=False,
            offline_cache=False,
            offline_cache_file_path=tmp_offline_cache_file_path())
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files()
    res2 = kernel(*args)
    assert res1 == test_utils.approx(result) and res1 == test_utils.approx(res2)

    ti.reset()
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files()

def test_single_bit_struct(physical_type, compute_type, quant_bits, test_case):
    ti.init(arch=ti.cpu, debug=True)
    qit1 = ti.types.quant.int(quant_bits[0], True, compute_type)
    qit2 = ti.types.quant.int(quant_bits[1], False, compute_type)
    qit3 = ti.types.quant.int(quant_bits[2], True, compute_type)
    a = ti.field(dtype=qit1)
    b = ti.field(dtype=qit2)
    c = ti.field(dtype=qit3)
    ti.root.bit_struct(num_bits=physical_type).place(a, b, c)

    @ti.kernel
    def set_val(test_val: ti.types.ndarray()):
        a[None] = test_val[0]
        b[None] = test_val[1]
        c[None] = test_val[2]

    @ti.kernel
    def verify_val(test_val: ti.types.ndarray()):
        assert a[None] == test_val[0]
        assert b[None] == test_val[1]
        assert c[None] == test_val[2]

    set_val(test_case)
    verify_val(test_case)
    ti.reset()

def test_ndarray_reader_and_writer_with_offline_cache(curr_arch, layout):
    count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

    def helper():
        a = ti.Vector.ndarray(10, ti.i32, 5, layout=layout)
        for i in range(5):
            for j in range(4):
                a[i][j * j] = j * j
        assert a[0][9] == 9
        assert a[1][0] == 0
        assert a[2][1] == 1
        assert a[3][4] == 4
        assert a[4][9] == 9

    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == 0 * cache_files_num_per_kernel
    ti.init(arch=curr_arch,
            enable_fallback=False,
            **current_thread_ext_options())
    helper()

    ti.init(arch=curr_arch,
            enable_fallback=False,
            **current_thread_ext_options())
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == 2 * cache_files_num_per_kernel
    helper()

    ti.reset()
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == 2 * cache_files_num_per_kernel

def init(default_fp=None,
         default_ip=None,
         print_preprocessed=None,
         debug=None,
         **kwargs):
    if debug is None:
        debug = bool(int(os.environ.get('TI_DEBUG', '0')))
    # Make a deepcopy in case these args refer to items in ti.cfg, which are
    # actually references. If no copy is made and the args are indeed
    # references, ti.reset() could override the args with their default values.
    default_fp = _deepcopy(default_fp)
    default_ip = _deepcopy(default_ip)
    kwargs = _deepcopy(kwargs)
    import taichi as ti
    ti.reset()
    if default_fp is not None:
        ti.get_runtime().set_default_fp(default_fp)
    if default_ip is not None:
        ti.get_runtime().set_default_ip(default_ip)
    if print_preprocessed is not None:
        ti.get_runtime().print_preprocessed = print_preprocessed
    if debug:
        ti.set_logging_level(ti.DEBUG)
    ti.cfg.debug = debug
    log_level = os.environ.get('TI_LOG_LEVEL', '')
    if log_level:
        ti.set_logging_level(log_level)
    for k, v in kwargs.items():
        setattr(ti.cfg, k, v)
    ti.get_runtime().create_program()

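# A minimal usage sketch for init() above (not part of the original sources).
# It assumes init() is exposed as ti.init and that ti.cfg has an `arch`
# attribute, as the legacy ti.var/ti.layout tests in this file suggest; the
# field shape and values are purely illustrative.
def _example_init_usage():
    import taichi as ti
    # Unrecognized keywords (e.g. arch) are forwarded to ti.cfg via setattr.
    ti.init(default_fp=ti.f32, debug=False, arch=ti.x86_64)
    x = ti.var(ti.f32, shape=4)
    x[0] = 1.0
    assert x[0] == 1.0
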
def test_cond_grad():
    ti.reset()
    ti.cfg.print_ir = True
    x = ti.var(ti.f32)
    y = ti.var(ti.f32)

    @ti.layout
    def place():
        ti.root.dense(ti.i, 2).place(x, x.grad, y, y.grad)

    @ti.kernel
    def func():
        for i in range(2):
            t = 0.0
            if x[i] > 0:
                t = 1 / (x[i] + 1e-10)
            y[i] = t

    x[0] = 0
    x[1] = 1
    y.grad[0] = 1
    y.grad[1] = 1

    func()
    func.grad()

    # x[0] never enters the branch, so its gradient is 0; for x[1] = 1,
    # d/dx (1 / (x + 1e-10)) = -1 / (x + 1e-10)**2 which is approximately -1.
    assert x.grad[0] == 0
    assert x.grad[1] == -1

def wrapped(*args, **kwargs):
    arch_params_sets = [ti.supported_archs(), *_test_features.values()]
    arch_params_combinations = list(itertools.product(*arch_params_sets))

    for arch_params in arch_params_combinations:
        req_arch, req_params = arch_params[0], arch_params[1:]

        if (req_arch not in arch) or (req_arch in exclude):
            continue

        if not all(
                _ti_core.is_extension_supported(req_arch, e)
                for e in require):
            continue

        skip = False
        current_options = copy.deepcopy(options)
        for feature, param in zip(_test_features, req_params):
            value = param.value
            required_extensions = param.required_extensions
            if current_options.get(feature, value) != value or any(
                    not _ti_core.is_extension_supported(req_arch, e)
                    for e in required_extensions):
                skip = True
            else:
                # Fill in the missing feature
                current_options[feature] = value

        if skip:
            continue

        ti.init(arch=req_arch, **current_options)
        foo(*args, **kwargs)
        ti.reset()

def test_abs():
    ti.reset()
    x = ti.var(ti.f32)
    y = ti.var(ti.f32)

    N = 16

    @ti.layout
    def place():
        ti.root.dense(ti.i, N).place(x)
        ti.root.dense(ti.i, N).place(y)
        ti.root.lazy_grad()

    @ti.kernel
    def func():
        for i in range(N):
            x[i] = ti.abs(y[i])

    for i in range(N):
        y[i] = i - 10
        x.grad[i] = 1

    func()
    func.grad()

    def sgn(x):
        if x > 0:
            return 1
        if x < 0:
            return -1
        return 0

    for i in range(N):
        assert x[i] == abs(y[i])
        assert y.grad[i] == sgn(y[i])

def test_llvm_gpu():
    ti.reset()
    val = ti.var(ti.i32)
    f = ti.var(ti.f32)

    ti.cfg.use_llvm = True
    ti.cfg.arch = ti.cuda
    # ti.cfg.print_ir = True
    # ti.cfg.print_kernel_llvm_ir = True

    n = 16

    @ti.layout
    def values():
        ti.root.dense(ti.i, n).place(val, f)

    @ti.kernel
    def test():
        for i in range(n):
            # ti.print(i)
            val[i] = i * 2

    test()

    @ti.kernel
    def test2():
        for i in range(n):
            val[i] += 1

    test2()

    for i in range(n):
        # print(i, val[i], f[i])
        assert val[i] == 1 + i * 2

def test_arg_load():
    ti.reset()
    x = ti.var(ti.i32)
    y = ti.var(ti.f32)

    @ti.layout
    def layout():
        ti.root.place(x, y)

    @ti.kernel
    def set_i32(v: ti.i32):
        x[None] = v

    @ti.kernel
    def set_f32(v: ti.f32):
        y[None] = v

    set_i32(123)
    assert x[None] == 123

    set_i32(456)
    assert x[None] == 456

    set_f32(0.125)
    assert y[None] == 0.125

    set_f32(1.5)
    assert y[None] == 1.5

def test_snode_reader_and_writer_with_offline_cache(curr_arch):
    count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

    def helper():
        x = ti.field(dtype=ti.f32, shape=())
        y = ti.field(dtype=ti.f32, shape=())
        x[None] = 3.14
        y[None] = 4.14
        assert x[None] == test_utils.approx(3.14)
        assert y[None] == test_utils.approx(4.14)
        x[None] = 6.28
        y[None] = 7.28
        assert x[None] == test_utils.approx(6.28)
        assert y[None] == test_utils.approx(7.28)

    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files()
    ti.init(arch=curr_arch,
            enable_fallback=False,
            **current_thread_ext_options())
    helper()

    ti.init(arch=curr_arch,
            enable_fallback=False,
            **current_thread_ext_options())
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files([4])
    helper()

    ti.reset()
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files([4])

def test_loops():
    for arch in [ti.x86_64, ti.cuda]:
        ti.reset()
        ti.cfg.arch = arch
        x = ti.var(ti.f32)
        y = ti.var(ti.f32)

        N = 512

        @ti.layout
        def place():
            ti.root.dense(ti.i, N).place(x)
            ti.root.dense(ti.i, N).place(y)
            ti.root.lazy_grad()

        for i in range(N // 2, N):
            y[i] = i - 300

        @ti.kernel
        def func():
            for i in range(N // 2 + 3, N):
                x[i] = ti.abs(y[i])

        func()

        for i in range(N // 2 + 3):
            assert x[i] == 0
        for i in range(N // 2 + 3, N):
            assert x[i] == abs(y[i])

def test_mod():
    ti.reset()
    ti.cfg.use_llvm = True
    x = ti.var(ti.i32)
    y = ti.var(ti.i32)

    @ti.layout
    def place():
        ti.root.dense(ti.i, 1).place(x, y)
        ti.root.lazy_grad()

    @ti.kernel
    def func():
        y[0] = x[0] % 3

    @ti.kernel
    def func2():
        ti.atomic_add(y[0], x[0] % 3)

    func()
    func.grad()

    func2()
    func2.grad()

def test_polar_decomp():
    ti.reset()
    dim = 2
    m = ti.Matrix(dim, dim, ti.f32)
    r = ti.Matrix(dim, dim, ti.f32)
    s = ti.Matrix(dim, dim, ti.f32)
    I = ti.Matrix(dim, dim, ti.f32)
    D = ti.Matrix(dim, dim, ti.f32)

    @ti.layout
    def place():
        ti.root.place(m, r, s, I, D)

    @ti.kernel
    def polar():
        R, S = ti.polar_decompose(m[None])
        r[None] = R
        s[None] = S
        m[None] = R @ S
        # For a polar decomposition m = R @ S, R is orthonormal and S is
        # symmetric, so R @ R^T should be the identity and S - S^T should be zero.
        I[None] = R @ ti.transposed(R)
        D[None] = S - ti.transposed(S)

    for i in range(dim):
        for j in range(dim):
            m(i, j)[None] = i * 2 + j * 7

    polar()

    for i in range(dim):
        for j in range(dim):
            assert m(i, j)[None] == approx(i * 2 + j * 7, abs=1e-5)
            assert I(i, j)[None] == approx(int(i == j), abs=1e-5)
            assert D(i, j)[None] == approx(0, abs=1e-5)

def test_calling_many_kernels(curr_arch):
    count_of_cache_file = len(listdir(tmp_offline_cache_file_path()))

    def helper():
        for kernel, args, get_res, num_offloads in simple_kernels_to_test:
            assert kernel(*args) == test_utils.approx(get_res(*args))

    ti.init(arch=curr_arch,
            enable_fallback=False,
            **current_thread_ext_options())
    helper()
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files()

    ti.init(arch=curr_arch,
            enable_fallback=False,
            **current_thread_ext_options())
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files(
                   [kern[3] for kern in simple_kernels_to_test])
    helper()

    ti.reset()
    assert len(listdir(tmp_offline_cache_file_path())
               ) - count_of_cache_file == get_expected_num_cache_files(
                   [kern[3] for kern in simple_kernels_to_test])

def grad_test(tifunc, npfunc=None):
    if npfunc is None:
        npfunc = tifunc

    ti.reset()
    x = ti.var(ti.f32)
    y = ti.var(ti.f32)

    @ti.layout
    def place():
        ti.root.dense(ti.i, 1).place(x, x.grad, y, y.grad)

    @ti.kernel
    def func():
        for i in x:
            y[i] = tifunc(x[i])

    v = 0.2

    y.grad[0] = 1
    x[0] = v
    func()
    func.grad()

    assert y[0] == approx(npfunc(v))
    assert x.grad[0] == approx(grad(npfunc)(v))

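# Hypothetical call patterns for grad_test() above (not from the original
# file): it checks Taichi's autodiff of a scalar function against the numpy
# reference and the derivative produced by the module-level grad() helper.
# These examples assume ti, np, and grad() are importable in this scope.
#
#     grad_test(lambda x: ti.sin(x), lambda x: np.sin(x))
#     grad_test(lambda x: x * x, lambda x: np.power(x, 2))
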
def test_simple():
    return
    for arch in [ti.x86_64, ti.cuda]:
        ti.reset()
        ti.cfg.use_llvm = True
        ti.cfg.arch = arch
        x = ti.var(ti.i32)

        n = 128

        @ti.layout
        def place():
            ti.root.dense(ti.i, n).place(x)

        @ti.kernel
        def func():
            x[7] = 120

        func()

        # Only x[7] is written by func(); every other element stays zero.
        for i in range(n):
            if i == 7:
                assert x[i] == 120
            else:
                assert x[i] == 0

def test_ad_reduce():
    ti.reset()

    x = ti.var(ti.f32)
    loss = ti.var(ti.f32)

    N = 16

    @ti.layout
    def place():
        ti.root.place(loss, loss.grad).dense(ti.i, N).place(x, x.grad)

    @ti.kernel
    def func():
        for i in x:
            loss.atomic_add(ti.sqr(x[i]))

    total_loss = 0
    for i in range(N):
        x[i] = i
        total_loss += i * i

    loss.grad[None] = 1
    func()
    func.grad()

    # loss = sum_i x_i^2, so d(loss)/d(x_i) = 2 * x_i.
    assert total_loss == approx(loss[None])
    for i in range(N):
        assert x.grad[i] == approx(i * 2)

def _test_cpp(args):
    import taichi as ti
    # C++ tests use the legacy non-LLVM backend
    ti.reset()
    print("Running C++ tests...")
    task = ti.Task('test')
    return int(task.run(*args.files))

def test_complex_kernels():
    for arch in [ti.x86_64, ti.cuda]:
        ti.reset()
        ti.cfg.arch = arch
        a = ti.var(ti.f32)
        b = ti.var(ti.f32)

        n = 128

        @ti.layout
        def place():
            ti.root.dense(ti.i, n).place(a, b)

        @ti.kernel
        def add():
            for i in range(n):
                a[i] += 1
            for i in range(n):
                b[i] += 2
            for i in a:
                b[i] += 3
            for i in b:
                a[i] += 1
            for i in a:
                a[i] += 9

        for i in range(n):
            a[i] = i + 1
            b[i] = i + 2

        add()

        for i in range(n):
            assert a[i] == i + 12
            assert b[i] == i + 7

def test_cpp():
    import taichi as ti
    # C++ tests use the legacy non-LLVM backend
    ti.reset()
    print("Running C++ tests...")
    task = ti.Task('test')
    return int(task.run(*sys.argv[2:]))

def test_numpy_loops():
    for arch in [ti.x86_64, ti.cuda]:
        ti.reset()
        ti.cfg.arch = arch
        x = ti.var(ti.f32)
        y = ti.var(ti.f32)

        N = 512

        @ti.layout
        def place():
            ti.root.dense(ti.i, N).place(x)
            ti.root.dense(ti.i, N).place(y)
            ti.root.lazy_grad()

        for i in range(N // 2, N):
            y[i] = i - 300

        import numpy as np
        begin = np.ones(1) * (N // 2 + 3)
        end = np.ones(1) * N

        @ti.kernel
        def func():
            for i in range(begin, end):
                x[i] = ti.abs(y[i])

        func()

        for i in range(N // 2 + 3):
            assert x[i] == 0
        for i in range(N // 2 + 3, N):
            assert x[i] == abs(y[i])

def test_default_fp_ndarray(dtype):
    arch = ti.lang.impl.current_cfg().arch
    ti.reset()
    ti.init(arch=arch, default_fp=dtype)

    x = ti.Vector.ndarray(2, float, ())
    assert x.dtype == impl.get_runtime().default_fp

def test_size1():
    ti.reset()

    x = ti.var(ti.i32)

    @ti.layout
    def place():
        ti.root.dense(ti.i, 1).place(x)

    x[0] = 1
    assert x[0] == 1

def test_cpp():
    import taichi as ti
    if not ti.core.with_cuda():
        print("Skipping legacy tests (no GPU support)")
        return 0
    # C++ tests use the legacy non-LLVM backend
    ti.reset()
    print("Running C++ tests...")
    task = ti.Task('test')
    return task.run(*sys.argv[2:])

def run(self):
    ti.init(kernel_profiler=True, arch=self.arch)
    print("TestCase[%s.%s.%s]" %
          (self.func.__name__, arch_name(self.arch),
           dtype2str[self.test_dtype]))
    for test_dsize in self.test_dsize_list:
        print("test_dsize = %s" % (size2str(test_dsize)))
        self.min_time_in_us.append(
            self.func(self.arch, self.test_dtype, test_dsize,
                      MemoryBound.basic_repeat_times))
        time.sleep(0.2)
    ti.reset()

def run(self):
    for case, plan in self.plan.items():
        tag_list = plan['tags']
        MetricType.init_taichi(self.arch, tag_list)
        _ms = self.funcs.get_func(tag_list)(self.arch,
                                            self.basic_repeat_times,
                                            **self._get_kwargs(tag_list))
        plan['result'] = _ms
        print(f'{tag_list}={_ms}')
        ti.reset()
    rdict = {'results': self.plan, 'info': self.info}
    return rdict

def wrapped(*args, **kwargs):
    test_utils.mkdir_p(tmp_offline_cache_file_path())
    ret = None
    try:
        ret = func(*args, **kwargs)
    except Exception as e:
        raise e
    finally:
        ti.reset()
        for f in listdir(tmp_offline_cache_file_path()):
            remove(join(tmp_offline_cache_file_path(), f))
        rmdir(tmp_offline_cache_file_path())
    return ret

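# A hypothetical sketch (the decorator name is illustrative, not the project's
# real helper) of how wrapped() above would typically be produced: a decorator
# closes over `func`, so every offline-cache test runs inside a freshly created
# temporary cache directory that is removed again in the finally block.
#
#     def _with_tmp_offline_cache(func):
#         import functools
#
#         @functools.wraps(func)
#         def wrapped(*args, **kwargs):
#             ...  # body exactly as above
#
#         return wrapped
#
#     @_with_tmp_offline_cache
#     def test_something_with_offline_cache():
#         ...
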
def test_clear():
    return
    ti.reset()

    x = ti.var(ti.i32)

    @ti.layout
    def place():
        ti.root.dense(ti.i, 1).place(x)

    x[0] = 1
    assert x[0] == 1

    x.clear()
    assert x[0] == 0

def _run_cpp_test(test_filename, build_dir, gtest_option="", extra_env=None):
    ti.reset()
    print("Running C++ tests...")
    ti_lib_dir = os.path.join(ti.__path__[0], '_lib', 'runtime')

    fullpath = os.path.join(build_dir, test_filename)
    if os.path.exists(fullpath):
        env_copy = os.environ.copy()
        env_copy['TI_LIB_DIR'] = ti_lib_dir

        cmd = [fullpath]
        if gtest_option:
            cmd.append(gtest_option)
        if extra_env:
            env_copy.update(extra_env)
        subprocess.check_call(cmd, env=env_copy, cwd=build_dir)

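# A hypothetical invocation of _run_cpp_test() above; the binary name and
# build directory are placeholders, not the project's actual paths. It shows
# passing a standard gtest filter and forwarding an extra environment variable.
#
#     _run_cpp_test('cpp_tests_binary', '/path/to/build',
#                   gtest_option='--gtest_filter=*OfflineCache*',
#                   extra_env={'TI_LIB_DIR_OVERRIDE': '/tmp/ti_runtime'})
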
def test_random_float():
    for precision in [ti.f32, ti.f64]:
        ti.reset()
        n = 1024
        x = ti.var(ti.f32, shape=(n, n))

        @ti.kernel
        def fill():
            for i in range(n):
                for j in range(n):
                    x[i, j] = ti.random(precision)

        fill()
        X = x.to_numpy()
        # For U ~ Uniform(0, 1), E[U**i] = 1 / (i + 1); check the first few moments.
        for i in range(4):
            assert (X**i).mean() == approx(1 / (i + 1), rel=1e-2)