def kernel_profiler_total_time():
    """Return the total elapsed time of every kernel recorded by KernelProfiler.

    Returns:
        time (float): accumulated kernel time, in seconds.
    """
    profiler = get_default_kernel_profiler()
    return profiler.get_total_time()
def set_kernel_profiler_toolkit(toolkit_name='default'):
    """Select the toolkit that backs KernelProfiler.

    The supported toolkits are ``'default'`` and ``'cupti'``.

    Args:
        toolkit_name (str): name of the toolkit to switch to.

    Returns:
        status (bool): whether the switch succeeded.

    Example::

        >>> import taichi as ti

        >>> ti.init(arch=ti.cuda, kernel_profiler=True)
        >>> x = ti.field(ti.f32, shape=1024*1024)

        >>> @ti.kernel
        >>> def fill():
        >>>     for i in x:
        >>>         x[i] = i

        >>> ti.set_kernel_profiler_toolkit('cupti')
        >>> for i in range(100):
        >>>     fill()
        >>> ti.print_kernel_profile_info()

        >>> ti.set_kernel_profiler_toolkit('default')
        >>> for i in range(100):
        >>>     fill()
        >>> ti.print_kernel_profile_info()
    """
    profiler = get_default_kernel_profiler()
    return profiler.set_toolkit(toolkit_name)
import contextlib  # NOTE(review): belongs in the top-of-file import block


@contextlib.contextmanager
def collect_kernel_profile_metrics(metric_list=default_cupti_metrics):
    """Set temporary metrics that will be collected by the CUPTI toolkit
    within this context.

    This is a context manager (note the ``with`` statement in the example):
    the metrics are installed on entry and restored to the defaults on exit,
    even if the ``with``-body raises.

    Args:
        metric_list (list): a list of :class:`~taichi.lang.CuptiMetric()`
            instances, default value: :data:`~taichi.lang.default_cupti_metrics`.

    Yields:
        The default KernelProfiler instance, with ``metric_list`` active.

    Example::

        >>> import taichi as ti

        >>> ti.init(kernel_profiler=True, arch=ti.cuda)
        >>> ti.set_kernel_profiler_toolkit('cupti')
        >>> num_elements = 128*1024*1024

        >>> x = ti.field(ti.f32, shape=num_elements)
        >>> y = ti.field(ti.f32, shape=())
        >>> y[None] = 0

        >>> @ti.kernel
        >>> def reduction():
        >>>     for i in x:
        >>>         y[None] += x[i]

        >>> # When called with no parameter, Taichi will print its pre-defined metrics list
        >>> ti.get_predefined_cupti_metrics()
        >>> # get Taichi pre-defined metrics
        >>> profiling_metrics = ti.get_predefined_cupti_metrics('device_utilization')

        >>> global_op_atom = ti.CuptiMetric(
        >>>     name='l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom.sum',
        >>>     header=' global.atom ',
        >>>     format='    {:8.0f} ')
        >>> # add user defined metrics
        >>> profiling_metrics += [global_op_atom]

        >>> # metrics setting is temporary, and will be cleared when exiting this context.
        >>> with ti.collect_kernel_profile_metrics(profiling_metrics):
        >>>     for i in range(16):
        >>>         reduction()
        >>>     ti.print_kernel_profile_info('trace')

    Note:
        The configuration of the ``metric_list`` will be cleared when exiting
        this context.
    """
    profiler = get_default_kernel_profiler()
    profiler.set_metrics(metric_list)
    try:
        yield profiler
    finally:
        # Restore the default metrics even if the with-body raised, so a
        # failing measurement cannot leak temporary metrics into later runs.
        profiler.set_metrics()
def print_kernel_profile_info(mode='count'):
    """Print profiling results for the launched Taichi kernels.

    The profiler must be enabled first via ``kernel_profiler=True`` in
    ``ti.init()``. Two modes are available:

    * ``'count'`` (the default): statistics (min, max, avg time) per kernel.
    * ``'trace'``: per-launch records with specific profiling metrics
      (time, memory load/store, core utilization, etc.).

    Args:
        mode (str): the way to print profiling results.

    Example::

        >>> import taichi as ti

        >>> ti.init(ti.cpu, kernel_profiler=True)
        >>> var = ti.field(ti.f32, shape=1)

        >>> @ti.kernel
        >>> def compute():
        >>>     var[0] = 1.0

        >>> compute()
        >>> ti.print_kernel_profile_info()
        >>> # equivalent calls :
        >>> # ti.print_kernel_profile_info('count')

        >>> ti.print_kernel_profile_info('trace')

    Note:
        Currently the result of `KernelProfiler` could be incorrect on OpenGL
        backend due to its lack of support for `ti.sync()`.

        For advanced mode of `KernelProfiler`, please visit
        https://docs.taichi.graphics/docs/lang/articles/misc/profiler#advanced-mode.
    """
    profiler = get_default_kernel_profiler()
    profiler.print_info(mode)
def query_kernel_profile_info(name):
    """Query the elapsed time (min, avg, max) of a kernel on device, by name.

    The profiler must be enabled first via ``kernel_profiler=True`` in
    ``ti.init``.

    Args:
        name (str): kernel name.

    Returns:
        KernelProfilerQueryResult (class): with member variables(counter, min, max, avg)

    Example::

        >>> import taichi as ti

        >>> ti.init(ti.cpu, kernel_profiler=True)
        >>> n = 1024*1024
        >>> var = ti.field(ti.f32, shape=n)

        >>> @ti.kernel
        >>> def fill():
        >>>     for i in range(n):
        >>>         var[i] = 0.1

        >>> fill()
        >>> ti.clear_kernel_profile_info() #[1]
        >>> for i in range(100):
        >>>     fill()
        >>> query_result = ti.query_kernel_profile_info(fill.__name__) #[2]

        >>> print("kernel executed times =",query_result.counter)
        >>> print("kernel elapsed time(min_in_ms) =",query_result.min)
        >>> print("kernel elapsed time(max_in_ms) =",query_result.max)
        >>> print("kernel elapsed time(avg_in_ms) =",query_result.avg)

    Note:
        [1] To get the correct result, query_kernel_profile_info() must be
        used in conjunction with clear_kernel_profile_info().

        [2] Currently the result of `KernelProfiler` could be incorrect on
        OpenGL backend due to its lack of support for `ti.sync()`.
    """
    profiler = get_default_kernel_profiler()
    return profiler.query_info(name)
def init(arch=None,
         default_fp=None,
         default_ip=None,
         _test_mode=False,
         enable_fallback=True,
         **kwargs):
    """Initializes the Taichi runtime.

    This should always be the entry point of your Taichi program. Most
    importantly, it sets the backend used throughout the program.

    Args:
        arch: Backend to use. This is usually :const:`~taichi.lang.cpu` or
            :const:`~taichi.lang.gpu`.
        default_fp (Optional[type]): Default floating-point type.
        default_ip (Optional[type]): Default integral type.
        _test_mode (bool): Internal. When True, stop after configuration and
            return the special-config object without creating a program.
        enable_fallback (bool): Whether an unavailable ``arch`` may fall back
            to another backend (forwarded to ``adaptive_arch_select``).
        **kwargs: Taichi provides highly customizable compilation through
            ``kwargs``, which allows for fine grained control of Taichi compiler
            behavior. Below we list some of the most frequently used ones. For a
            complete list, please check out
            https://github.com/taichi-dev/taichi/blob/master/taichi/program/compile_config.h.

            * ``cpu_max_num_threads`` (int): Sets the number of threads used by the CPU thread pool.
            * ``debug`` (bool): Enables the debug mode, under which Taichi does a few more things like boundary checks.
            * ``print_ir`` (bool): Prints the CHI IR of the Taichi kernels.
            * ``packed`` (bool): Enables the packed memory layout. See https://docs.taichi.graphics/lang/articles/advanced/layout.
    """
    # Check version for users every 7 days if not disabled by users.
    skip = os.environ.get("TI_SKIP_VERSION_CHECK")
    if skip != 'ON':
        try_check_version()

    # Make a deepcopy in case these args reference to items from ti.cfg, which are
    # actually references. If no copy is made and the args are indeed references,
    # ti.reset() could override the args to their default values.
    default_fp = _deepcopy(default_fp)
    default_ip = _deepcopy(default_ip)
    kwargs = _deepcopy(kwargs)
    # Reset any previous runtime state before applying the new configuration.
    ti.reset()

    spec_cfg = _SpecialConfig()
    # Both configurators pop recognized keys out of `kwargs`; whatever
    # remains afterwards is an unrecognized argument (checked below).
    env_comp = _EnvironmentConfigurator(kwargs, ti.cfg)
    env_spec = _EnvironmentConfigurator(kwargs, spec_cfg)

    # configure default_fp/ip:
    # TODO: move these stuff to _SpecialConfig too:
    # Environment variables take precedence over the ti.init() arguments,
    # with a warning when both are given.
    env_default_fp = os.environ.get("TI_DEFAULT_FP")
    if env_default_fp:
        if default_fp is not None:
            _ti_core.warn(
                f'ti.init argument "default_fp" overridden by environment variable TI_DEFAULT_FP={env_default_fp}'
            )
        if env_default_fp == '32':
            default_fp = ti.f32
        elif env_default_fp == '64':
            default_fp = ti.f64
        elif env_default_fp is not None:
            raise ValueError(
                f'Invalid TI_DEFAULT_FP={env_default_fp}, should be 32 or 64')

    env_default_ip = os.environ.get("TI_DEFAULT_IP")
    if env_default_ip:
        if default_ip is not None:
            _ti_core.warn(
                f'ti.init argument "default_ip" overridden by environment variable TI_DEFAULT_IP={env_default_ip}'
            )
        if env_default_ip == '32':
            default_ip = ti.i32
        elif env_default_ip == '64':
            default_ip = ti.i64
        elif env_default_ip is not None:
            raise ValueError(
                f'Invalid TI_DEFAULT_IP={env_default_ip}, should be 32 or 64')

    if default_fp is not None:
        impl.get_runtime().set_default_fp(default_fp)
    if default_ip is not None:
        impl.get_runtime().set_default_ip(default_ip)

    # submodule configurations (spec_cfg):
    env_spec.add('print_preprocessed')
    env_spec.add('log_level', str)
    env_spec.add('gdb_trigger')
    env_spec.add('excepthook')
    env_spec.add('experimental_real_function')
    env_spec.add('short_circuit_operators')

    # compiler configurations (ti.cfg):
    # Every attribute of ti.cfg (except the three handled above) becomes a
    # recognized ti.init() keyword / TI_* environment variable.
    for key in dir(ti.cfg):
        if key in ['arch', 'default_fp', 'default_ip']:
            continue
        _cast = type(getattr(ti.cfg, key))
        if _cast is bool:
            # Booleans use the configurator's own string-to-bool handling
            # rather than bool('False')-style casting.
            _cast = None
        env_comp.add(key, _cast)

    # Anything still left in kwargs was not claimed by either configurator.
    unexpected_keys = kwargs.keys()
    if len(unexpected_keys):
        raise KeyError(
            f'Unrecognized keyword argument(s) for ti.init: {", ".join(unexpected_keys)}'
        )

    # dispatch configurations that are not in ti.cfg:
    if not _test_mode:
        ti.set_gdb_trigger(spec_cfg.gdb_trigger)
        impl.get_runtime().print_preprocessed = spec_cfg.print_preprocessed
        impl.get_runtime().experimental_real_function = \
            spec_cfg.experimental_real_function
        impl.get_runtime().short_circuit_operators = \
            spec_cfg.short_circuit_operators
        ti.set_logging_level(spec_cfg.log_level.lower())
        if spec_cfg.excepthook:
            # TODO(#1405): add a way to restore old excepthook
            ti.enable_excepthook()

    # select arch (backend):
    # TI_ARCH overrides the `arch` argument when set.
    env_arch = os.environ.get('TI_ARCH')
    if env_arch is not None:
        ti.info(f'Following TI_ARCH setting up for arch={env_arch}')
        arch = _ti_core.arch_from_name(env_arch)
    ti.cfg.arch = adaptive_arch_select(arch, enable_fallback, ti.cfg.use_gles)
    if ti.cfg.arch == cc:
        # The C-source backend compiles through the filesystem; give it a
        # sandbox directory (path encoded for the current locale).
        _ti_core.set_tmp_dir(locale_encode(prepare_sandbox()))
    print(f'[Taichi] Starting on arch={_ti_core.arch_name(ti.cfg.arch)}')

    # Torch based ndarray on opengl backend allocates memory on host instead of opengl backend.
    # So it won't work.
    if ti.cfg.arch == opengl and ti.cfg.ndarray_use_torch:
        ti.warn(
            'Opengl backend doesn\'t support torch based ndarray. Setting ndarray_use_torch to False.'
        )
        ti.cfg.ndarray_use_torch = False

    if _test_mode:
        # Tests only need the parsed special config; skip program creation.
        return spec_cfg

    get_default_kernel_profiler().set_kernel_profiler_mode(
        ti.cfg.kernel_profiler)

    # create a new program:
    impl.get_runtime().create_program()

    ti.trace('Materializing runtime...')
    impl.get_runtime().prog.materialize_runtime()

    # Fresh root FieldsBuilder for the new program.
    impl._root_fb = FieldsBuilder()

    if not os.environ.get("TI_DISABLE_SIGNAL_HANDLERS", False):
        impl.get_runtime()._register_signal_handlers()
    return None
def clear_kernel_profile_info():
    """Erase every record stored in KernelProfiler."""
    profiler = get_default_kernel_profiler()
    profiler.clear_info()