def log_stacktraces():
    """Emit any collected stack traces through the module logger."""
    # Guard clause: tracing disabled, or nothing was recorded.
    if LoggedStackTraces is None or LoggedStackTraces.num_to_print() <= 0:
        return
    buf = StringIO()
    # stack[-1] = Call to "traceback.format_stack()"
    # stack[-2] = Call to "return log_call(...)"
    # skip_last=1 trims the format_stack() frame itself.
    LoggedStackTraces.print(buf, skip_last=1, indent=0)
    logger.info(buf.getvalue().rstrip())
def report_calls_per_sec(name, func, iterations):
    """Benchmark ``func`` and log its call throughput.

    Invokes ``func()`` exactly ``iterations`` times, measures the elapsed
    wall-clock time, and logs the observed rate in calls/sec.

    :param name: Human-readable label used in the log message.
    :param func: Zero-argument callable to benchmark.
    :param iterations: Number of times to invoke ``func``.
    """
    start_sec = time.time()
    for _ in range(iterations):
        func()
    end_sec = time.time()
    elapsed_sec = end_sec - start_sec
    # BUG FIX: the original computed (elapsed / iterations), which is
    # seconds-per-call, yet logged it as "calls/sec".  Invert the ratio.
    # Guard against a zero-length interval on coarse clocks.
    if elapsed_sec > 0:
        calls_per_sec = float(iterations) / elapsed_sec
    else:
        calls_per_sec = float('inf')
    logger.info(
        "> {name}: {calls_per_sec} calls/sec, measured over {iters} calls".
        format(name=name, calls_per_sec=calls_per_sec, iters=iterations))
def post_parse(self, bench_name):
    """Decide whether pretty-printing the cuda profile should be skipped."""
    # Sanity: a header must exist unless we errored or saw no CUDA calls.
    assert self.has_error or self.no_cuda_calls_expected or self.header is not None
    if self.no_cuda_calls_expected:
        logger.info("> Skip pretty cuda profile; didn't see any CUDA calls in {path}".format(path=self.profile_path(bench_name)))
        self.skip = True
    if self.has_error:
        logger.info("> Skip pretty cuda profile; WARNING: saw an ERROR in {path}".format(path=self.profile_path(bench_name)))
        self.skip = True
def rlscope_is_development_mode():
    """
    Install this script on our PATH in production mode.
    DON'T install this script on our PATH in development mode.
    """
    # Build the message first, then log it in one call.
    msg = textwrap.dedent("""\
    NOTE: this is a 'dummy' executable that tells us that RL-Scope was installed
    in development mode (i.e., "python setup.py develop") instead of from a wheel file.
    """.rstrip())
    logger.info(msg)
def rlscope_pip_installed():
    """
    Install this script on our PATH in production mode.
    DON'T install this script on our PATH in development mode.
    """
    # Build the message first, then log it in one call.
    msg = textwrap.dedent("""\
    NOTE: this is a 'dummy' executable that tells us that RL-Scope was installed
    in production mode from a wheel file (i.e., "pip install rlscope==... -f ...").
    """.rstrip())
    logger.info(msg)
def _wait_for_workers(self):
    """Block until the pool has capacity to start another worker process."""
    while True:
        self._join_finished_workers()
        # Unlimited pools never block; bounded pools wait for a free slot.
        has_capacity = (
            self.max_workers == ForkedProcessPool.UNLIMITED
            or len(self.active_workers) < self.max_workers)
        if has_capacity:
            break
        if self.debug:
            logger.info("> Pool.sleep for {sec} sec".format(
                sec=ForkedProcessPool.WAIT_WORKERS_TIMEOUT_SEC))
        time.sleep(ForkedProcessPool.WAIT_WORKERS_TIMEOUT_SEC)
def record_overhead_event(overhead_type, num_events):
    """Forward an overhead event to librlscope.

    :raises RLScopeLibraryError: when the C library reports failure.
    """
    debug_calls = py_config.DEBUG and py_config.DEBUG_RLSCOPE_LIB_CALLS
    if debug_calls:
        logger.info(
            _log_api_call_msg('record_overhead_event', overhead_type, num_events))
    status = _so.rlscope_record_overhead_event(
        _as_c_string(overhead_type), c_int(num_events))
    if status != TF_OK:
        raise RLScopeLibraryError(status)
    return status
def stop(self):
    """Record the stop timestamp for the current phase (idempotent)."""
    if self.stopped:
        # Already stopped; log and ignore the duplicate call.
        logger.info("> Already ran UnitTestDataDumper.stop; ignoring.")
        return
    stop_time_us = rlscope_timer.now_us()
    self.stop_t = stop_time_us
    self._add_time(self.phase_end, self.cur_phase, stop_time_us)
    self.stopped = True
def set_max_operations(operation, num_pushes):
    """Tell librlscope how many pushes of ``operation`` to expect.

    :raises RLScopeLibraryError: when the C library reports failure.
    """
    debug_calls = py_config.DEBUG and py_config.DEBUG_RLSCOPE_LIB_CALLS
    if debug_calls:
        logger.info(
            _log_api_call_msg('set_max_operations', operation, num_pushes))
    status = _so.rlscope_set_max_operations(_as_c_string(operation),
                                            c_int(num_pushes))
    if status != TF_OK:
        raise RLScopeLibraryError(status)
    return status
def unwrap_tensorflow_v2():
    """Remove RL-Scope wrappers from the TensorFlow v2 C-extension modules."""
    if py_config.DEBUG_WRAP_CLIB:
        logger.info(
            "> RL-Scope: Unwrapping module=tensorflow.python.pywrap_tfe")
    # Both TF v2 pybind modules were wrapped; unwrap each in turn.
    for libname in ('tensorflow.python.pywrap_tfe',
                    'tensorflow.python.client.pywrap_tf_session'):
        wrap_util.unwrap_lib(
            CFuncWrapper,
            import_libname=libname,
            wrap_libname=libname)
def has_next_pass():
    """Ask librlscope whether another profiling pass remains.

    :return: True when the C library reports a pending pass.
    :raises RLScopeLibraryError: when the C library reports failure.
    """
    debug_calls = py_config.DEBUG and py_config.DEBUG_RLSCOPE_LIB_CALLS
    if debug_calls:
        logger.info(_log_api_call_msg('has_next_pass'))
    # Out-parameter for the C call (renamed so it doesn't shadow this function).
    flag = c_int(0)
    status = _so.rlscope_has_next_pass(ctypes.byref(flag))
    if status != TF_OK:
        raise RLScopeLibraryError(status)
    value = bool(flag.value)
    if debug_calls and value:
        logger.info(f"[PY_RLSCOPE_LIB] returned: {value}")
    return value
def wrap_tensorflow_v1(category=constants.CATEGORY_TF_API, debug=False):
    """Wrap TensorFlow v1's SWIG module so TF_* calls get annotated."""
    if py_config.DEBUG_WRAP_CLIB:
        logger.info(
            "> RL-Scope: Wrapping module=tensorflow call with category={category} annotations"
            .format(category=category, ))
    libname = 'tensorflow.pywrap_tensorflow'
    success = wrap_util.wrap_lib(
        CFuncWrapper,
        import_libname=libname,
        wrap_libname=libname,
        wrapper_args=(category, DEFAULT_PREFIX, debug),
        # Only wrap the TensorFlow C-API entry points.
        func_regex='^TF_')
    assert success
def __init__(self, func, category, prefix=DEFAULT_PREFIX, debug=False):
    # Wrap ``func`` so calls through this object can be intercepted and
    # annotated under ``category``.
    #
    # NOTE: to be as compatible as possible with intercepting existing code,
    # we forward setattr/getattr on this object back to the func we are wrapping
    # (e.g. func might be some weird SWIG object).
    # Hence we store our own attributes via super().__setattr__ so they do
    # not get forwarded onto the wrapped func.
    super().__setattr__('func', func)
    super().__setattr__('prefix', prefix)
    super().__setattr__('category', category)
    super().__setattr__('debug', debug)
    name = self.wrapper_name(func.__name__)
    if py_config.DEBUG_WRAP_CLIB:
        logger.info("> call.name = {name}".format(name=name))
def wrap_module(module, category, debug=False, print_summary=True, **kwargs):
    """Wrap the functions of ``module`` with CFuncWrapper annotations."""
    if py_config.DEBUG_WRAP_CLIB:
        logger.info(
            "> RL-Scope: Wrapping module={mod} call with category={category} annotations"
            .format(
                mod=module,
                category=category,
            ))
    # Delegate the actual wrapping to wrap_util; forward extra options as-is.
    wrap_util.wrap_module(
        CFuncWrapper,
        module,
        wrapper_args=(category, DEFAULT_PREFIX, debug),
        print_summary=print_summary,
        **kwargs)
def unwrap_module(FuncWrapperKlass, module, print_summary=False):
    """Undo FuncWrapperKlass wrapping on every attribute of ``module``."""
    # Count how many attributes were actually unwrapped.
    num_unwrapped = sum(
        1 for attr_name in dir(module)
        if unwrap_func(FuncWrapperKlass, module, attr_name))
    if print_summary:
        logger.info(
            textwrap.dedent("""\
            RL-Scope unwrapped {n} functions in {module} after tracing.
            """).format(
                n=num_unwrapped,
                module=module,
            ).rstrip())
def set_metadata(directory, process_name, machine_name, phase):
    """Tell librlscope where traces go and which process/machine/phase this is.

    :raises RLScopeLibraryError: when the C library reports failure.
    """
    debug_calls = py_config.DEBUG and py_config.DEBUG_RLSCOPE_LIB_CALLS
    if debug_calls:
        logger.info(
            _log_api_call_msg('set_metadata', directory, process_name,
                              machine_name, phase))
    # All four arguments cross the C boundary as C strings.
    c_args = (
        _as_c_string(directory),
        _as_c_string(process_name),
        _as_c_string(machine_name),
        _as_c_string(phase),
    )
    status = _so.rlscope_set_metadata(*c_args)
    if status != TF_OK:
        raise RLScopeLibraryError(status)
    return status
def _sample_cpu_total_resident_memory_bytes(self, procs):
    """Sum resident-set size (RSS, in bytes) across ``procs``.

    :param procs: Iterable of psutil.Process objects to sample.
    :return: Total RSS in bytes; 0 when every process has already exited.
    """
    rss_samples = []
    for proc in procs:
        try:
            rss_samples.append(proc.memory_info().rss)
        except psutil.NoSuchProcess:
            # Process exited between enumeration and sampling; best-effort skip.
            if self.debug:
                logger.info(
                    ("Tried to sample resident memory from proc={proc}, "
                     "but it looks like it exited; skipping").format(
                        proc=proc))
    # BUG FIX: the original used np.sum(m.rss for m in mem_infos); np.sum
    # over a *generator* does not reduce element-wise (NumPy wraps it in a
    # 0-d object array / deprecated fallback).  Builtin sum() is correct and
    # returns 0 for an empty sample list.
    total_resident_memory_bytes = sum(rss_samples)
    return total_resident_memory_bytes
def _parse_num_calls(self, bench_name):
    """Determine how many calls this benchmark performed.

    Order of precedence: DQN microbenchmark data, then the 'num_calls'
    config entry, then a default of 1.
    """
    if self.is_dqn:
        # DQN microbenchmarks record per-benchmark call counts.
        micro_data = self.load_microbench(bench_name)
        num_calls = compute_num_calls_dqn(micro_data[self.bench_name])
    elif 'num_calls' in self.config:
        num_calls = compute_num_calls(self.config)
    else:
        num_calls = 1
    logger.info("> num_calls = {num_calls}".format(
        num_calls=num_calls))
    return num_calls
def expr_already_ran(to_file, debug=False):
    """Return True iff ``to_file`` records a successful experiment run.

    Scans the log file line-by-line for EXPERIMENT_SUCCESS_LINE.

    :param to_file: Path to the experiment's output/log file.
    :param debug: Log where the success line was found.
    :return: True when the success sentinel is present; False otherwise
        (including when the file does not exist).
    """
    if not _e(to_file):
        return False
    with open(to_file) as f:
        for lineno, line in enumerate(f, start=1):
            line = line.rstrip()
            # BUG FIX: the sentinel was interpolated into a regex unescaped;
            # any regex metacharacters in it would mis-match.  re.escape
            # makes the search a literal-substring match as intended.
            if re.search(re.escape(EXPERIMENT_SUCCESS_LINE), line):
                if debug:
                    logger.info("Saw \"{success_line}\" in {path} @ line {lineno}; skipping.".format(
                        success_line=EXPERIMENT_SUCCESS_LINE,
                        lineno=lineno,
                        path=to_file))
                return True
    return False
def run_cpp(self):
    """Run the RL-Scope C++ unit-test binary, optionally under gdb."""
    args = self.args
    # Bail out early if the test binary hasn't been built/installed.
    if shutil.which(py_config.CPP_UNIT_TEST_CMD) is None:
        logger.error(
            "Didn't find C++ test binary ({bin}) on PATH; have you run build_rlscope yet?"
            .format(bin=py_config.CPP_UNIT_TEST_CMD, ))
        sys.exit(1)
    cmd = [py_config.CPP_UNIT_TEST_CMD]
    if args.debug:
        # Interactive debugging: run the binary under gdb.
        cmd = ['gdb', '--args'] + cmd
    print_cmd(cmd)
    proc = subprocess.run(cmd)
    if proc.returncode == 0:
        logger.info("RL-Scope C++ unit tests PASSED")
        return
    logger.error("RL-Scope C++ unit tests failed")
    sys.exit(proc.returncode)
def dump_machine_util(directory, trace_id, machine_util, debug):
    """
    Serialize a MachineUtilization proto to its trace file.

    NOTE: Run in a separate thread/process; should NOT perform state modifications.

    :param directory: Trace output directory.
    :param trace_id: Identifier used to build the trace file name.
    :param machine_util: Either a MachineUtilization proto or its
        serialized bytes (as produced by SerializeToString()).
    :param debug: Log the output path after dumping.
    """
    # Q: Do both windows and linux multiprocessing.Process inherit CPU affinity...?
    # IMPROVED: isinstance is the idiomatic type check; ``type(x) != T``
    # would wrongly re-parse a subclass of MachineUtilization.
    if not isinstance(machine_util, MachineUtilization):
        serialized = machine_util
        machine_util = MachineUtilization()
        machine_util.ParseFromString(serialized)
    trace_path = get_trace_path(directory, trace_id)
    with open(trace_path, 'wb') as f:
        f.write(machine_util.SerializeToString())
    if debug:
        logger.info("> Dumped @ {path}".format(path=trace_path))
def wrap_entire_module(import_libname, category, debug=False, **kwargs):
    """Replace ``sys.modules[import_libname]`` with a LibWrapper around it.

    After this call, ``import <import_libname>`` yields the wrapper, so every
    callable attribute access goes through CFuncWrapper annotation.

    :param import_libname: Dotted module name to import and wrap.
    :param category: Annotation category recorded for calls into the module.
    :param debug: Forwarded to LibWrapper.
    """
    if py_config.DEBUG_WRAP_CLIB:
        logger.info(
            "> RL-Scope: Wrapping module={mod} call with category={category} annotations"
            .format(
                mod=import_libname,
                category=category,
            ))
    # IMPROVED: importlib.import_module replaces the exec()/eval() pair; it
    # is safer and returns the module object directly (matching wrap_lib).
    import importlib
    lib = importlib.import_module(import_libname)
    assert lib is not None
    # Evict the real module so future imports resolve to the wrapper below.
    if import_libname in sys.modules:
        del sys.modules[import_libname]
    lib_wrapper = LibWrapper(lib, category, debug, **kwargs)
    # "import pybullet" will now return LibWrapper(pybullet)
    sys.modules[import_libname] = lib_wrapper
def _launch_utilization_sampler(self):
    """Spawn the rls-util-sampler child process and remember its pid."""
    sampler_cmd = [
        'rls-util-sampler',
        '--rlscope-directory', self.rlscope_directory,
        # Sample memory-usage of the entire process tree rooted at this process.
        '--rlscope-root-pid', str(os.getpid()),
    ]
    if self.debug:
        sampler_cmd.append('--rlscope-debug')
        log_cmd(sampler_cmd)
    # We make sure nvidia-smi runs fast at the VERY START of training
    # (to avoid false alarms when training is busy with the CPU/GPU).
    self.proc = subprocess.Popen(sampler_cmd)
    self.proc_pid = self.proc.pid
    logger.info(
        "RL-Scope: CPU/GPU utilization sampler running @ pid={pid}".format(
            pid=self.proc_pid))
def submit(self, name, fn, *args, sync=False, **kwargs):
    """Run ``fn(*args, **kwargs)`` in a forked worker process.

    With ``sync=True`` the call runs inline in this process and its result
    is returned; otherwise a worker is started (blocking first until the
    pool has capacity) and tracked in ``self.active_workers``.
    """
    if sync:
        return fn(*args, **kwargs)
    self._wait_for_workers()
    worker = MyProcess(target=fn, name=name, args=args, kwargs=kwargs)
    worker.start()
    if self.debug:
        logger.info(
            "> Pool(name={name}): submit pid={pid}, proc={proc}".format(
                name=self.name, pid=worker.pid, proc=worker))
    self.active_workers.append(worker)
def _terminate_utilization_sampler(self, warn_terminated=True):
    """Terminate the previously launched utilization sampler process.

    :param warn_terminated: Log a warning if the sampler already exited.
    """
    assert self.proc_pid is not None
    logger.info(
        "RL-Scope: terminating CPU/GPU utilization sampler @ pid={pid}".
        format(pid=self.proc_pid))
    try:
        sampler = psutil.Process(self.proc_pid)
    except psutil.NoSuchProcess:
        # Sampler already exited on its own.
        if warn_terminated:
            logger.info(
                "RL-Scope: Warning; tried to terminate utilization sampler @ pid={pid} but it wasn't running"
                .format(pid=self.proc_pid))
        return
    sampler.terminate()
    self.proc = None
    self.proc_pid = None
def _generate_rlscope_init_py(self):
    # Write rlscope/__init__.py, embedding the pip package version so that
    # "import rlscope; rlscope.__version__" reports the installed release.
    # NOTE(review): interior line breaks of this template were reconstructed
    # from a collapsed source line — confirm against the generated file.
    with open(RLSCOPE_INIT_PY, 'w') as f:
        self.write_py_header(f)
        # lstrip=False: keep the template's leading newline intact.
        self.write(f, """
        # RL-Scope pip package version:
        # $ pip install rlscope=={version} -f https://uoft-ecosystem.github.io/rlscope/whl
        __version__ = "{version}"
        __all__ = []
        from rlscope import \\
            version
        __all__.extend([
            'version',
        ])
        """.format(version=self.rlscope_version, ), lstrip=False)
    logger.info("Output {path}".format(path=RLSCOPE_INIT_PY))
def add_experiment_config(self, path):
    """
    add_fields(path)

    We expect to find experiment_config.json where the machine_util.*.proto files live.

    :param path: Path to a training-progress file.
    :return: Parsed experiment config, or None when the config file is absent.
    """
    assert is_training_progress_file(path)
    directory = _d(path)
    # Renamed local: avoid reusing the ``path`` parameter for the config file.
    config_path = experiment.experiment_config_path(directory)
    if not _e(config_path):
        if self.debug:
            logger.info(
                "Didn't find {path}; skip adding experiment columns to csv"
                .format(path=config_path))
        return None
    return experiment.load_experiment_config(directory)
def wrap_lib(FuncWrapperKlass, import_libname, wrapper_args=tuple(), func_regex=None, wrap_libname=None):
    """Import ``import_libname`` and wrap its functions with FuncWrapperKlass.

    :return: True on success; False when the library cannot be imported.
    """
    if wrap_libname is not None:
        assert import_libname == wrap_libname
    # wrapper_args = (category, prefix)
    if wrap_libname is None:
        wrap_libname = import_libname
    try:
        # e.g. import tensorflow.pywrap_tensorflow
        lib = importlib.import_module(import_libname)
        assert lib is not None
        if py_config.DEBUG_WRAP_CLIB:
            logger.info(' ... success')
    except (ImportError, NameError) as e:
        # Failed to import library; skip wrapping the library.
        logger.info(
            ' ... FAILED: cannot wrap module {lib}; stacktrace:'.format(
                lib=wrap_libname))
        logger.info(e)
        return False
    wrap_module(FuncWrapperKlass, lib, wrapper_args, func_regex=func_regex)
    return True
def run(self):
    """Index every output file under ``self.directory`` into the plot index.

    Each file with readable metadata is recorded under the path key matching
    its type (venn_js / overlap_js / js); the index is then dumped to disk.
    """
    # BUG FIX: the original initialized ``i = 0`` but never incremented it,
    # so every debug line logged "path[0]"; enumerate() fixes the counter.
    for i, path in enumerate(self.each_file()):
        if self.debug:
            logger.info("path[{i}] = {path}".format(i=i, path=path))
        md = self.read_metadata(path)
        if md is None:
            logger.info(
                "WARNING: didn't find any metadata in {path}; SKIP.".
                format(path=path))
            continue
        if self.debug:
            logger.info("> index: {path}".format(path=path))
        entry = self.lookup_entry(md, path)
        relpath = os.path.relpath(path, self.directory)
        if self.is_venn_js_path(path):
            entry['venn_js_path'] = relpath
        elif self.is_overlap_js_path(path):
            entry['overlap_js_path'] = relpath
        elif self.is_js_path(path):
            entry['js_path'] = relpath
        else:
            # Unknown output-file type; refuse silently mis-indexing it.
            raise NotImplementedError
    self.dump_plot_index_py()
def __getattr__(self, name):
    """
    __getattr__ gets called is LibWrapper.name didn't exist.
    In that case, they're probably trying to call a function from the .so lib (self.lib).

    We wrap the function with CFuncWrapper(lib[name]), and set it as:
    LibWrapper[name] = CFuncWrapper(lib[name])

    Subsequent calls to lib[name] WON'T come through here (since LibWrapper[name] is set).

    :param name: Name of a .so function they're trying to call (probably, could be constant though).
    :return: The wrapped callable, or the raw attribute when not callable.
    """
    target = getattr(self.lib, name)
    # Non-callables (constants, submodules) pass through unwrapped.
    if not callable(target):
        return target
    if self.debug:
        logger.info("Wrap: {name}".format(name=name))
    wrapped = CFuncWrapper(target, self.category, self.prefix, self.debug)
    # Cache on self so future lookups bypass __getattr__ entirely.
    setattr(self, name, wrapped)
    return wrapped