def _collect_torch_profiling_data_if_profiler_enabled(self):
    if self.autograd_profiler_enabled is False:
        return
    if is_pt_1_8():
        records = torch.autograd._disable_profiler_legacy()
    else:
        records = torch.autograd._disable_profiler()
    self.autograd_profiler_enabled = False

    if is_pt_1_7():
        function_events = torch.autograd.profiler.EventList(
            torch.autograd.profiler.parse_event_records(records), use_cuda=self.use_cuda
        )
    elif is_pt_1_8():
        function_events = torch.autograd.profiler.EventList(
            torch.autograd.profiler.parse_legacy_records(records), use_cuda=self.use_cuda
        )
    else:
        function_events = torch.autograd.profiler.EventList(
            torch.autograd.profiler.parse_cpu_trace(records), use_cuda=self.use_cuda
        )

    for index, event in enumerate(function_events):
        if is_pt_1_8():
            cpu_time = event.time_range.start + self.start_profiler_time_us
            duration = event.time_range.elapsed_us() / float(CONVERT_TO_MICROSECS)
        else:
            # event.cpu_interval.start is in microseconds
            cpu_time = event.cpu_interval.start + self.start_profiler_time_us
            duration = event.cpu_interval.elapsed_us() / float(CONVERT_TO_MICROSECS)
        # timestamp is expected in seconds for record_trace_events
        timestamp = cpu_time / float(CONVERT_TO_MICROSECS)
        self.record_trace_events(
            training_phase="cpu_functions",
            op_name=event.name,
            phase="X",
            timestamp=timestamp,
            duration=duration,
            tid=event.thread,
            step_num=self.step,
            device="cpu",
        )
        for k in event.kernels:
            self.record_trace_events(
                training_phase="gpu_functions-dev:" + str(k.device),
                op_name=k.name,
                phase="X",
                # timestamp is expected in seconds for record_trace_events
                timestamp=(k.interval.start + self.start_profiler_time_us)
                / float(CONVERT_TO_MICROSECS),
                duration=k.interval.elapsed_us() / float(CONVERT_TO_MICROSECS),
                tid=k.device,
                step_num=self.step,
                event_name=event.name,
                device=k.device,
                start_cpu_thread=event.thread,
                cpu_thread_start_time=cpu_time,
            )
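# The is_pt_1_5()/is_pt_1_6()/is_pt_1_7()/is_pt_1_8() helpers used above are defined elsewhere
# in this package; they are assumed to be simple major.minor checks against the installed torch
# release. A minimal, hypothetical sketch of one such check, under that assumption (name and
# imports are illustrative only):
def _is_torch_1_x_example(minor):
    # Hypothetical helper: True when the installed torch release is 1.<minor>.*.
    # Imports are kept local so the sketch stays self-contained.
    from packaging import version

    import torch

    parsed = version.parse(torch.__version__)
    return parsed.major == 1 and parsed.minor == minor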
def test_pytorch_profiler_rnn(pytorch_profiler_config_parser, out_dir):
    train_model(out_dir)
    lt = LocalAlgorithmMetricsReader(out_dir)
    lt.refresh_event_file_list()
    events = lt.get_events(0, time.time() * 1000000)
    print(f"Number of events {len(events)}")
    if is_pt_1_5():
        assert len(events) <= 64
    elif is_pt_1_6() or is_pt_1_7():
        assert len(events) <= 85
    shutil.rmtree(out_dir, ignore_errors=True)
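# The train_model(out_dir) fixture above is defined elsewhere in the test suite. A minimal,
# hypothetical sketch of such a helper, assuming Hook is the smdebug PyTorch hook used by the
# other tests here: register the hook on a small LSTM and run a few optimizer steps so that
# forward/backward trace events are emitted.
def _example_train_rnn_model(out_dir, num_steps=5):
    import torch
    import torch.optim as optim

    from smdebug.pytorch import Hook

    model = torch.nn.LSTM(input_size=10, hidden_size=20, num_layers=2)
    hook = Hook(out_dir=out_dir)
    hook.register_hook(model)
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    for _ in range(num_steps):
        optimizer.zero_grad()
        inputs = torch.randn(5, 3, 10)  # (seq_len, batch, input_size)
        output, _ = model(inputs)
        loss = output.sum()
        loss.backward()
        optimizer.step()
    hook.close()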
def test_pytorch_profiler(pytorch_profiler_config_parser, out_dir):
    device = torch.device("cpu")
    model = Net().to(device)
    hook = Hook(out_dir=out_dir)
    hook.register_hook(model)
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    train(model, device, optimizer, hook)
    hook.close()

    lt = LocalAlgorithmMetricsReader(out_dir)
    lt.refresh_event_file_list()
    events = lt.get_events(0, time.time() * 1000000)
    print(f"Number of events {len(events)}")
    if is_pt_1_5():
        assert len(events) == 386
    elif is_pt_1_6():
        assert len(events) == 672
    elif is_pt_1_7():
        assert 220 <= len(events)
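# `Net` and `train` above are fixtures defined elsewhere in the test file. A hypothetical,
# minimal stand-in for that pair (a tiny feed-forward model plus a short training loop), shown
# only to illustrate the shape of what the test exercises; the `_example_` names are not the
# real fixtures.
class _ExampleNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(20, 10)
        self.fc2 = torch.nn.Linear(10, 2)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        return self.fc2(x)


def _example_train(model, device, optimizer, hook=None, num_steps=10):
    # `hook` is accepted only to mirror the real fixture's signature; data saving is driven by
    # the hook already registered on the model, so it is not used directly here.
    model.train()
    for _ in range(num_steps):
        optimizer.zero_grad()
        data = torch.randn(16, 20, device=device)
        target = torch.randint(0, 2, (16,), device=device)
        loss = torch.nn.functional.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()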
def forward_pre_hook(self, module, inputs):
    # Write the gradients of the past step if the writer is still available.
    if self.writer is not None:
        self._close_writers()
    self._close_tb_writer()

    if not self.prepared_collections:
        # At this point we need all collections to be ready.
        # This may not be the case at creation of the hook,
        # as user code running after hook creation might add collections.
        self._prepare_collections()
        self.prepared_collections = True

    self._increment_step()

    ## preparing for step metrics
    # The last operation can be a forward pass (an eval loop is running, or a module such as an
    # RNN calls forward multiple times), or it can be a backward pass (the train backward loop
    # just finished and we are at forward again).
    # We log all outstanding forward and backward events here.
    self.log_outstanding_timeline_metrics()

    self.step_event = self._TraceEventData(
        phase="Step:" + str(self.mode),
        op_name="Step:" + str(self.mode),
        start_time=time.time(),
        # end time of step_event will be updated every time a forward or backward event
        # is called after this
        dur=0,
        pid=os.getpid(),
        step_num=str(self.mode_steps[self.mode]),
    )

    self.parent_forward_event = self._TraceEventData(
        phase="Forward",
        op_name=module._module_name,
        start_time=time.time(),
        # end time of parent_forward_event will be updated every time a forward event
        # is called after this
        dur=0,
        pid=os.getpid(),
        step_num=str(self.mode_steps[self.mode]),
    )

    self.profiler_config_parser.load_config()
    self.profiler_config_parser.handle_step_start_python_profiling(self.mode, self.step)

    if (
        self.autograd_profiler_enabled
        and not self.profiler_config_parser.config.detailed_profiling_config.disabled
    ):
        self._collect_torch_profiling_data_if_profiler_enabled()

    # should we re-enable profiling for this step?
    if (
        self.profiler_config_parser.should_save_metrics(
            MetricsCategory.DETAILED_PROFILING, self.step
        )
        and not self.autograd_profiler_enabled
    ):
        self.autograd_profiler_enabled = True
        if is_pt_1_5():
            torch.autograd._enable_profiler(torch.autograd.ProfilerConfig(self.profiler, False))
            self.start_profiler_time_us = time.time() * CONVERT_TO_MICROSECS
        elif is_pt_1_7():
            torch.autograd._enable_profiler(
                torch.autograd.ProfilerConfig(self.profiler, False, False, False)
            )
            self.start_profiler_time_us = time.time() * CONVERT_TO_MICROSECS
        elif is_pt_1_8():
            torch.autograd._enable_profiler_legacy(
                torch.autograd.ProfilerConfig(self.profiler, False, False, False, False)
            )
            self.start_profiler_time_us = time.time() * CONVERT_TO_MICROSECS
        elif is_pt_1_6():
            torch.autograd._enable_profiler(
                torch.autograd.ProfilerConfig(self.profiler, False, False)
            )
            self.start_profiler_time_us = time.time() * CONVERT_TO_MICROSECS
        else:
            self.logger.warn(
                f"Detailed profiling using the autograd profiler is not supported for torch version "
                f"{torch.__version__}"
            )
            self.autograd_profiler_enabled = False

    if self.is_smdataparallel_profiling:
        # Stop smdataparallel profiling at the end of the step
        stop_smdataparallel_profiler(
            smdataparallel, self.profiler_config_parser.config.local_path
        )
        self.is_smdataparallel_profiling = False

    if self.profiler_config_parser.should_save_metrics(
        MetricsCategory.SMDATAPARALLEL_PROFILING, self.step
    ):
        start_smdataparallel_profiler(
            smdataparallel, self.profiler_config_parser.config.local_path
        )
        self.is_smdataparallel_profiling = True

    if self._get_collections_to_save_for_step():
        self._initialize_writers()
        self._log_params(module)

    if self.last_saved_step is not None and not self.exported_collections:
        self.export_collections()
        self.exported_collections = True

    self.first_forward_submodule_name = None
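# NOTE (illustrative sketch): self._TraceEventData used above is assumed to be a small container
# for a single timeline event whose end time keeps getting pushed forward as later forward or
# backward calls arrive (hence dur=0 at creation). A hypothetical equivalent, not the actual
# class used by the hook:
class _ExampleTraceEventData:
    def __init__(self, phase, op_name, start_time, dur, pid, step_num, **event_args):
        self.training_phase = phase
        self.op_name = op_name
        self.start_time = start_time
        self.end_time = start_time + dur
        self.pid = pid
        self.step_num = step_num
        self.event_args = event_args

    def update_end_time(self, end_time):
        # Extend the event whenever a later forward/backward event for the same step arrives.
        self.end_time = max(self.end_time, end_time)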