def _activation_stats_cb(self, module, inputs, output):
    """Record per-batch activation statistics for 'module'.

    Registered as a forward hook on 'module'; invoked with the module's
    output tensor after each forward() call.  Appends per-sample min, max,
    mean, std and L2-norm values to module.statistics_records.
    """
    def as_numpy(result):
        # torch reductions with dim= return a (values, indices) tuple;
        # plain reductions return a tensor directly.
        tensor = result[0] if isinstance(result, tuple) else result
        return tensor.detach().cpu().numpy()

    # Flatten each sample in the batch so statistics are per-sample.
    if not output.is_contiguous():
        output = output.contiguous()
    flat = output.view(output.size(0), -1)

    records = module.statistics_records
    records['min'].extend(as_numpy(torch.min(flat, dim=1)).tolist())
    records['max'].extend(as_numpy(torch.max(flat, dim=1)).tolist())
    records['mean'].extend(as_numpy(torch.mean(flat, dim=1)).tolist())
    # std() of a single element is meaningless (and returns NaN); use 0 instead
    if flat.shape[0] == flat.numel():
        records['std'].extend(as_numpy(torch.zeros(flat.shape[0])).tolist())
    else:
        records['std'].extend(as_numpy(torch.std(flat, dim=1)).tolist())
    records['l2'].extend(as_numpy(torch.norm(flat, p=2, dim=1)).tolist())
    records['shape'] = utils.size2str(output)
def purge_locals(self):
    """Purge all local (temporary) variables of this process.

    In debug mode, reports how many variables are purged, updates the
    module-level ``max_vars`` high-water mark, and estimates the memory
    being freed.
    """
    global max_vars
    temp_vars = self.temp_variables
    local_var_names = self.local_var_names
    num_locals = len(local_var_names)
    if config.debug and num_locals:
        # FIX: dict.iteritems() does not exist in Python 3 (the file already
        # uses Python-3-only print(..., end=' ') elsewhere); use items().
        local_vars = [v for k, v in temp_vars.items()
                      if k in local_var_names]
        max_vars = max(max_vars, num_locals)
        # getsizeof is shallow for ndarrays, so add their buffer size (nbytes)
        temp_mem = sum(sys.getsizeof(v) +
                       (v.nbytes if isinstance(v, np.ndarray) else 0)
                       for v in local_vars)
        # NOTE(review): only ndarray itemsizes are summed but the divisor is
        # the total number of locals -- preserved as-is (original behavior).
        avgsize = sum(v.dtype.itemsize if isinstance(v, np.ndarray) else 0
                      for v in local_vars) / num_locals
        if config.log_level in ("functions", "processes"):
            print(("purging {} variables (max {}), will free {} of memory "
                   "(avg field size: {} b)".format(num_locals, max_vars,
                                                   size2str(temp_mem),
                                                   avgsize)))
    for var in local_var_names:
        del temp_vars[var]
def run(self):
    """Initialize taichi with profiling and time the test case at each size.

    Appends one minimum-time measurement per entry of test_dsize_list.
    """
    ti.init(kernel_profiler=True, arch=self.arch)
    print("TestCase[%s.%s.%s]" % (self.func.__name__,
                                  arch_name(self.arch),
                                  dtype2str[self.test_dtype]))
    for dsize in self.test_dsize_list:
        print("test_dsize = %s" % (size2str(dsize)))
        elapsed = self.func(self.arch, self.test_dtype, dsize,
                            MemoryBound.basic_repeat_times)
        self.min_time_in_us.append(elapsed)
        # brief pause between sizes before the next kernel launch
        time.sleep(0.2)
    ti.reset()
def store_period_data(self, period):
    """Append this period's data to the output table and record its rows.

    Raises if `period` was already stored.  Clears all temporary variables
    computed during this period.
    """
    if config.debug:
        # FIX: dict.itervalues() does not exist in Python 3 (the file already
        # uses Python-3-only print(..., end=' ') elsewhere); use values().
        temp_mem = sum(v.nbytes for v in self.temp_variables.values()
                       if isinstance(v, np.ndarray))
        main_mem = self.array.nbytes
        print("mem used: %s (main: %s / temp: %s)"
              % (size2str(temp_mem + main_mem),
                 size2str(main_mem),
                 size2str(temp_mem)))

    # erase all temporary variables which have been computed this period
    self.temp_variables = {}
    if period in self.output_rows:
        raise Exception("trying to modify already simulated rows")
    else:
        startrow = self.table.nrows
        self.array.append_to_table(self.table)
        self.output_rows[period] = (startrow, self.table.nrows)
        self.output_index[period] = self.id_to_rownum
        self.table.flush()
def store_period_data(self, period):
    """Append this period's data to the output table and flush its index.

    Raises if `period` was already stored.  Clears all temporary variables
    computed during this period.  No-op on the table when self.table is None.
    """
    if config.debug and config.log_level in ("functions", "processes"):
        # FIX: dict.itervalues() does not exist in Python 3 (the file already
        # uses Python-3-only print(..., end=' ') elsewhere); use values().
        temp_mem = sum(v.nbytes for v in self.temp_variables.values()
                       if isinstance(v, np.ndarray))
        main_mem = self.array.nbytes
        print("mem used: %s (main: %s / temp: %s)"
              % (size2str(temp_mem + main_mem),
                 size2str(main_mem),
                 size2str(temp_mem)))

    # erase all temporary variables which have been computed this period
    self.temp_variables = {}
    if period in self.output_rows:
        raise Exception("trying to modify already simulated rows")
    if self.table is not None:
        startrow = self.table.nrows
        self.array.append_to_table(self.table)
        self.output_rows[period] = (startrow, self.table.nrows)
        self.flush_index(period)
        self.table.flush()
def get_results_dict(self):
    """Return benchmark results keyed by human-readable data size.

    Each entry maps a size label (e.g. "4MB") to a dict with the raw byte
    size, the scaled repeat count, and the measured elapsed time.
    """
    results_dict = {}
    # Idiom fix: iterate the two parallel lists together with zip() instead
    # of indexing via range(len(...)).
    for dsize, elapsed_time in zip(self._test_dsize_list,
                                   self._min_time_in_us):
        repeat = scaled_repeat_times(self._arch, dsize,
                                     MemoryBound.basic_repeat_times)
        item_name = size2str(dsize).replace('.0', '')
        # NOTE(review): values come from _min_time_in_us but the key says
        # "_ms" -- key preserved for compatibility; confirm the unit upstream.
        results_dict[item_name] = {
            'dsize_byte': dsize,
            'repeat': repeat,
            'elapsed_time_ms': elapsed_time
        }
    return results_dict
def purge_locals(self):
    """Purge all local (temporary) variables of this process.

    In debug mode, reports how many variables are purged (counting only
    ndarray memory) and updates the module-level ``max_vars`` high-water mark.
    """
    global max_vars
    temp_vars = self.temp_variables
    local_var_names = self.local_var_names
    num_locals = len(local_var_names)
    if config.debug and num_locals:
        # FIX: dict.iteritems() does not exist in Python 3 (the file already
        # uses Python-3-only print(..., end=' ') elsewhere); use items().
        local_vars = [v for k, v in temp_vars.items()
                      if k in local_var_names and isinstance(v, np.ndarray)]
        max_vars = max(max_vars, num_locals)
        temp_mem = sum(v.nbytes for v in local_vars)
        # divisor is the total number of locals, not just the ndarrays
        avgsize = sum(v.dtype.itemsize for v in local_vars) / num_locals
        print(("purging {} variables (max {}), will free {} of memory "
               "(avg field size: {} b)".format(num_locals, max_vars,
                                               size2str(temp_mem),
                                               avgsize)))
    for var in local_var_names:
        del temp_vars[var]
def update_record(record, tensor):
    """Fold one batch `tensor` into the running statistics in `record`.

    Closure: reads `self.collecting_second_pass` from the enclosing scope.
    First pass accumulates min/max, per-sample avg_min/avg_max and the
    running mean; the second pass updates only the 'b' and 'std'
    accumulators, using the mean frozen from the first pass.
    """
    if tensor.dtype not in [torch.float16, torch.float32, torch.float64]:
        # Mean function only works for float tensors
        tensor = tensor.to(torch.float32)
    if not tensor.is_contiguous():
        tensor = tensor.contiguous()
    # Flatten each sample: shape (batch, -1)
    act = tensor.view(tensor.size(0), -1)
    numel = act.numel()
    if self.collecting_second_pass:
        # Statement order is critical: update_b/update_std must see the
        # total_numel count from BEFORE this batch is added, and the mean
        # computed during the first pass.
        record['b'] = update_b(act, record['b'], record['mean'], record['total_numel'])
        record['std'] = update_std(act, record['std'], record['mean'], record['total_numel'])
        record['total_numel'] += numel
        return

    # In the general case, the average min/max that we're collecting are averages over the per-sample
    # min/max values. That is - we first calculate the min/max for each sample in the batch, then average
    # over that.
    # But - If each sample contains just a single value, then such a per-sample calculation we'll result in
    # avg_min = avg_max. So in that case we "revert" to calculating "global" values, for the whole batch,
    # instead of per-sample values
    dim = 0 if numel == act.shape[0] else 1
    min_per_sample = act.min(dim=dim)[0]
    max_per_sample = act.max(dim=dim)[0]
    # Global extrema across all batches seen so far
    record['min'] = min(record['min'], min_per_sample.min().item())
    record['max'] = max(record['max'], max_per_sample.max().item())
    record['avg_min'] = update_running_mean(min_per_sample, record['avg_min'], record['total_numel'])
    record['avg_max'] = update_running_mean(max_per_sample, record['avg_max'], record['total_numel'])
    new_mean = update_running_mean(act, record['mean'], record['total_numel'])
    record['mean'] = new_mean
    # Count must be bumped only after all running means used the old count
    record['total_numel'] += numel

    if not record['shape']:
        record['shape'] = utils.size2str(tensor)
def run_guarded(self, simulation, const_dict):
    """Run all subprocesses for the current period, then purge entity locals.

    Optionally autodumps/autodiffs the period, and in debug mode reports how
    much memory the purged local variables free (updating ``max_vars``).
    """
    global max_vars
    periods = const_dict['periods']
    idx = const_dict['period_idx']
    period = periods[idx]

    print()
    for k, v in self.subprocesses:
        # print(" *", end=' ')
        if k is not None:
            print(k, end=' ')
        utils.timed(v.run_guarded, simulation, const_dict)
        # print "done."
        simulation.start_console(v.entity, period,
                                 const_dict['__globals__'])
    if config.autodump is not None:
        self._autodump(period)

    if config.autodiff is not None:
        self._autodiff(period)

    # purge all local variables
    temp_vars = self.entity.temp_variables
    all_vars = self.entity.variables
    local_var_names = set(temp_vars.keys()) - set(all_vars.keys())
    num_locals = len(local_var_names)
    if config.debug and num_locals:
        # FIX: dict.iteritems() does not exist in Python 3 (this very method
        # uses Python-3-only print(..., end=' ') above); use items().
        local_vars = [v for k, v in temp_vars.items()
                      if k in local_var_names and isinstance(v, np.ndarray)]
        max_vars = max(max_vars, num_locals)
        temp_mem = sum(v.nbytes for v in local_vars)
        # divisor is the total number of locals, not just the ndarrays
        avgsize = sum(v.dtype.itemsize for v in local_vars) / num_locals
        print(("purging {} variables (max {}), will free {} of memory "
               "(avg field size: {} b)".format(num_locals, max_vars,
                                               utils.size2str(temp_mem),
                                               avgsize)))
    for var in local_var_names:
        del temp_vars[var]
def purge_locals(self):
    """Purge all local (temporary) variables of this process.

    When logging at function/process level in debug mode, also reports the
    amount of memory freed and updates the module-level ``max_vars``
    high-water mark.
    """
    global max_vars
    temp_vars = self.temp_variables
    local_var_names = self.local_var_names
    num_locals = len(local_var_names)
    if config.debug and num_locals:
        # FIX: dict.iteritems() does not exist in Python 3 (the file already
        # uses Python-3-only print(..., end=' ') elsewhere); use items().
        local_vars = [v for k, v in temp_vars.items()
                      if k in local_var_names]
        max_vars = max(max_vars, num_locals)
        # getsizeof is shallow for ndarrays, so add their buffer size (nbytes)
        temp_mem = sum(sys.getsizeof(v) +
                       (v.nbytes if isinstance(v, np.ndarray) else 0)
                       for v in local_vars)
        # NOTE(review): only ndarray itemsizes are summed but the divisor is
        # the total number of locals -- preserved as-is (original behavior).
        avgsize = sum(v.dtype.itemsize if isinstance(v, np.ndarray) else 0
                      for v in local_vars) / num_locals
        if config.log_level in ("functions", "processes"):
            print(("purging {} variables (max {}), will free {} of memory "
                   "(avg field size: {} b)".format(num_locals, max_vars,
                                                   size2str(temp_mem),
                                                   avgsize)))
    for var in local_var_names:
        del temp_vars[var]