def _init_quant_env(self, quant_mode, output_dir, quant_strategy):
  if isinstance(quant_mode, int):
    NndctScreenLogger().warning(
        f"quant_mode will no longer accept integer values in a future version. "
        f"It supports the string values 'calib' and 'test'.")
    qmode = quant_mode
  elif isinstance(quant_mode, str):
    if quant_mode == 'calib':
      qmode = 1
    elif quant_mode == 'test':
      qmode = 2
    else:
      NndctScreenLogger().error(
          f"quant_mode only supports the values 'calib' and 'test'. "
          f"Falling back to 'calib' (calibration mode).")
      qmode = 1
  else:
    NndctScreenLogger().error(
        f"quant_mode only supports the string values 'calib' and 'test'. "
        f"Falling back to 'calib' (calibration mode).")
    qmode = 1

  if NndctOption.nndct_quant_mode.value > 0:
    qmode = NndctOption.nndct_quant_mode.value

  if qmode == 1:
    NndctScreenLogger().info(f"Quantization calibration process start up...")
  elif qmode == 2:
    NndctScreenLogger().info(f"Quantization test process start up...")

  quantizer = TORCHQuantizer.create_from_strategy(qmode, output_dir,
                                                  quant_strategy)
  return quantizer, qmode
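# Usage sketch of the quant_mode mapping above, assuming the public
# torch_quantizer wrapper forwards quant_mode to this helper. The model and
# dummy input below are illustrative placeholders.
import torch
from pytorch_nndct.apis import torch_quantizer

model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())
dummy_input = torch.randn(1, 3, 32, 32)

# 'calib' maps to qmode == 1 and collects activation statistics.
quantizer = torch_quantizer('calib', model, (dummy_input,), output_dir='quantize_result')
# 'test' maps to qmode == 2 and evaluates with the fixed quantization steps.
quantizer = torch_quantizer('test', model, (dummy_input,), output_dir='quantize_result')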
def deploy_model(self):
  if not self._qat_proc:
    NndctScreenLogger().warning(
        f"Only the quantization-aware training process has a deployable model.")
    return
  NndctScreenLogger().info(f"=>Get deployable module.")
  return self.processor.deploy_model()
def build_torch_graph(self, graph_name, module, input_args, train=False):
  self._module = module
  NndctScreenLogger().info("Start to trace model...")
  fw_graph, params = self._trace_graph_from_model(input_args, train)
  NndctScreenLogger().info("Finish tracing.")

  self._node_kinds = {node.kind().split(":")[-1] for node in fw_graph.nodes()}
  if NndctOption.nndct_parse_debug.value >= 1:
    NndctDebugLogger.write(f"jit graph:\n{fw_graph}")
    NndctDebugLogger.write(f"\nparsed node kinds:\n{self._node_kinds}\n")

  raw_graph, raw_params = self._build_raw_graph(graph_name, fw_graph, params)
  if NndctOption.nndct_parse_debug.value >= 2:
    NndctDebugLogger.write(f"\ntorch raw graph:\n{raw_graph}")

  opt_graph = self._opt_raw_graph(raw_graph)
  if NndctOption.nndct_parse_debug.value >= 2:
    NndctDebugLogger.write(f"\ntorch opt graph:\n{opt_graph}")

  if NndctOption.nndct_parse_debug.value >= 3:
    self._check_stub_topology(opt_graph)

  return opt_graph, raw_params
def _check_calibration_completion(self):
  ret = True
  # Check node output tensors
  for node in self.Nndctgraph.nodes:
    if self.configer.is_node_quantizable(node, self.lstm) and node.in_quant_part:
      qout = self.configer.quant_output(node.name).name
      bnfp = self.get_quant_config(qout, False)
      if bnfp[1] is None:
        if node.op.type not in [NNDCT_OP.SIGMOID, NNDCT_OP.TANH]:
          NndctScreenLogger().warning(
              f'Node output tensor is not quantized: {node.name} type: {node.op.type}')
          ret = False
  # Check node input tensors
  for item in self._QuantInfo['input']:
    bnfp = self._QuantInfo['input'][item]
    if bnfp[1] is None:
      NndctScreenLogger().warning(f'Input tensor is not quantized: {item}')
      ret = False
  # Check node parameters
  for item in self._QuantInfo['param']:
    bnfp = self._QuantInfo['param'][item]
    if bnfp[1] is None:
      NndctScreenLogger().warning(f'Parameter tensor is not quantized: {item}')
      ret = False
  return ret
def deploy(self, run_fn, run_args, fmt='xmodel'):
  NndctScreenLogger().info(f'Quantized model deployment begin:')
  # TODO: how to handle the requirement that batch size must be 1

  # check function input
  if fmt not in ['xmodel', 'onnx', 'torch_script']:
    NndctScreenLogger().error(
        f"Parameter 'fmt' can only be set to 'xmodel', 'onnx' or 'torch_script'.")

  # set quantizer status and run a simple evaluation
  self.quantizer.quant_mode = 2
  register_output_hook(self.quantizer.quant_model, record_once=True)
  set_outputs_recorder_status(self.quantizer.quant_model, True)
  if self.quantizer.fast_finetuned:
    self.advanced_quant_setup()
  run_fn(*run_args)

  # export quantized model
  if fmt == 'xmodel':
    self.export_xmodel(self.quantizer.output_dir, deploy_check=False)
  elif fmt == 'onnx':
    self.export_onnx_model(self.quantizer.output_dir, verbose=True)
  elif fmt == 'torch_script':
    self.export_traced_torch_script(self.quantizer.output_dir, verbose=True)

  NndctScreenLogger().info(f'Quantized model deployment end.')
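# Usage sketch for deploy(). run_fn is any callable that forwards data through
# quantizer.quant_model; `processor`, `quant_model`, and `val_loader` are
# hypothetical names, and only the signature above is assumed.
import torch

def run_fn(model, data_loader):
  model.eval()
  with torch.no_grad():
    for images, _ in data_loader:
      model(images)

processor.deploy(run_fn, (quant_model, val_loader), fmt='xmodel')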
def prepare_quantizable_module(
    module: torch.nn.Module,
    input_args: Union[torch.Tensor, Sequence[Any]],
    export_folder: str,
    state_dict_file: Optional[str] = None,
    quant_mode: int = 1,
    device: torch.device = torch.device("cuda")
) -> Tuple[torch.nn.Module, Graph]:

  nndct_utils.create_work_dir(export_folder)

  if isinstance(state_dict_file, str):
    state_dict = torch.load(state_dict_file)
    module.load_state_dict(state_dict)

  export_file = os.path.join(export_folder,
                             module._get_name() + TorchSymbol.SCRIPT_SUFFIX)

  # switch to the specified device
  module, input_args = to_device(module, input_args, device)

  # parse the original module into a graph
  NndctScreenLogger().info(f"=>Parsing {module._get_name()}...")
  graph = parse_module(module, input_args)
  NndctScreenLogger().info(f"=>Quantizable module is generated.({export_file})")

  # recreate a quantizable module from the graph
  quant_module = recreate_nndct_module(graph, True, export_file).to(device)
  quant_module.train(mode=module.training)

  # hook the module up with the graph
  connect_module_with_graph(quant_module, graph)

  return quant_module, graph
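# Usage sketch for prepare_quantizable_module(). Only the signature above is
# assumed; the torchvision model and dummy input are illustrative.
import torch
import torchvision

model = torchvision.models.resnet18()
dummy_input = torch.randn(1, 3, 224, 224)
quant_module, graph = prepare_quantizable_module(
    module=model,
    input_args=(dummy_input,),
    export_folder="quantize_result",
    device=torch.device("cpu"))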
def cache_net_inpouts(self, run_fn, run_args):
  # parse `free -t -m` to get total/available system memory in GB
  total_m, *_, available_m = list(
      map(lambda x: x / 1024,
          map(int, os.popen('free -t -m').readlines()[1].split()[1:])))
  NndctScreenLogger().info(
      f"Mem status(total mem: {total_m:.2f}G, available mem: {available_m:.2f}G).")

  cache_layers = []
  monitor_layers = []
  batch_layers = []
  for node in self.graph.nodes:
    # if node.op.type == NNDCT_OP.INPUT or node in end_nodes:
    if node.op.type == NNDCT_OP.INPUT or node in self._last_quant_nodes:
      cache_layers.append(node.module)
    elif self.quantizer.configer.is_conv_like(node):
      monitor_layers.append(node.module)
      if not batch_layers:
        batch_layers.append(node.module)

  monitor_handlers = self.hook_memory_monitor(monitor_layers)
  batch_handlers = self.hook_batch_size(batch_layers)
  cache_handlers = self.hook_cache_output(cache_layers, monitor_mem=True)
  with torch.no_grad():
    run_fn(*run_args)

  # memory statistics
  total_memory_cost = 0.0
  for layer in cache_layers:
    total_memory_cost += self._mem_count[layer]
    del self._mem_count[layer]
  NndctScreenLogger().info(
      f"Memory cost by fast finetuning is {total_memory_cost:.2f} G.")
  if total_memory_cost > 0.8 * available_m:
    NndctScreenLogger().warning(
        f"There is not enough memory for fast finetuning and this process "
        f"will be ignored! Try to use a smaller calibration dataset.")
    return

  self.clean_hooks(monitor_handlers + cache_handlers + batch_handlers)

  net_inputs = []
  for node in self.input_nodes:
    cached_net_input = [out for out in self.cached_outputs[node.module]]
    net_inputs.append(cached_net_input)
    del self.cached_outputs[node.module]

  net_outputs = {}
  for node in self._last_quant_nodes:
    cached_net_output = [out for out in self.cached_outputs[node.module]]
    net_outputs[node.module] = cached_net_output
    del self.cached_outputs[node.module]

  torch.cuda.empty_cache()
  return net_inputs, net_outputs
def features_check(self):
  if self.fast_finetuned and not self._finetuned_para_loaded:
    NndctScreenLogger().warning(
        'Fast finetuned parameters are not loaded. '
        'Call load_ft_param to load them.')
  if self.bias_corrected and not self._bias_corr_loaded:
    NndctScreenLogger().warning(
        'Bias correction file is not loaded. Set the '
        'command line option "--nndct_param_corr" to load it.')
def quantize(self, run_fn, run_args, ft_run_args):
  NndctScreenLogger().info(f'Model quantization calibration begin:')

  # calibration
  self.quantizer.quant_mode = 1
  if ft_run_args is not None:
    self.finetune(run_fn, ft_run_args)
    self.quantizer.fast_finetuned = True
  run_fn(*run_args)
  self.quantizer.export_quant_config()

  NndctScreenLogger().info(f'Model quantization calibration end.')
def test(self, run_fn, run_args):
  NndctScreenLogger().info(f'Quantized model test begin:')

  # test and print the log message
  self.quantizer.quant_mode = 2
  if self.quantizer.fast_finetuned:
    self.advanced_quant_setup()
  log_str = run_fn(*run_args)
  NndctScreenLogger().info(
      f'Quantized model evaluation returns metric:\n {log_str}')

  NndctScreenLogger().info(f'Quantized model test end.')
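# End-to-end sketch of the calibrate-then-test flow built from quantize() and
# test() above. `processor`, `quant_model`, and the loaders are hypothetical;
# note that run_fn must return a printable metric, since test() logs its
# return value.
import torch

def evaluate(model, data_loader):
  correct = total = 0
  model.eval()
  with torch.no_grad():
    for images, labels in data_loader:
      preds = model(images).argmax(dim=1)
      correct += (preds == labels).sum().item()
      total += labels.numel()
  return f"top-1 accuracy: {correct / total:.4f}"

processor.quantize(evaluate, (quant_model, calib_loader), ft_run_args=None)
processor.test(evaluate, (quant_model, val_loader))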
def _show_partition_result_on_screen(self, graph, output_dir, verbose_level):
  target_name = DPUTargetHelper.get_name(
      self._target.get_devices()[0].get_legacy_dpu_target())
  if verbose_level == 0:
    return
  elif verbose_level == 1:
    d = []
    for node in graph.nodes:
      if node.op.type in [NNDCT_OP.RETURN, NNDCT_OP.INPUT]:
        continue
      if node.target_device is not None:
        if node.target_device.get_device_type() == DeviceType.CPU:
          d.append([
              node.name, node.op.type,
              node.target_device.get_filter_message()
          ])
    if d:
      NndctScreenLogger().info(
          f"The operators assigned to the CPU are as follows (see more details "
          f"in '{os.path.join(output_dir, f'inspect_{target_name}.txt')}'):")
      print(tabulate(d, headers=["node name", "op type", "hardware constraints"]))
    else:
      NndctScreenLogger().info(
          f"All the operators are assigned to the DPU (see more details in "
          f"'{os.path.join(output_dir, f'inspect_{target_name}.txt')}')")
  elif verbose_level == 2:
    d = []
    for node in graph.nodes:
      if node.op.type in [NNDCT_OP.RETURN, NNDCT_OP.INPUT]:
        continue
      if node.target_device is not None:
        d.append([
            node.name, node.op.type,
            node.target_device.get_device_type().value
        ])
    NndctScreenLogger().info(
        f"Operator device allocation table (see more details in "
        f"'{os.path.join(output_dir, 'inspect.txt')}'):")
    print(tabulate(d, headers=["node name", "op type", "assigned device"]))
def dump_xmodel(output_dir="quantize_result", deploy_check=False):
  r"""Convert the module to an xmodel for deployment.

  Compilation only works when quant mode == 2. The xmodel and some checking
  data will be generated under the work dir.

  Args:
    deploy_check (bool): if True, dump blobs and parameters of the model for
      deployment verification.

  Returns:
    None
  """
  quantizer = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANTIZER)
  if quantizer and quantizer.quant_mode > 1:
    nndct_utils.create_work_dir(output_dir)

    # compile to xmodel
    compiler = CompilerFactory.get_compiler("xmodel")
    NndctScreenLogger().info("=>Converting to xmodel ...")
    deploy_graphs = get_deploy_graph_list(quantizer.quant_model,
                                          quantizer.Nndctgraph)
    deploy_infos = compiler.get_deloy_graph_infos(quantizer, deploy_graphs)

    for deploy_info in deploy_infos:
      try:
        compiler.do_compile(
            deploy_info.dev_graph,
            quant_config_info=deploy_info.quant_info,
            output_file_name=os.path.join(output_dir,
                                          deploy_info.dev_graph.name))
      except AddXopError as e:
        NndctScreenLogger().error(
            f"Failed to convert graph '{deploy_info.dev_graph.name}' "
            f"to xmodel ({str(e)}).")

      # dump data for accuracy checking
      if deploy_check:
        NndctScreenLogger().info(
            f"=>Dumping '{deploy_info.dev_graph.name}' checking data...")
        checker = DeployChecker(output_dir_name=output_dir)
        checker.update_dump_folder(f"{deploy_info.dev_graph.name}")
        checker.dump_nodes_output(
            deploy_info.dev_graph,
            deploy_info.quant_info,
            round_method=quantizer.quant_opt['round_method'],
            select_batch=False)
        NndctScreenLogger().info(
            f"=>Finish dumping data.({checker.dump_folder})")

    set_outputs_recorder_status(quantizer.quant_model, False)
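# Usage sketch for dump_xmodel(). It assumes a quantizer in test mode
# (quant_mode == 2) has already been registered through a calibration/test
# run; otherwise the function silently does nothing. `processor`,
# `quant_model`, and `val_loader` are hypothetical names.
processor.test(run_fn, (quant_model, val_loader))
dump_xmodel(output_dir="quantize_result", deploy_check=True)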
def load_param(self):
  if self.quant_mode == 2:
    NndctScreenLogger().info(
        f"=>Loading quant model parameters.({self.param_file})")
    path = pathlib.Path(self.param_file)
    if not (path.exists() and path.is_file()) or not self.fast_finetuned:
      NndctScreenLogger().error(
          f"Fast finetuned parameter file does not exist. Please check whether "
          f"calibration with fast finetune is done or not.")
      exit(2)
    self.quant_model.load_state_dict(torch.load(self.param_file))
    self._finetuned_para_loaded = True
def __init__(self,
             graph,
             model_type,
             bitw,
             bita,
             lstm,
             mix_bit,
             custom_quant_ops=None):
  super().__init__(graph, model_type)
  self._QuantGroups = None

  if custom_quant_ops:
    for op in custom_quant_ops:
      if op not in self.QUANTIZABLE_OPS:
        self.QUANTIZABLE_OPS.append(op)
        NndctScreenLogger().info(f"Convert `{op}` to quantizable op.")

  self.group_graph()

  quant_strategy = create_quant_strategy(bitw, bita, lstm, mix_bit)
  self._quant_info = quant_strategy.create_quant_config(self)

  if NndctOption.nndct_stat.value > 0:
    print('Quantization groups:')
    pp.pprint(self._QuantGroups)
    pp.pprint(self._quant_info)

  # check groups: only one quantizable node is permitted per group in the
  # quantized part
  ignored_list = [NNDCT_OP.SHAPE]
  for k, v in self._QuantGroups.items():
    if len(v) == 1 and len(self.Nndctgraph.parents(k)) == 0:
      continue
    findQuantizableNode = False
    isIgnored = False
    type_list = self.LSTM_QUANTIZABLE_OPS if lstm else self.QUANTIZABLE_OPS
    for n in v:
      node = self.get_Nndctnode(node_name=n)
      if node.op.type in type_list:
        if findQuantizableNode:
          NndctScreenLogger().warning(
              f'Multiple quantizable nodes are found in one group:')
          NndctScreenLogger().warning(f'{v}')
        else:
          findQuantizableNode = True
      elif node.op.type in ignored_list:
        isIgnored = True
    if not findQuantizableNode and not isIgnored:
      NndctScreenLogger().warning(
          f'No quantizable node is found in the group; confirm there is no '
          f'numerical calculation in these nodes:')
      NndctScreenLogger().warning(f'{v}')
def finetune_v2(self, run_fn, run_args):
  # check status
  if self.quantizer.quant_mode == 2:
    NndctScreenLogger().warning(
        f"Finetune function will be ignored in test mode!")
    return

  # parameter finetuning
  with AdaQuant(processor=self):
    # calibration to get an initial set of quantization steps
    NndctScreenLogger().info(
        f"=>Preparing data for fast finetuning module parameters ...")
    with NoQuant():
      net_inputs, net_outputs = self.cache_net_inpouts(run_fn, run_args)

    NndctScreenLogger().info(
        f"=>Finding initial quantization steps for fast finetuning...")
    self.calibrate(run_fn, run_args)

    NndctScreenLogger().info(
        f"=>Fast finetuning module parameters for better quantization accuracy...")
    self.setup_test()
    device = GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE)
    initial_net_loss = self.calc_net_loss(net_inputs, net_outputs, device)

    layer_act_pair = self.collect_layer_act_pair()

    finetune_group = []
    for qmod, fmod in zip(self._quant_model.modules(),
                          self._float_model.modules()):
      if hasattr(qmod, "node"):
        if (self.quantizer.configer.is_node_quantizable(qmod.node, False) and
            len(qmod.node.op.params) > 0):
          finetune_group.append([qmod.node, fmod])

    net_loss = initial_net_loss
    for idx, (qnode, fmod) in tqdm(enumerate(finetune_group),
                                   total=len(finetune_group)):
      # enable caching only for the later half of the layers, and only if the
      # layer's activations fit in the cache
      is_cached = self.is_cached(qnode, len(net_inputs[0]))
      if (is_cached and idx < len(finetune_group) / 2) or (not is_cached):
        need_cache = False
      else:
        need_cache = True
      net_loss = self.optimize_layer_v2(qnode, fmod, layer_act_pair,
                                        net_inputs, net_outputs, net_loss,
                                        device, need_cache)
      print(f"%%%%%%%%%%%%%%%%% final opt net loss:{net_loss.avg}")
      # print(f"{qnode.name}({need_cache}):{net_loss}")

    # export finetuned parameters
    NndctScreenLogger().info(f"=>Exporting fast finetuned parameters ...")
    self.quantizer.export_param()
def export_quant_config(self, export_file=None, adjust_pos=True):
  if NndctOption.nndct_param_corr.value > 0:
    if self.quant_mode == 1:
      # gather bias correction; TODO: how to get the nn module object?
      for node in self.Nndctgraph.nodes:
        if node.op.type in [
            NNDCT_OP.CONV1D, NNDCT_OP.CONV2D, NNDCT_OP.CONVTRANSPOSE2D,
            NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.DENSE,
            NNDCT_OP.DEPTHWISE_CONVTRANSPOSE2D
        ]:
          if node.module.bias is not None:
            self.bias_corr[node.name] = node.module.bias_corr()

      # export bias correction
      torch.save(self.bias_corr, self.bias_corr_file)
      self.bias_corrected = True

  # export quant steps
  file_name = export_file or self.export_file
  if isinstance(file_name, str):
    NndctScreenLogger().info(f"=>Exporting quant config.({file_name})")
    if adjust_pos:
      self.organize_quant_pos()
    with open(file_name, 'w') as f:
      f.write(nndct_utils.to_jsonstr(self.quant_config))
def __init__(self,
             quant_mode: int,
             output_dir: str,
             quant_config: Dict[str, Union[str, int, bool]],
             is_lstm=False):
  super().__init__(quant_mode, output_dir, quant_config, is_lstm)
  self._quant_model = None
  self._bias_corr_loaded = False
  self._finetuned_para_loaded = False

  if NndctOption.nndct_param_corr.value > 0:
    if self.quant_mode == 2:
      path = pathlib.Path(self.bias_corr_file)
      if not (path.exists() and path.is_file()):
        NndctScreenLogger().error(
            f"Bias correction result file does not exist. Please check whether "
            f"calibration with bias correction is done or not.")
        exit(2)
      self.bias_corr = torch.load(self.bias_corr_file)
      self._bias_corr_loaded = True

  self.exporting = False
  self.inplace = True
  self.output_dir = output_dir

  mix_bit = quant_config['mix_bit']
  if is_lstm:
    self.quant_strategy = LstmQstrategy(quant_config)
  else:
    if mix_bit:
      self.quant_strategy = TQTStrategy(quant_config)
    else:
      self.quant_strategy = DPUQstrategy(quant_config)
def forward(self, *args, **kwargs):
  inputs = []

  def collect_inputs(inputs, value):
    if isinstance(value, torch.Tensor):
      inputs.append(value)
    elif isinstance(value, (tuple, list)):
      for i in value:
        collect_inputs(inputs, i)

  for k, v in kwargs.items():
    collect_inputs(inputs, v)

  inputs, _ = process_inputs_and_params(self.node, self.quantizer,
                                        inputs=inputs)
  try:
    output = caller(*args, **kwargs)
  except TypeError as e:
    NndctScreenLogger().warning_once(
        f"{str(e)}. The arguments of the function will be converted to "
        f"positional arguments.")
    inputs = list(args) + list(kwargs.values())
    output = caller(*inputs)

  [output] = post_quant_process(self.node, [output])
  return output
def get_fp_and_quantize(self,
                        input_tensor,
                        fp_name,
                        fp_tensor,
                        fp_stat_tensor=None,
                        node=None,
                        tensor_type='output'):  # 'input'|'output'|'param'
  # Forward the graph but do not quantize parameters and activations
  if (self.quant_mode < 1 or NndctOption.nndct_quant_off.value):
    return input_tensor

  if input_tensor.dtype != tf.float32 and input_tensor.dtype != tf.float64:
    NndctScreenLogger().warning_once(
        f'The tensor type of {fp_name} is {str(input_tensor.dtype)}. '
        f'Only float32/float64 quantization is supported.')
    return input_tensor

  # get the fix position
  mth = 3 if tensor_type == 'param' else 4
  bnfp = self.get_bnfp(fp_name, False, tensor_type)
  bw = bnfp[0]
  if self.quant_mode == 1:
    # must be in eager mode
    #print('---- Calculating fix pos of {}'.format(fp_name), flush=True)
    fp_tensor.assign(
        diffs_fix_pos(input=input_tensor, bit_width=bw, range=5, method=mth))
    bnfp[1] = int(fp_tensor.numpy())
    # limit max fix pos to 12
    bnfp[1] = min(12, bnfp[1])
    # record the fix pos of input/output via the fix-pos history
    if tensor_type != 'param':
      #fp_tensor.assign(stat_act_pos(fp_tensor, fp_stat_tensor))
      self.fp_history[tensor_type][fp_name].append(bnfp[1])
      data = np.array(self.fp_history[tensor_type][fp_name])
      bnfp[1] = stats.mode(data)[0][0]
      bnfp[1] = bnfp[1].astype(np.int32).tolist()
      fp_tensor.assign(bnfp[1])
    bnfp = self.set_bnfp(fp_name, bnfp, tensor_type)

  if self.quant_mode > 0:
    # do quantization for the parameter or activation tensor
    tensor = fix_neuron(input_tensor, fp_tensor, bw, method=mth)
    if tensor_type == 'param':
      self.update_param_to_quantized(node, fp_name, tensor.numpy())

    # XXX: Temporary.
    if self._dump_input and tensor_type == 'output' and 'input' in fp_name:
      if fp_name not in self._quantized_input:
        self._quantized_input[fp_name] = []
      self._quantized_input[fp_name].append([tensor.numpy()])
    return tensor
  else:
    return input_tensor
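# A minimal numpy sketch of the power-of-2 "fix position" quantization that
# fix_neuron performs conceptually: the tensor is scaled by 2**fix_pos,
# rounded, clamped to the signed bit-width range, and scaled back. The real
# kernel's rounding mode and overflow handling may differ.
import numpy as np

def fake_fix_neuron(x, fix_pos, bit_width=8):
  scale = 2.0 ** fix_pos
  qmin, qmax = -2 ** (bit_width - 1), 2 ** (bit_width - 1) - 1
  q = np.clip(np.round(x * scale), qmin, qmax)
  return q / scale

x = np.array([0.1, -0.52, 1.3])
print(fake_fix_neuron(x, fix_pos=6))  # quantized to multiples of 1/64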
def forward(self, *args, **kwargs):
  inputs = []

  def collect_inputs(inputs, value):
    if isinstance(value, torch.Tensor):
      inputs.append(value)
    elif isinstance(value, (tuple, list)):
      for i in value:
        collect_inputs(inputs, i)

  for _, v in kwargs.items():
    collect_inputs(inputs, v)

  inputs = quantize_tensors(inputs, self.node, tensor_type='input')
  try:
    output = caller(*args, **kwargs)
    if isinstance(output, torch.Tensor):
      output = output.clone()
  except TypeError as e:
    NndctScreenLogger().warning_once(
        f"{str(e)}. The arguments of the function will be converted to "
        f"positional arguments.")
    inputs = list(args) + list(kwargs.values())
    output = caller(*inputs)

  output = quantize_tensors([output], self.node)[0]
  return output
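# Standalone demo of the collect_inputs() recursion used in both forward()
# wrappers above: it flattens arbitrarily nested tuples/lists of keyword
# arguments into a flat list of tensors for quantization, skipping
# non-tensor values.
import torch

def collect_inputs(inputs, value):
  if isinstance(value, torch.Tensor):
    inputs.append(value)
  elif isinstance(value, (tuple, list)):
    for i in value:
      collect_inputs(inputs, i)

flat = []
collect_inputs(flat, (torch.zeros(1), [torch.ones(2), (torch.ones(3),)], "skip"))
print([t.shape for t in flat])  # [torch.Size([1]), torch.Size([2]), torch.Size([3])]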
def forward(self, input):
  qinput = quantize_tensors([input], self.node, tensor_type='input')[0]

  # check the input shape
  if (self.node.out_tensors[0].is_complete_tensor() and
      self.node.out_tensors[0].ndim == 4):
    # py_utils.blob_to_torch_format(self.node.out_tensors[0])
    if not (self.node.out_tensors[0].shape[1:] == list(input.size())[1:]):
      NndctScreenLogger().warning(
          f"The shape of the input ({input.shape[1:]}) should be the same as "
          f"that of the dummy input ({self.node.out_tensors[0].shape[1:]})")
    # py_utils.blob_to_nndct_format(self.node.out_tensors[0])
  output = qinput

  if (self.node.in_quant_part and NndctOption.nndct_stat.value > 2):
    print('Channel number of input data: {}'.format(output.shape[1]))
    print('Input data histogram: {}'.format(
        output.histc(bins=10).cpu().detach().numpy()))
    print('Network input channel-wise statistic [Min, Max, Mean, Std]:')
    t = output.transpose(0, 1)
    for c in range(t.shape[0]):
      print('[{}, {}, {}, {}]'.format(t[c].min(), t[c].max(), t[c].mean(),
                                      t[c].std()))
      print('histogram: {}'.format(t[c].histc(bins=10).cpu().detach().numpy()))

  if self.node.in_quant_part:
    output = quantize_tensors([output], self.node)[0]
  return output
def inspect(self,
            module: torch.nn.Module,
            input_args: Union[torch.Tensor, Tuple[Any]],
            device: torch.device = torch.device("cuda"),
            output_dir: str = "quantize_result",
            verbose_level: int = 1,
            image_format: Optional[str] = None):
  NndctScreenLogger().info(f"=>Start to inspect model...")
  self._inspector_impl.inspect(module, input_args, device, output_dir,
                               verbose_level)
  if image_format is not None:
    available_format = ["svg", "png"]
    NndctScreenLogger().check(
        f"Only the svg and png formats are supported for dumping.",
        image_format in available_format)
    self._inspector_impl.export_dot_image_v2(output_dir, image_format)
  NndctScreenLogger().info(f"=>Finish inspecting.")
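# Usage sketch for inspect(). The Inspector import path and its constructor
# argument (a DPU target name or fingerprint) are assumptions based on how
# the tool is typically used; the model and target name are illustrative.
import torch
import torchvision
from pytorch_nndct.apis import Inspector  # assumed import path

inspector = Inspector("DPUCAHX8L_ISA0")  # hypothetical target name
model = torchvision.models.resnet18()
dummy_input = torch.randn(1, 3, 224, 224)
inspector.inspect(model, (dummy_input,), device=torch.device("cpu"),
                  output_dir="inspect_result", image_format="png")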
def __init__(self):
  if not _enable_plot:
    NndctScreenLogger().warning(
        "Please install matplotlib for visualization.")
    sys.exit(1)
  self._dir = '.nndct_quant_stat_figures'
  io.create_work_dir(self._dir)
def get_op_output_shape(self, name: str) -> List[int]:
  op = self.get_op_by_name(name)
  if op:
    return op.get_output_tensor().dims
  else:
    NndctScreenLogger().warning(
        f"{name} is not in the xmodel. Please check it.")
def export_dot_image(self, output_dir, format):
  assert self._graph is not None
  file_name = os.path.join(output_dir, ".".join(["inspect", format]))

  device_type_node_sets = defaultdict(list)
  for node in self._graph.nodes:
    if node.op.type == NNDCT_OP.RETURN:
      continue
    if node.target_device:
      device_type_node_sets[node.target_device.get_device_type()].append(node)
    else:
      raise RuntimeError(f"{node}({node.op.type}) has no target device.")

  device_type_subgraph_node_sets = defaultdict(list)
  boundaries = []
  for device_type, node_set in device_type_node_sets.items():
    subgraph_node_sets, sub_boundaries = self._get_subgraphs_and_output_boundaries(
        node_set, device_type)
    device_type_subgraph_node_sets[device_type] = subgraph_node_sets
    boundaries += sub_boundaries

  dot_graph = self._create_dot_graph(output_dir,
                                     device_type_subgraph_node_sets,
                                     boundaries)
  file_name = dot_graph.render(outfile=file_name).replace('\\', '/')
  NndctScreenLogger().info(f"Dot image is generated.({file_name})")
def insert_scale_after_conv2d(module: torch.nn.Module):

  def _insert_func(op):
    insert_name = None
    conv2d_cnt = 0
    find_conv2d = False
    for op_name, c_op in op.named_children():
      if find_conv2d:
        conv2d_cnt = conv2d_cnt + 1
      if isinstance(c_op, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)):
        find_conv2d = True
        insert_name = op_name
      elif isinstance(c_op, torch.nn.BatchNorm2d) and find_conv2d:
        # prefer inserting after a BatchNorm that directly follows the conv
        insert_name = op_name
      if conv2d_cnt == 1:
        op._modules[insert_name] = torch.nn.Sequential(
            op._modules[insert_name],
            channel_scale.ChannelScale(channel_scale=1.0))
        find_conv2d = False
        conv2d_cnt = 0
    if find_conv2d:
      op._modules[insert_name] = torch.nn.Sequential(
          op._modules[insert_name],
          channel_scale.ChannelScale(channel_scale=1.0))

  if any(isinstance(submodule, (torch.nn.Conv2d, torch.nn.ConvTranspose2d))
         for submodule in module.modules()):
    module.apply(_insert_func)
    NndctScreenLogger().warning(
        f"ChannelScale has been inserted after Conv2d.")
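# Usage sketch for insert_scale_after_conv2d() on a toy model. ChannelScale is
# assumed to be the per-channel scaling layer referenced above via the
# channel_scale module.
import torch

model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, 3, padding=1),
    torch.nn.BatchNorm2d(16),
    torch.nn.ReLU())
insert_scale_after_conv2d(model)
# The BatchNorm2d that follows the Conv2d is now wrapped in a Sequential that
# appends a ChannelScale(channel_scale=1.0) layer.
print(model)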
def create_quant_algo(tensor_type, quant_strategy_info, node):
  algo_config = quant_strategy_info
  quant_algo = None
  granularity = algo_config.get("granularity")

  if granularity == "per_channel":
    torch_version = torch.__version__.split('.')
    if int(torch_version[0]) <= 1 and int(torch_version[1]) < 5:
      NndctScreenLogger().error(
          f"Torch should be updated to version 1.5.0 or higher for "
          f"per_channel quantization.")
      raise RuntimeError("per_channel quantization requires torch >= 1.5.0")
    op_type = node.op.type
    axis = None
    #group = node.node_attr[node.op.AttrName.GROUP]
    if tensor_type != "weights":
      raise ValueError(
          "Only per_channel quantization of weights is supported for now")
    if op_type in _CONV_LINEAR_TYPES:
      axis = 0
    elif op_type in _CONV_TRANSPOSE_TYPES:
      axis = 1
    quant_algo = PerChannelQuantAlgo(algo_config, axis)
  elif granularity == "per_tensor":
    method = algo_config.get("method")
    if method == "maxmin":
      quant_algo = MaxMinQuantPerTensorAlgo(algo_config)
    elif method == "percentile":
      quant_algo = PercentileQuantPerTensorAlgo(algo_config)
    elif method == "mse":
      quant_algo = MSEQuantPerTensorAlgo(algo_config)
    elif method == "entropy":
      quant_algo = EntropyQuantPerTensorAlgo(algo_config)

  return quant_algo
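# Illustrative quant_strategy_info dicts for create_quant_algo(). Only the
# "granularity" and "method" keys are read above; the remaining fields and
# `conv_node` are assumed examples of what a full config may carry.
weight_config = {
    "granularity": "per_channel",   # channel axis is chosen from the op type
    "bit_width": 8,
}
activation_config = {
    "granularity": "per_tensor",
    "method": "percentile",         # or "maxmin", "mse", "entropy"
    "bit_width": 8,
}
weight_algo = create_quant_algo("weights", weight_config, conv_node)
act_algo = create_quant_algo("activation", activation_config, conv_node)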
def __init__(self, quant_mode: int, output_dir: str, quant_config, is_lstm=False):
  super().__init__(quant_mode, output_dir, quant_config, is_lstm)
  self._quant_model = None
  self._bias_corr_loaded = False

  if NndctOption.nndct_param_corr.value > 0:
    if self.quant_mode == 2:
      path = pathlib.Path(self.bias_corr_file)
      if not (path.exists() and path.is_file()):
        NndctScreenLogger().error(
            f"Bias correction result file does not exist. Please check whether "
            f"calibration with bias correction is done or not.")
        exit(2)
      self.bias_corr = torch.load(self.bias_corr_file)
      self._bias_corr_loaded = True

  self.exporting = False
  self.inplace = True
  self.serial = True
  #self._fast_finetuned = False
  self._finetuned_para_loaded = False
  self.output_dir = output_dir

  if NndctOption.nndct_tensorrt_strategy.value:
    self.quant_strategy = TensorRTCGQStrategy(quant_config)
  else:
    self.quant_strategy = NndctCGQstrategy(quant_config)
def _graph2module(op):
  node = getattr(op, "node", None)
  for param_type, tensor in node.op.params.items():
    py_tensor_util.param_to_torch_format(tensor)
    data = np.copy(tensor.data)

    if node.op.type in [NNDCT_OP.CONVTRANSPOSE2D, NNDCT_OP.CONVTRANSPOSE3D
                       ] and param_type == node.op.ParamName.WEIGHTS:
      # data = data.transpose(1, 0, 2, 3)
      data = data.swapaxes(0, 1)
      data = np.ascontiguousarray(data)

    if node.op.type in [NNDCT_OP.DEPTHWISE_CONV2D, NNDCT_OP.DEPTHWISE_CONV3D
                       ] and param_type == node.op.ParamName.WEIGHTS:
      out_channels = node.node_config("out_channels")
      kernel_size = node.node_config("kernel_size")
      data = data.reshape((out_channels, 1, *kernel_size))

    if node.op.type in [
        NNDCT_OP.DEPTHWISE_CONVTRANSPOSE2D, NNDCT_OP.DEPTHWISE_CONVTRANSPOSE3D
    ] and param_type == node.op.ParamName.WEIGHTS:
      in_channels = node.node_config("in_channels")
      kernel_size = node.node_config("kernel_size")
      data = data.reshape((1, in_channels, *kernel_size))
      data = data.swapaxes(0, 1)
      data = np.ascontiguousarray(data)

    torch_tensor = torch.from_numpy(data)
    param_name = cls._parameter_map.get(param_type, param_type.value)
    if node.has_bound_params():
      if hasattr(op, param_name):
        if isinstance(getattr(op, param_name), torch.Tensor):
          torch_tensor = torch_tensor.to(getattr(op, param_name))
        else:
          torch_tensor = torch_tensor.to(getattr(op, param_name).data)
        if param_name in op._buffers:
          op._buffers[param_name] = torch_tensor
        else:
          op._parameters[param_name] = torch.nn.Parameter(torch_tensor)
      else:
        NndctScreenLogger().warning(
            f"new parameter: '{param_name}' is registered in {node.name}")
        op.register_parameter(param_name, torch.nn.Parameter(torch_tensor))
    else:
      torch_tensor = torch_tensor.to(
          device=GLOBAL_MAP.get_ele(NNDCT_KEYS.QUANT_DEVICE))
      module.register_parameter(param_name, torch.nn.Parameter(torch_tensor))

    py_tensor_util.param_to_nndct_format(tensor)
def _init_quant_env():
  nonlocal quant_mode
  if NndctOption.nndct_quant_mode.value > 0:
    quant_mode = NndctOption.nndct_quant_mode.value

  if quant_mode == 1:
    NndctScreenLogger().info(f"Quantization calibration process start up...")
  elif quant_mode == 2:
    NndctScreenLogger().info(f"Quantization test process start up...")

  quantizer = TORCHQuantizer(quant_mode, output_dir, bitwidth_w, bitwidth_a)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANTIZER, quantizer)
  GLOBAL_MAP.set_map(NNDCT_KEYS.QUANT_MODE, quant_mode)
  return quantizer, quant_mode