def load_conv_config(stage, unit, conv):
    """Register the QConfig for convolution ``conv`` of ``stage``/``unit``.

    The kernel scale is read from ``params`` under the layer's
    ``convbn_scaling_factor`` key.  ``from_scale`` is the ``output_scale`` of
    the entry that feeds this convolution, and ``input_scale`` is the
    activation scaling factor stored in ``params`` for this position.  The
    result is stored in ``QuantizeContext.qconfig_dict`` under
    ``"stage<stage>_unit<unit>_qconfig<conv>"``.

    ``params``, ``units``, ``data_dtype`` and ``kernel_dtype`` are free names
    resolved from the surrounding scope.

    Args:
        stage: 1-based stage index.
        unit: 1-based unit index inside the stage.
        conv: 1-based convolution index inside the unit.

    Raises:
        AssertionError: If a required scaling factor or the producing
            qconfig entry is missing.
    """
    kernel_key = "module.stage%d.unit%d.quant_convbn%d.convbn_scaling_factor" % (stage, unit, conv)
    assert kernel_key in params.keys(), kernel_key + " does not exist"
    kernel_scale = params[kernel_key]

    if conv == 1:
        # The first conv of a unit is fed by the previous unit's residual add;
        # stepping back from unit 1 lands on the last unit of the prior stage.
        prev_stage, prev_unit = stage, unit - 1
        if prev_unit == 0:
            prev_stage = stage - 1
            prev_unit = units[prev_stage - 1]
        if stage == 1 and unit == 1:
            # The very first unit is fed by the entry registered as conv0.
            producer = "conv0_qconfig"
        else:
            producer = "stage%d_unit%d_qconfig_add" % (prev_stage, prev_unit)
        act_key = "module.stage%d.unit%d.quant_act.act_scaling_factor" % (stage, unit)
    else:
        # Later convs are fed by the preceding conv of the same unit.
        producer = "stage%d_unit%d_qconfig%d" % (stage, unit, conv - 1)
        act_key = "module.stage%d.unit%d.quant_act%d.act_scaling_factor" % (stage, unit, conv - 1)

    assert producer in QuantizeContext.qconfig_dict.keys(), producer + " doesn't exist"
    from_scale = QuantizeContext.qconfig_dict[producer].output_scale

    assert act_key in params.keys(), act_key + " doesn't exist"
    input_scale = params[act_key]
    output_scale = kernel_scale * input_scale

    QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig%d" % (stage, unit, conv)] = QConfig(
        from_scale=from_scale,
        input_dtype=data_dtype,
        input_scale=input_scale,
        kernel_dtype=kernel_dtype,
        kernel_scale=kernel_scale,
        output_scale=output_scale,
    )
def load_qconfig_from_bit_config(num_stages, units, bit_config, bottleneck):
    """Fill ``QuantizeContext.qconfig_dict`` with dtype-only QConfig entries.

    Entries are written for ``conv0_qconfig``, then for every convolution
    (and, where applicable, the shortcut convolution) of every unit, and
    finally for ``fc_qconfig``.  Only dtypes are set here; no scales.

    Args:
        num_stages: Number of stages to walk.
        units: Indexable whose item ``i`` is the unit count of stage ``i + 1``.
        bit_config: Mapping from layer names such as
            ``"stage1.unit1.quant_convbn1"`` to a bit width (4 or 8).
        bottleneck: When truthy each unit has 3 convolutions, otherwise 2.

    Raises:
        AssertionError: If a bit width other than 4 or 8 is encountered.
    """
    def dtypes_for(bits):
        """Return ``(data_dtype, kernel_dtype)`` for a 4- or 8-bit layer."""
        assert bits == 4 or bits == 8, "Bit width %d not supported" % bits
        if bits == 4:
            pair = ("uint4", "int4")
        elif bits == 8:
            pair = ("int8", "int8")
        return pair

    def register(layer_name, entry_name):
        """Store a dtype-only QConfig for ``layer_name`` under ``entry_name``."""
        activation_dtype, weight_dtype = dtypes_for(bit_config[layer_name])
        QuantizeContext.qconfig_dict[entry_name] = QConfig(
            input_dtype=activation_dtype, kernel_dtype=weight_dtype)

    QuantizeContext.qconfig_dict["conv0_qconfig"] = QConfig(
        from_scale=1.0, input_dtype='int8', kernel_dtype='int8')

    convs_per_unit = 3 if bottleneck else 2
    for stage in range(1, num_stages + 1):
        for unit in range(1, units[stage - 1] + 1):
            for conv in range(1, convs_per_unit + 1):
                register(
                    "stage%d.unit%d.quant_convbn%d" % (stage, unit, conv),
                    "stage%d_unit%d_qconfig%d" % (stage, unit, conv),
                )
            # Only the first unit of a stage gets a shortcut entry, and the
            # first stage gets one only in the bottleneck layout.
            if unit == 1 and (stage != 1 or bottleneck):
                register(
                    "stage%d.unit%d.quant_identity_convbn" % (stage, unit),
                    "stage%d_unit%d_qconfig_sc" % (stage, unit),
                )

    QuantizeContext.qconfig_dict["fc_qconfig"] = QConfig(
        input_dtype='int8', kernel_dtype='int8')
def load_add_config(stage, unit, dim_match):
    """Register the QConfig of the residual add of ``stage``/``unit``.

    The entry is stored in ``QuantizeContext.qconfig_dict`` under
    ``"stage<stage>_unit<unit>_qconfig_add"`` and carries only an
    ``output_scale``, read from ``params`` (a free name resolved from the
    surrounding scope).

    Args:
        stage: 1-based stage index.
        unit: 1-based unit index inside the stage.
        dim_match: Truthy when the right-hand operand is taken from the
            unit's first convolution input (``qconfig1.from_scale``); falsy
            when it comes from the shortcut convolution (``qconfig_sc``).

    Raises:
        KeyError: If any of the looked-up entries is missing.
    """
    registry = QuantizeContext.qconfig_dict

    # Both operand scales are looked up but do not feed the result: the add's
    # output scale comes straight from ``params`` rather than from the minimum
    # of the two operand scales.
    _lhs_scale = registry["stage%d_unit%d_qconfig3" % (stage, unit)].output_scale
    if not dim_match:
        _rhs_scale = registry["stage%d_unit%d_qconfig_sc" % (stage, unit)].output_scale
    else:
        _rhs_scale = registry["stage%d_unit%d_qconfig1" % (stage, unit)].from_scale

    add_scale = params["module.stage%d.unit%d.quant_act_int32.act_scaling_factor" % (stage, unit)]
    registry["stage%d_unit%d_qconfig_add" % (stage, unit)] = QConfig(output_scale=add_scale)
stage = 4 units = [3, 4, 6, 3] bottleneck = True if args.bit_config is not None: import bit_config hawq_utils.load_qconfig_from_bit_config( stage, units, bit_config.bit_config_dict[args.bit_config], bottleneck) else: if model_type == 'int4': int4_default_qconfig = QConfig(from_dtype='int32', from_scale=65.0, from_zero_point=0.0, input_dtype='uint4', input_scale=8.0, input_zero_point=0.0, kernel_dtype='int4', kernel_scale=8.0, kernel_zero_point=0.0, output_dtype='int32', output_scale=75.0, output_zero_point=0.0) QuantizeContext.set_default_qconfig(int4_default_qconfig) QuantizeContext.qconfig_dict = { "conv0_qconfig": QConfig(from_dtype='int32', from_scale=65.0, from_zero_point=0.0, input_dtype='int8', input_scale=8.0, input_zero_point=0.0,
def load_sc_config(stage, unit):
    """Register the QConfig of the shortcut convolution of ``stage``/``unit``.

    The kernel scale is read from ``params``; the input scale is copied from
    the unit's first convolution entry (``qconfig1``), and the output scale
    is their product.  ``params``, ``data_dtype`` and ``kernel_dtype`` are
    free names resolved from the surrounding scope.

    Args:
        stage: 1-based stage index.
        unit: 1-based unit index inside the stage.

    Raises:
        KeyError: If the scaling factor or the ``qconfig1`` entry is missing.
    """
    shortcut_kernel_scale = params[
        "module.stage%d.unit%d.quant_identity_convbn.convbn_scaling_factor" % (stage, unit)
    ]
    # The shortcut reads the same input as the unit's first convolution.
    shared_input_scale = QuantizeContext.qconfig_dict[
        "stage%d_unit%d_qconfig1" % (stage, unit)
    ].input_scale

    settings = dict(
        input_dtype=data_dtype,
        input_scale=shared_input_scale,
        kernel_dtype=kernel_dtype,
        kernel_scale=shortcut_kernel_scale,
        output_scale=shortcut_kernel_scale * shared_input_scale,
    )
    QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_sc" % (stage, unit)] = QConfig(**settings)
def load_qconfig(data_dtype, kernel_dtype, num_stages, units, model_load=False, scaling_factors=None, file_name=None):
    """Populate ``QuantizeContext.qconfig_dict`` from stored scaling factors.

    Scaling factors are either read from a checkpoint file or taken from the
    ``scaling_factors`` argument, converted to NumPy, and then used to build
    one ``QConfig`` per layer: ``conv0_qconfig``, three convolutions per unit,
    a shortcut entry for the first unit of every stage, a residual-add entry
    per unit, and ``fc_qconfig``.

    Args:
        data_dtype: Input dtype recorded on every stage convolution entry.
        kernel_dtype: Kernel dtype recorded on every stage convolution entry.
        num_stages: Number of stages to walk.
        units: Indexable whose item ``i`` is the unit count of stage ``i + 1``.
        model_load: When falsy the factors are read from ``file_name`` with
            ``torch.load``; when truthy ``scaling_factors`` is used as given.
        scaling_factors: Mapping from factor name to tensor; required when
            ``model_load`` is truthy.
        file_name: Checkpoint path; required when ``model_load`` is falsy.
            The loaded object must provide the keys
            ``'convbn_scaling_factor'``, ``'fc_scaling_factor'`` and
            ``'act_scaling_factor'``, each a mapping.

    Raises:
        ValueError: If the argument required by ``model_load`` is missing.
        AssertionError: If a convolution's scaling factor or producing
            qconfig entry is missing.
        KeyError: If any other expected scaling factor is missing.
    """
    if not model_load:
        if file_name is None:
            raise ValueError("file_name is required when model_load is False")
        # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
        model = torch.load(file_name)
        scaling_factors = {**model['convbn_scaling_factor'],
                           **model['fc_scaling_factor'],
                           **model['act_scaling_factor']}
    elif scaling_factors is None:
        raise ValueError("scaling_factors is required when model_load is True")

    params = {}
    for key, tensor in scaling_factors.items():
        tensor_np = tensor.cpu().numpy().reshape((-1))
        if "act_scaling_factor" in key:
            # The array is always 1-D after reshape(-1); activation factors
            # are kept as their first element rather than as an array.
            tensor_np = tensor_np[0]
        params[key] = tensor_np

    def load_conv_config(stage, unit, conv):
        """Register the QConfig for convolution ``conv`` of ``stage``/``unit``."""
        cur_hawq_conv = "module.stage%d.unit%d.quant_convbn%d.convbn_scaling_factor" % (stage, unit, conv)
        assert cur_hawq_conv in params, cur_hawq_conv + " does not exist"
        kernel_scale = params[cur_hawq_conv]

        if conv == 1:
            # The first conv of a unit is fed by whatever precedes the unit.
            if stage == 1 and unit == 1:
                last_conv = "conv0_qconfig"
            elif unit == 1:
                # Last unit of the previous stage.
                last_conv = "stage%d_unit%d_qconfig_add" % (stage - 1, units[stage - 2])
            else:
                last_conv = "stage%d_unit%d_qconfig_add" % (stage, unit - 1)
            last_hawq_conv = "module.stage%d.unit%d.quant_act.act_scaling_factor" % (stage, unit)
        else:
            # Later convs are fed by the preceding conv of the same unit.
            last_conv = "stage%d_unit%d_qconfig%d" % (stage, unit, conv - 1)
            last_hawq_conv = "module.stage%d.unit%d.quant_act%d.act_scaling_factor" % (stage, unit, conv - 1)

        assert last_conv in QuantizeContext.qconfig_dict, last_conv + " doesn't exist"
        from_scale = QuantizeContext.qconfig_dict[last_conv].output_scale

        assert last_hawq_conv in params, last_hawq_conv + " doesn't exist"
        input_scale = params[last_hawq_conv]
        output_scale = kernel_scale * input_scale

        QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig%d" % (stage, unit, conv)] = QConfig(
            from_scale=from_scale,
            input_dtype=data_dtype,
            input_scale=input_scale,
            kernel_dtype=kernel_dtype,
            kernel_scale=kernel_scale,
            output_scale=output_scale,
        )

    def load_sc_config(stage, unit):
        """Register the QConfig of the shortcut convolution of a unit."""
        kernel_scale = params["module.stage%d.unit%d.quant_identity_convbn.convbn_scaling_factor" % (stage, unit)]
        # The shortcut reads the same input as the unit's first convolution.
        input_scale = QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig1" % (stage, unit)].input_scale
        output_scale = kernel_scale * input_scale
        QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_sc" % (stage, unit)] = QConfig(
            input_dtype=data_dtype,
            input_scale=input_scale,
            kernel_dtype=kernel_dtype,
            kernel_scale=kernel_scale,
            output_scale=output_scale,
        )

    def load_add_config(stage, unit):
        """Register the QConfig of the residual add that closes a unit."""
        # The add's output scale is taken directly from the stored factors;
        # it is not derived from the scales of the two operands.
        output_scale = params["module.stage%d.unit%d.quant_act_int32.act_scaling_factor" % (stage, unit)]
        QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_add" % (stage, unit)] = QConfig(output_scale=output_scale)

    conv0_input_scale = params["module.quant_input.act_scaling_factor"]
    conv0_kernel_scale = params["module.quant_init_convbn.convbn_scaling_factor"]
    # The stored factor is used for the output instead of input * kernel.
    conv0_output_scale = params["module.quant_act_int32.act_scaling_factor"]
    QuantizeContext.qconfig_dict["conv0_qconfig"] = QConfig(
        from_scale=1.0,
        input_dtype='int8',
        input_scale=conv0_input_scale,
        kernel_dtype='int8',
        kernel_scale=conv0_kernel_scale,
        output_scale=conv0_output_scale,
    )

    for stage in range(1, num_stages + 1):
        for unit in range(1, units[stage - 1] + 1):
            for conv in range(1, 4):
                load_conv_config(stage, unit, conv)
            # Only the first unit of each stage has a shortcut convolution.
            if unit == 1:
                load_sc_config(stage, unit)
            load_add_config(stage, unit)

    fc_from_scale = QuantizeContext.qconfig_dict[
        "stage%d_unit%d_qconfig_add" % (num_stages, units[num_stages - 1])
    ].output_scale
    fc_input_scale = params["module.quant_act_output.act_scaling_factor"]
    fc_kernel_scale = params["module.quant_output.fc_scaling_factor"]
    fc_output_scale = fc_input_scale * fc_kernel_scale
    QuantizeContext.qconfig_dict["fc_qconfig"] = QConfig(
        from_scale=fc_from_scale,
        input_dtype='int8',
        input_scale=fc_input_scale,
        kernel_dtype='int8',
        kernel_scale=fc_kernel_scale,
        output_scale=fc_output_scale,
    )
def load_sc_config(stage, unit):
    """Register a dtype-only QConfig for the shortcut convolution of a unit.

    The bit width is read from ``bit_config`` under
    ``"stage<stage>.unit<unit>.quant_identity_convbn"`` and converted to a
    dtype pair by ``get_dtype``; both names are resolved from the surrounding
    scope.  The entry is stored under ``"stage<stage>_unit<unit>_qconfig_sc"``.

    Args:
        stage: 1-based stage index.
        unit: 1-based unit index inside the stage.
    """
    shortcut_bits = bit_config["stage%d.unit%d.quant_identity_convbn" % (stage, unit)]
    activation_dtype, weight_dtype = get_dtype(shortcut_bits)
    shortcut_qconfig = QConfig(input_dtype=activation_dtype, kernel_dtype=weight_dtype)
    QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_sc" % (stage, unit)] = shortcut_qconfig
def load_conv_config(stage, unit, conv):
    """Register a dtype-only QConfig for one convolution of a unit.

    The bit width is read from ``bit_config`` under
    ``"stage<stage>.unit<unit>.quant_convbn<conv>"`` and converted to a dtype
    pair by ``get_dtype``; both names are resolved from the surrounding
    scope.  The entry is stored under
    ``"stage<stage>_unit<unit>_qconfig<conv>"``.

    Args:
        stage: 1-based stage index.
        unit: 1-based unit index inside the stage.
        conv: 1-based convolution index inside the unit.
    """
    conv_bits = bit_config["stage%d.unit%d.quant_convbn%d" % (stage, unit, conv)]
    activation_dtype, weight_dtype = get_dtype(conv_bits)
    conv_qconfig = QConfig(input_dtype=activation_dtype, kernel_dtype=weight_dtype)
    QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig%d" % (stage, unit, conv)] = conv_qconfig