Пример #1
0
    def load_conv_config(stage, unit, conv):
        """Register the QConfig of convolution ``conv`` in ``stage``/``unit``.

        The kernel scale and the input activation scale are read from
        ``params``; the output scale is their product. ``from_scale`` is the
        output scale of the layer that feeds this convolution:

        * ``conv == 1``: the residual add of the preceding unit (or
          ``conv0_qconfig`` for the very first unit of the first stage);
        * otherwise: the previous convolution of the same unit.

        The result is stored in ``QuantizeContext.qconfig_dict`` under
        ``"stage%d_unit%d_qconfig%d"``. Missing entries trip an assertion.
        """
        weight_key = "module.stage%d.unit%d.quant_convbn%d.convbn_scaling_factor" % (stage, unit, conv)
        assert weight_key in params, weight_key + " does not exist"
        kernel_scale = params[weight_key]

        if conv == 1:
            # Walk back one unit; stepping before unit 1 lands on the last
            # unit of the previous stage.
            prev_stage, prev_unit = stage, unit - 1
            if prev_unit == 0:
                prev_stage = stage - 1
                prev_unit = units[prev_stage - 1]

            if stage == 1 and unit == 1:
                producer = "conv0_qconfig"
            else:
                producer = "stage%d_unit%d_qconfig_add" % (prev_stage, prev_unit)

            # The input activation scale comes from this unit's own
            # ``quant_act`` entry, not from the producing unit.
            act_key = "module.stage%d.unit%d.quant_act.act_scaling_factor" % (stage, unit)
        else:
            producer = "stage%d_unit%d_qconfig%d" % (stage, unit, conv-1)
            act_key = "module.stage%d.unit%d.quant_act%d.act_scaling_factor" % (stage, unit, conv-1)

        assert producer in QuantizeContext.qconfig_dict, producer + " doesn't exist"
        from_scale = QuantizeContext.qconfig_dict[producer].output_scale

        assert act_key in params, act_key + " doesn't exist"
        input_scale = params[act_key]

        QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig%d" % (stage, unit, conv)] = QConfig(
            from_scale=from_scale,
            input_dtype=data_dtype,
            input_scale=input_scale,
            kernel_dtype=kernel_dtype,
            kernel_scale=kernel_scale,
            output_scale=kernel_scale * input_scale,
        )
Пример #2
0
def load_qconfig_from_bit_config(num_stages, units, bit_config, bottleneck):
    """Register dtype-only QConfigs for every layer from a bit-width table.

    Entries are written to ``QuantizeContext.qconfig_dict`` in this order:
    ``conv0_qconfig``, then for each stage/unit its convolutions followed by
    the shortcut convolution (when the unit has one), and finally
    ``fc_qconfig``. The first and last layers are always int8.

    Args:
        num_stages: number of stages to walk.
        units: sequence where ``units[i]`` is the unit count of stage ``i + 1``.
        bit_config: mapping from layer names such as
            ``"stage1.unit1.quant_convbn1"`` to a bit width (4 or 8).
        bottleneck: truthy for three convolutions per unit, falsy for two.
    """

    def get_dtype(bit_width):
        """Return the ``(data_dtype, kernel_dtype)`` pair for 4 or 8 bits."""
        assert bit_width in (4, 8), "Bit width %d not supported" % bit_width
        if bit_width == 4:
            return "uint4", "int4"
        # The assertion above leaves 8 as the only remaining width.
        return "int8", "int8"

    def load_conv_config(stage, unit, conv):
        """Register the dtypes of one regular convolution."""
        layer_name = "stage%d.unit%d.quant_convbn%d" % (stage, unit, conv)
        act_dtype, weight_dtype = get_dtype(bit_config[layer_name])
        conv_qconfig = QConfig(input_dtype=act_dtype, kernel_dtype=weight_dtype)
        QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig%d" % (stage, unit, conv)] = conv_qconfig

    def load_sc_config(stage, unit):
        """Register the dtypes of a unit's shortcut convolution."""
        layer_name = "stage%d.unit%d.quant_identity_convbn" % (stage, unit)
        act_dtype, weight_dtype = get_dtype(bit_config[layer_name])
        sc_qconfig = QConfig(input_dtype=act_dtype, kernel_dtype=weight_dtype)
        QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_sc" % (stage, unit)] = sc_qconfig

    QuantizeContext.qconfig_dict["conv0_qconfig"] = QConfig(
        from_scale=1.0, input_dtype='int8', kernel_dtype='int8')

    convs_per_unit = 3 if bottleneck else 2

    for stage_idx in range(num_stages):
        stage = stage_idx + 1
        for unit_idx in range(units[stage_idx]):
            unit = unit_idx + 1

            for conv in range(1, convs_per_unit + 1):
                load_conv_config(stage, unit, conv)

            # Only the first unit of a stage gets a shortcut convolution, and
            # without bottleneck units the first stage has none at all.
            if unit_idx == 0 and (stage_idx != 0 or bottleneck):
                load_sc_config(stage, unit)

    QuantizeContext.qconfig_dict["fc_qconfig"] = QConfig(
        input_dtype='int8', kernel_dtype='int8')
Пример #3
0
    def load_add_config(stage, unit, dim_match):
        """Register the QConfig of the residual add closing ``stage``/``unit``.

        The add's output scale is read from ``params`` (the unit's
        ``quant_act_int32`` activation scaling factor) and stored under
        ``"stage%d_unit%d_qconfig_add"`` in ``QuantizeContext.qconfig_dict``.

        ``dim_match`` selects which shortcut entry is looked up: the unit's
        first convolution (``from_scale``) when truthy, its shortcut
        convolution (``output_scale``) otherwise.
        """
        ids = (stage, unit)

        # Neither branch scale feeds the stored result; the lookups are kept
        # so a missing upstream entry still fails here.
        main_branch_scale = QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig3" % ids].output_scale
        if dim_match:
            shortcut_scale = QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig1" % ids].from_scale
        else:
            shortcut_scale = QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_sc" % ids].output_scale

        add_scale = params["module.stage%d.unit%d.quant_act_int32.act_scaling_factor" % ids]
        add_qconfig = QConfig(output_scale=add_scale)
        QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_add" % ids] = add_qconfig
Пример #4
0
    stage = 4
    units = [3, 4, 6, 3]
    bottleneck = True

if args.bit_config is not None:
    import bit_config
    hawq_utils.load_qconfig_from_bit_config(
        stage, units, bit_config.bit_config_dict[args.bit_config], bottleneck)
else:
    if model_type == 'int4':
        int4_default_qconfig = QConfig(from_dtype='int32',
                                       from_scale=65.0,
                                       from_zero_point=0.0,
                                       input_dtype='uint4',
                                       input_scale=8.0,
                                       input_zero_point=0.0,
                                       kernel_dtype='int4',
                                       kernel_scale=8.0,
                                       kernel_zero_point=0.0,
                                       output_dtype='int32',
                                       output_scale=75.0,
                                       output_zero_point=0.0)

        QuantizeContext.set_default_qconfig(int4_default_qconfig)
        QuantizeContext.qconfig_dict = {
            "conv0_qconfig":
            QConfig(from_dtype='int32',
                    from_scale=65.0,
                    from_zero_point=0.0,
                    input_dtype='int8',
                    input_scale=8.0,
                    input_zero_point=0.0,
Пример #5
0
 def load_sc_config(stage, unit):
     """Register the QConfig of the shortcut convolution of ``stage``/``unit``.

     The kernel scale is read from ``params``; the input scale is copied from
     the unit's first convolution entry (``..._qconfig1``), which must already
     be present in ``QuantizeContext.qconfig_dict``. The output scale is the
     product of the two.
     """
     ids = (stage, unit)
     kernel_scale = params["module.stage%d.unit%d.quant_identity_convbn.convbn_scaling_factor" % ids]
     input_scale = QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig1" % ids].input_scale
     sc_qconfig = QConfig(
         input_dtype=data_dtype,
         input_scale=input_scale,
         kernel_dtype=kernel_dtype,
         kernel_scale=kernel_scale,
         output_scale=kernel_scale * input_scale,
     )
     QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_sc" % ids] = sc_qconfig
Пример #6
0
def load_qconfig(data_dtype, kernel_dtype, num_stages, units, model_load=False, scaling_factors=None, file_name=None):
    """Populate ``QuantizeContext.qconfig_dict`` from stored scaling factors.

    Entries are registered for the initial convolution (``conv0_qconfig``),
    for three convolutions plus the residual add of every unit (and the
    shortcut convolution of each stage's first unit), and for the final
    fully connected layer (``fc_qconfig``).

    Args:
        data_dtype: activation dtype used for all stage convolutions.
        kernel_dtype: weight dtype used for all stage convolutions.
        num_stages: number of stages to walk.
        units: sequence where ``units[i]`` is the unit count of stage ``i + 1``.
        model_load: when falsy, the scaling factors are read from
            ``file_name``; when truthy, ``scaling_factors`` is used as given.
        scaling_factors: mapping from parameter name to tensor (each value
            must support ``.cpu().numpy()``).
        file_name: checkpoint path passed to ``torch.load``; the loaded object
            must provide the ``convbn_scaling_factor``, ``fc_scaling_factor``
            and ``act_scaling_factor`` mappings.
    """
    if not model_load:
        # NOTE(review): torch.load may unpickle arbitrary objects; only point
        # this at trusted checkpoints.
        checkpoint = torch.load(file_name)
        scaling_factors = {
            **checkpoint['convbn_scaling_factor'],
            **checkpoint['fc_scaling_factor'],
            **checkpoint['act_scaling_factor'],
        }

    # Flatten every tensor to a 1-D numpy array; activation scales are then
    # reduced to their first element.
    params = {}
    for name, tensor in scaling_factors.items():
        flat = tensor.cpu().numpy().reshape((-1))
        if "act_scaling_factor" in name and np.ndim(flat) == 1:
            flat = flat[0]
        params[name] = flat

    qconfigs = QuantizeContext.qconfig_dict

    def load_conv_config(stage, unit, conv):
        """Register the QConfig of convolution ``conv`` in ``stage``/``unit``."""
        weight_key = "module.stage%d.unit%d.quant_convbn%d.convbn_scaling_factor" % (stage, unit, conv)
        assert weight_key in params, weight_key + " does not exist"
        kernel_scale = params[weight_key]

        if conv == 1:
            # The first convolution is fed by the previous unit's residual
            # add; stepping before unit 1 lands on the last unit of the
            # previous stage.
            prev_stage, prev_unit = stage, unit - 1
            if prev_unit == 0:
                prev_stage = stage - 1
                prev_unit = units[prev_stage - 1]

            if stage == 1 and unit == 1:
                producer = "conv0_qconfig"
            else:
                producer = "stage%d_unit%d_qconfig_add" % (prev_stage, prev_unit)

            # The input activation scale comes from this unit's own
            # ``quant_act`` entry, not from the producing unit.
            act_key = "module.stage%d.unit%d.quant_act.act_scaling_factor" % (stage, unit)
        else:
            producer = "stage%d_unit%d_qconfig%d" % (stage, unit, conv-1)
            act_key = "module.stage%d.unit%d.quant_act%d.act_scaling_factor" % (stage, unit, conv-1)

        assert producer in qconfigs, producer + " doesn't exist"
        from_scale = qconfigs[producer].output_scale

        assert act_key in params, act_key + " doesn't exist"
        input_scale = params[act_key]

        qconfigs["stage%d_unit%d_qconfig%d" % (stage, unit, conv)] = QConfig(
            from_scale=from_scale,
            input_dtype=data_dtype,
            input_scale=input_scale,
            kernel_dtype=kernel_dtype,
            kernel_scale=kernel_scale,
            output_scale=kernel_scale * input_scale,
        )

    def load_sc_config(stage, unit):
        """Register the shortcut convolution; it shares conv 1's input scale."""
        ids = (stage, unit)
        kernel_scale = params["module.stage%d.unit%d.quant_identity_convbn.convbn_scaling_factor" % ids]
        input_scale = qconfigs["stage%d_unit%d_qconfig1" % ids].input_scale
        qconfigs["stage%d_unit%d_qconfig_sc" % ids] = QConfig(
            input_dtype=data_dtype,
            input_scale=input_scale,
            kernel_dtype=kernel_dtype,
            kernel_scale=kernel_scale,
            output_scale=kernel_scale * input_scale,
        )

    def load_add_config(stage, unit, dim_match):
        """Register the residual add; its output scale comes from ``params``."""
        ids = (stage, unit)

        # Neither branch scale feeds the stored result; the lookups are kept
        # so a missing upstream entry still fails here.
        main_branch_scale = qconfigs["stage%d_unit%d_qconfig3" % ids].output_scale
        if dim_match:
            shortcut_scale = qconfigs["stage%d_unit%d_qconfig1" % ids].from_scale
        else:
            shortcut_scale = qconfigs["stage%d_unit%d_qconfig_sc" % ids].output_scale

        add_scale = params["module.stage%d.unit%d.quant_act_int32.act_scaling_factor" % ids]
        qconfigs["stage%d_unit%d_qconfig_add" % ids] = QConfig(output_scale=add_scale)

    # Initial convolution: its output scale is taken from the stored int32
    # activation scale rather than computed as input * kernel.
    stem_input_scale = params["module.quant_input.act_scaling_factor"]
    stem_kernel_scale = params["module.quant_init_convbn.convbn_scaling_factor"]
    stem_output_scale = params["module.quant_act_int32.act_scaling_factor"]
    qconfigs["conv0_qconfig"] = QConfig(
        from_scale=1.0,
        input_dtype='int8',
        input_scale=stem_input_scale,
        kernel_dtype='int8',
        kernel_scale=stem_kernel_scale,
        output_scale=stem_output_scale,
    )

    for stage in range(1, num_stages + 1):
        for unit in range(1, units[stage - 1] + 1):
            for conv in (1, 2, 3):
                load_conv_config(stage, unit, conv)

            # Only the first unit of a stage has a shortcut convolution; the
            # other units are registered with dim_match=True.
            is_first_unit = unit == 1
            if is_first_unit:
                load_sc_config(stage, unit)
            load_add_config(stage, unit, not is_first_unit)

    # Fully connected layer, fed by the residual add of the last unit.
    last_add_key = "stage%d_unit%d_qconfig_add" % (num_stages, units[num_stages-1])
    fc_from_scale = qconfigs[last_add_key].output_scale
    fc_input_scale = params["module.quant_act_output.act_scaling_factor"]
    fc_kernel_scale = params["module.quant_output.fc_scaling_factor"]
    qconfigs["fc_qconfig"] = QConfig(
        from_scale=fc_from_scale,
        input_dtype='int8',
        input_scale=fc_input_scale,
        kernel_dtype='int8',
        kernel_scale=fc_kernel_scale,
        output_scale=(fc_input_scale * fc_kernel_scale),
    )
Пример #7
0
 def load_sc_config(stage, unit):
     """Register the dtypes of the shortcut convolution of ``stage``/``unit``.

     The bit width is looked up in ``bit_config`` under
     ``"stage%d.unit%d.quant_identity_convbn"`` and translated to a dtype
     pair by ``get_dtype``.
     """
     layer_name = "stage%d.unit%d.quant_identity_convbn" % (stage, unit)
     act_dtype, weight_dtype = get_dtype(bit_config[layer_name])
     sc_qconfig = QConfig(input_dtype=act_dtype, kernel_dtype=weight_dtype)
     QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig_sc" % (stage, unit)] = sc_qconfig
Пример #8
0
 def load_conv_config(stage, unit, conv):
     """Register the dtypes of convolution ``conv`` in ``stage``/``unit``.

     The bit width is looked up in ``bit_config`` under
     ``"stage%d.unit%d.quant_convbn%d"`` and translated to a dtype pair by
     ``get_dtype``.
     """
     layer_name = "stage%d.unit%d.quant_convbn%d" % (stage, unit, conv)
     act_dtype, weight_dtype = get_dtype(bit_config[layer_name])
     conv_qconfig = QConfig(input_dtype=act_dtype, kernel_dtype=weight_dtype)
     QuantizeContext.qconfig_dict["stage%d_unit%d_qconfig%d" % (stage, unit, conv)] = conv_qconfig