def __init__(self, input_width, weight_width, act_width):
        """Assemble the layers of a quantized LeNet.

        Args:
            input_width: Bit width given to the input ``QuantIdentity``.
            weight_width: ``weight_bit_width`` given to every conv and
                linear layer.
            act_width: Bit width given to each of the four ``QuantReLU``
                activations.
        """
        super(QuantLeNet, self).__init__()

        # Input quantizer configured with the range [-1.0, 1.0].
        self.quant_inp = QuantIdentity(
            bit_width=input_width, min_val=-1.0, max_val=1.0)

        # Two 5x5 convolutions: 1 -> 6 -> 16 channels.
        self.conv1 = QuantConv2d(
            in_channels=1, out_channels=6, kernel_size=5,
            weight_bit_width=weight_width)
        self.conv2 = QuantConv2d(
            in_channels=6, out_channels=16, kernel_size=5,
            weight_bit_width=weight_width)

        # Fully connected head; only the last layer is created without bias.
        self.fc1 = QuantLinear(
            in_features=16 * 4 * 4, out_features=120, bias=True,
            weight_bit_width=weight_width)
        self.fc2 = QuantLinear(
            in_features=120, out_features=84, bias=True,
            weight_bit_width=weight_width)
        self.fc3 = QuantLinear(
            in_features=84, out_features=10, bias=False,
            weight_bit_width=weight_width)

        # All four activations share the same configuration.
        relu_cfg = dict(bit_width=act_width, max_val=6)
        self.relu1 = QuantReLU(**relu_cfg)
        self.relu2 = QuantReLU(**relu_cfg)
        self.relu3 = QuantReLU(**relu_cfg)
        self.relu4 = QuantReLU(**relu_cfg)
Exemplo n.º 2
0
def make_qconv2d(in_planes,
                 out_planes,
                 kernel_size=1,
                 padding=0,
                 stride=1,
                 groups=1,
                 bias=True,
                 no_quant=True,
                 **kwargs) -> QuantConv2d:
    """Create a ``QuantConv2d`` with or without a weight bit width.

    Args:
        in_planes: Forwarded as ``in_channels``.
        out_planes: Forwarded as ``out_channels``.
        kernel_size, padding, stride, groups, bias: Forwarded unchanged.
        no_quant: When true, every ``*_quant`` and ``update_*`` argument is
            passed as ``None``. Otherwise only ``weight_bit_width`` is set.
        **kwargs: Only ``kwargs['bit_width']`` is read, and only when
            ``no_quant`` is false.

    Returns:
        The constructed ``QuantConv2d``.

    Raises:
        KeyError: If ``no_quant`` is false and ``bit_width`` is missing.
    """
    # Geometry arguments are identical for both variants.
    geometry = dict(in_channels=in_planes,
                    out_channels=out_planes,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    groups=groups,
                    bias=bias)

    if not no_quant:
        return QuantConv2d(weight_bit_width=kwargs['bit_width'], **geometry)

    return QuantConv2d(weight_quant=None,
                       input_quant=None,
                       bias_quant=None,
                       output_quant=None,
                       update_wqi=None,
                       update_bqi=None,
                       update_iqi=None,
                       update_oqi=None,
                       **geometry)
Exemplo n.º 3
0
 def __init__(self):
     """Build three identically configured conv/ReLU stages and a linear head.

     Only the last activation is created with ``return_quant_tensor=False``;
     the linear layer is a plain ``nn.Linear``.
     """
     super().__init__()

     # Every convolution uses exactly the same arguments.
     conv_cfg = dict(in_channels=CHANNELS,
                     out_channels=CHANNELS,
                     kernel_size=KERNEL_SIZE,
                     bias=False,
                     weight_quant=DPUv1WeightQuantInjector,
                     bias_quant=None,
                     output_quant=DPUv1OutputQuantInjector,
                     return_quant_tensor=True)

     self.conv1 = QuantConv2d(**conv_cfg)
     self.act1 = QuantReLU(return_quant_tensor=True,
                           act_quant=DPUv1ActQuantInjector)
     self.conv2 = QuantConv2d(**conv_cfg)
     self.act2 = QuantReLU(return_quant_tensor=True,
                           act_quant=DPUv1ActQuantInjector)
     self.conv3 = QuantConv2d(**conv_cfg)
     self.act3 = QuantReLU(return_quant_tensor=False,
                           act_quant=DPUv1ActQuantInjector)
     self.linear = nn.Linear(FC_IN_SIZE, CHANNELS)
Exemplo n.º 4
0
 def __init__(self,weight_bit_width=4,acti_bit_width=8):
     """Create the conv and linear layers of a quantized LeNet.

     Args:
         weight_bit_width: ``weight_bit_width`` given to every layer.
         acti_bit_width: Not used by the active code. It was the bit width
             of the ``QuantReLU`` layers that are disabled below.
     """
     super(QuantLeNet, self).__init__()

     # Disabled: relu1..relu4 = QuantReLU(bit_width=acti_bit_width, max_val=6),
     # one after each of conv1, conv2, fc1 and fc2.
     shared = dict(weight_bit_width=weight_bit_width)

     self.conv1 = QuantConv2d(1, 6, 5, padding=2, **shared)
     self.conv2 = QuantConv2d(6, 16, 5, **shared)
     self.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True, **shared)
     self.fc2 = QuantLinear(120, 84, bias=True, **shared)
     self.fc3 = QuantLinear(84, 10, bias=True, **shared)
Exemplo n.º 5
0
 def __init__(self,
              in_channels,
              out_channels,
              kernel_size,
              weight_bit_width,
              act_bit_width,
              stride=1,
              padding=0,
              groups=1,
              bn_eps=1e-5,
              activation_scaling_per_channel=False):
     """Create the conv, batch-norm and activation layers of the block.

     Args:
         in_channels, out_channels, kernel_size, stride, padding, groups:
             Forwarded to the bias-free ``QuantConv2d``.
         weight_bit_width: Weight bit width of the convolution.
         act_bit_width: Bit width of the ``QuantReLU`` activation.
         bn_eps: ``eps`` of the ``BatchNorm2d`` layer.
         activation_scaling_per_channel: Forwarded to the activation as
             ``scaling_per_channel``.
     """
     super(ConvBlock, self).__init__()

     self.conv = QuantConv2d(
         in_channels=in_channels,
         out_channels=out_channels,
         kernel_size=kernel_size,
         stride=stride,
         padding=padding,
         groups=groups,
         bias=False,
         weight_bit_width=weight_bit_width,
         weight_quant=CommonIntWeightPerChannelQuant)

     self.bn = nn.BatchNorm2d(eps=bn_eps, num_features=out_channels)

     # Shape handed to the activation for per-channel scaling.
     channel_shape = (1, out_channels, 1, 1)
     self.activation = QuantReLU(
         act_quant=CommonUintActQuant,
         bit_width=act_bit_width,
         scaling_per_channel=activation_scaling_per_channel,
         per_channel_broadcastable_shape=channel_shape,
         return_quant_tensor=True)
Exemplo n.º 6
0
def test_quant_conv2d(dw, bias, bias_quant, in_features, in_channels,
                      out_channels, w_bits, channel_scaling, kernel_size,
                      padding, stride, i_bits):
    """Compare a FINN-ONNX export of ``QuantConv2d`` with the module itself.

    A quantized random input is pushed through both the exported model
    (in integer form) and the Brevitas layer; the outputs must agree
    within ``atol=1e-3``. When ``dw`` is truthy the layer is created with
    ``groups`` and ``out_channels`` both equal to ``in_channels``.
    """
    # Only used to generate a quantized input; not part of the exported model.
    input_quantizer = QuantIdentity(bit_width=i_bits, return_quant_tensor=True)
    float_input = torch.randn(1, in_channels, in_features, in_features)
    quant_input = input_quantizer(float_input)

    if dw:
        conv_out_channels, conv_groups = in_channels, in_channels
    else:
        conv_out_channels, conv_groups = out_channels, 1

    layer = QuantConv2d(in_channels=in_channels,
                        out_channels=conv_out_channels,
                        groups=conv_groups,
                        kernel_size=kernel_size,
                        padding=padding,
                        stride=stride,
                        bias=bias,
                        bias_quant=bias_quant,
                        weight_bit_width=w_bits,
                        weight_scaling_per_output_channel=channel_scaling)
    layer.eval()

    exported = bo.export_finn_onnx(layer, input_t=quant_input)
    finn_model = ModelWrapper(exported).transform(InferShapes())

    # The quantized input handed to FINN has to be in integer form.
    int_input = quant_input.int(float_datatype=True).numpy()
    feed = {finn_model.graph.input[0].name: int_input}
    results = oxe.execute_onnx(finn_model, feed, True)

    produced = results[finn_model.graph.output[0].name]
    expected = layer(quant_input).detach().numpy()
    assert np.all(np.isclose(produced, expected, atol=1e-3))
Exemplo n.º 7
0
 def prepare_for_export(self, module: QuantConv2d):
     """Collect the tensors and quantizer kwargs used for symbolic export.

     Stores a dict in ``self.symbolic_kwargs`` holding the detached
     quantized weight and bias values plus the per-quantizer kwargs
     produced by the ``*_symbolic_kwargs`` helpers of this handler.
     """
     quant_bias = module.quant_bias()
     bias_value = None if quant_bias is None else quant_bias.value.detach()

     weight_value = module.quant_weight().value.detach()
     if len(weight_value.shape) == 4:
         # Move 4-D weights to NHWC ordering up front.
         weight_value = weight_value.permute(0, 2, 3, 1)

     self.symbolic_kwargs = dict(
         weight=weight_value,
         bias=bias_value,
         input_quant=self.input_quant_symbolic_kwargs(module),
         weight_quant=self.weight_quant_symbolic_kwargs(module),
         bias_quant=self.bias_quant_symbolic_kwargs(module),
         output_quant=self.output_quant_symbolic_kwargs(module),
         op=self.op_symbolic_kwargs(module))
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 quant_type,
                 weight_bit_width,
                 act_bit_width,
                 act_scaling_per_channel,
                 weight_scaling_impl_type,
                 bias,
                 compute_micronet_cost,
                 dilation=1,
                 groups=1,
                 bn_eps=1e-5,
                 shared_act=None):
        """Create the conv, batch-norm and activation layers of the block.

        The activation is chosen as follows: a supplied ``shared_act`` is
        used as-is; otherwise ``QuantType.FP`` selects ``nn.ReLU6`` and
        ``QuantType.INT`` selects a ``QuantReLU``.

        Raises:
            Exception: If ``shared_act`` is ``None`` and ``quant_type`` is
                neither ``QuantType.FP`` nor ``QuantType.INT``.
        """
        super(ConvBlock, self).__init__()
        self.compute_micronet_cost = compute_micronet_cost

        weight_quant_cfg = dict(
            weight_quant_type=quant_type,
            weight_bit_width=weight_bit_width,
            weight_scaling_impl_type=weight_scaling_impl_type,
            weight_restrict_scaling_type=RestrictValueType.LOG_FP,
            weight_narrow_range=True,
            weight_scaling_stats_op=StatsOp.MAX,
            weight_scaling_min_val=MIN_SCALING_VALUE,
            weight_scaling_per_output_channel=True)
        self.conv = QuantConv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            # Compute the number of bits in the output accumulator.
            compute_output_bit_width=True,
            # Return a quantized tensor representing the quantized accumulator.
            return_quant_tensor=True,
            **weight_quant_cfg)

        self.bn = nn.BatchNorm2d(num_features=out_channels, eps=bn_eps)

        if shared_act is not None:
            self.activ = shared_act
        elif quant_type == QuantType.FP:
            self.activ = nn.ReLU6()
        elif quant_type == QuantType.INT:
            self.activ = QuantReLU(
                quant_type=quant_type,
                bit_width=act_bit_width,
                max_val=RELU_MAX_VAL,
                scaling_per_channel=act_scaling_per_channel,
                scaling_impl_type=ScalingImplType.PARAMETER,
                scaling_min_val=MIN_SCALING_VALUE,
                restrict_scaling_type=RestrictValueType.LOG_FP,
                per_channel_broadcastable_shape=(1, out_channels, 1, 1),
                return_quant_tensor=True)
        else:
            raise Exception("Activ non recognized.")
Exemplo n.º 9
0
def test_brevitas_QConv2d(dw, bias, in_channels, QONNX_export):
    """Check that an exported ``QuantConv2d`` matches Brevitas numerically.

    The layer is exported (through QONNX plus conversion, or directly as
    FINN-ONNX), executed on a random input, and compared against the
    Brevitas forward pass within ``atol=1e-3``.

    Args:
        dw: When ``True`` a depthwise 3x3 convolution is built
            (``groups == out_channels == in_channels``); otherwise a 1x1
            convolution with 64 output channels.
        bias: Forwarded to ``QuantConv2d``.
        in_channels: Number of input channels; also determines the input
            and weight shapes used by the test.
        QONNX_export: Selects the QONNX export path when truthy.
    """
    # Shapes follow `in_channels` rather than a hard-coded 32, so the
    # test stays self-consistent for any parametrized channel count.
    ishape = (1, in_channels, 111, 111)
    stride = 1
    if dw is True:
        groups = in_channels
        out_channels = in_channels
        kernel_size = 3
        padding = 1
    else:
        groups = 1
        out_channels = 64
        kernel_size = 1
        padding = 0
    # Conv2d weight layout: (out_channels, in_channels // groups, kH, kW).
    w_shape = (out_channels, in_channels // groups, kernel_size, kernel_size)

    b_conv = QuantConv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        groups=groups,
        kernel_size=kernel_size,
        padding=padding,
        stride=stride,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=4,
        weight_quant_type=QuantType.INT,
        weight_scaling_impl_type=ScalingImplType.STATS,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_scaling_per_output_channel=True,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_narrow_range=True,
        weight_scaling_min_val=2e-16,
    )
    weight_tensor = gen_finn_dt_tensor(DataType["INT4"], w_shape)
    b_conv.weight = torch.nn.Parameter(torch.from_numpy(weight_tensor).float())
    b_conv.eval()

    try:
        if QONNX_export:
            m_path = export_onnx_path
            BrevitasONNXManager.export(b_conv, ishape, m_path)
            qonnx_cleanup(m_path, out_file=m_path)
            model = ModelWrapper(m_path)
            model = model.transform(ConvertQONNXtoFINN())
            model.save(m_path)
        else:
            bo.export_finn_onnx(b_conv, ishape, export_onnx_path)
        model = ModelWrapper(export_onnx_path)
        model = model.transform(InferShapes())
        inp_tensor = np.random.uniform(
            low=-1.0, high=1.0, size=ishape).astype(np.float32)
        idict = {model.graph.input[0].name: inp_tensor}
        odict = oxe.execute_onnx(model, idict, True)
        produced = odict[model.graph.output[0].name]
        inp_tensor = torch.from_numpy(inp_tensor).float()
        expected = b_conv.forward(inp_tensor).detach().numpy()

        assert np.isclose(produced, expected, atol=1e-3).all()
    finally:
        # Clean up the exported file even when export, execution or the
        # assertion fails, so a failing run leaves no artifact behind.
        # The existence check avoids masking the original error if the
        # export never produced a file.
        if os.path.exists(export_onnx_path):
            os.remove(export_onnx_path)
Exemplo n.º 10
0
def test_float_bias_zero_point():
    """Assert every zero-point entry of the conv output is non-zero.

    The layer has a bias, an ``Int8ActPerTensorFloat`` input quantizer and
    returns a quant tensor.
    """
    layer = QuantConv2d(IN_CH,
                        OUTPUT_CH,
                        KERNEL_SIZE,
                        bias=True,
                        return_quant_tensor=True,
                        input_quant=Int8ActPerTensorFloat)
    sample = torch.randn(1, IN_CH, 10, 10)
    result = layer(sample)
    nonzero_mask = result.zero_point != 0.
    assert nonzero_mask.all()
Exemplo n.º 11
0
 def test_internally_scaled_int_bias(self):
     """Run a forward pass through a conv with an internally scaled int bias.

     The test passes as long as construction and the forward call do not
     raise; no output value is checked.
     """
     layer = QuantConv2d(in_channels=INPUT_CHANNELS,
                         out_channels=OUTPUT_CHANNELS,
                         kernel_size=KERNEL_SIZE,
                         bias=True,
                         bias_quant=Int8BiasPerTensorFloatInternalScaling,
                         weight_quant_delay_steps=1)
     sample = torch.randn(1, INPUT_CHANNELS, 20, 20)
     layer(sample)
Exemplo n.º 12
0
    def __init__(self, num_classes, weight_bit_width, act_bit_width,
                 in_bit_width, in_ch):
        """Build the convolutional and fully connected stacks of CNV.

        Args:
            num_classes: Output features of the final linear layer.
            weight_bit_width: Weight bit width of every conv/linear layer.
            act_bit_width: Bit width of the intermediate ``QuantIdentity``
                activations.
            in_bit_width: Bit width of the input ``QuantIdentity``.
            in_ch: Number of channels of the first convolution's input.
        """
        super(CNV, self).__init__()

        self.conv_features = ModuleList()
        self.linear_features = ModuleList()
        conv_stack = self.conv_features
        fc_stack = self.linear_features

        def hidden_act():
            # Activation quantizer placed after every batch norm.
            return QuantIdentity(act_quant=CommonActQuant,
                                 bit_width=act_bit_width)

        # Input quantizer for the Q1.7 input format.
        conv_stack.append(
            QuantIdentity(
                act_quant=CommonActQuant,
                bit_width=in_bit_width,
                min_val=-1.0,
                max_val=1.0 - 2.0**(-7),
                narrow_range=False,
                restrict_scaling_type=RestrictValueType.POWER_OF_TWO))

        prev_ch = in_ch
        for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
            conv_stack.append(
                QuantConv2d(in_channels=prev_ch,
                            out_channels=out_ch,
                            kernel_size=KERNEL_SIZE,
                            bias=False,
                            weight_quant=CommonWeightQuant,
                            weight_bit_width=weight_bit_width))
            conv_stack.append(BatchNorm2d(out_ch, eps=1e-4))
            conv_stack.append(hidden_act())
            if is_pool_enabled:
                conv_stack.append(MaxPool2d(kernel_size=2))
            prev_ch = out_ch

        for in_features, out_features in INTERMEDIATE_FC_FEATURES:
            fc_stack.append(
                QuantLinear(in_features=in_features,
                            out_features=out_features,
                            bias=False,
                            weight_quant=CommonWeightQuant,
                            weight_bit_width=weight_bit_width))
            fc_stack.append(BatchNorm1d(out_features, eps=1e-4))
            fc_stack.append(hidden_act())

        # Classifier layer followed by a tensor-wide normalization.
        fc_stack.append(
            QuantLinear(in_features=LAST_FC_IN_FEATURES,
                        out_features=num_classes,
                        bias=False,
                        weight_quant=CommonWeightQuant,
                        weight_bit_width=weight_bit_width))
        fc_stack.append(TensorNorm())

        # Re-initialize all quantized conv/linear weights uniformly in [-1, 1].
        for module in self.modules():
            if isinstance(module, (QuantConv2d, QuantLinear)):
                torch.nn.init.uniform_(module.weight.data, -1, 1)
Exemplo n.º 13
0
 def test_internally_scaled_int_bias_after_bn_merge(self):
     """Run a forward pass after merging a batch norm into a bias-free conv.

     The conv is created with ``bias=False`` and an internally scaled bias
     quantizer, ``merge_bn`` is applied, then a forward pass is executed.
     No output value is checked.
     """
     layer = QuantConv2d(in_channels=INPUT_CHANNELS,
                         out_channels=OUTPUT_CHANNELS,
                         kernel_size=KERNEL_SIZE,
                         bias=False,
                         bias_quant=Int8BiasPerTensorFloatInternalScaling,
                         weight_quant_delay_steps=1)
     batch_norm = BatchNorm2d(OUTPUT_CHANNELS)
     merge_bn(layer, batch_norm)
     sample = torch.randn(1, INPUT_CHANNELS, 20, 20)
     layer(sample)
Exemplo n.º 14
0
 def __init__(self):
     """Create a biased 3x3 conv with int8 input/output and int16 bias quantizers.

     The weights are re-initialized uniformly in [-0.01, 0.01].
     """
     super().__init__()
     conv = QuantConv2d(in_channels=IN_CH,
                        out_channels=OUT_CH,
                        kernel_size=3,
                        bias=True,
                        input_quant=Int8ActPerTensorFloat,
                        output_quant=Int8ActPerTensorFloat,
                        bias_quant=Int16Bias,
                        return_quant_tensor=False)
     self.conv = conv
     conv.weight.data.uniform_(-0.01, 0.01)
Exemplo n.º 15
0
 def __init__(self):
     """Create an input quantizer and two fixed-point quantized 3x3 convs.

     Both convolutions get the inference caching flags for quantized
     output and quantized bias switched on.
     """
     super().__init__()
     self.inp_quant = QuantIdentity(return_quant_tensor=True,
                                    act_quant=Int8ActPerTensorFixedPoint)

     # Quantizer settings shared by both convolutions.
     quant_cfg = dict(weight_quant=Int8WeightPerTensorFixedPoint,
                      bias_quant=Int8Bias,
                      output_quant=Int8ActPerTensorFixedPoint,
                      return_quant_tensor=True)
     self.conv = QuantConv2d(5, 10, (3, 3), **quant_cfg)
     self.conv2 = QuantConv2d(10, 10, (3, 3), **quant_cfg)

     for layer in (self.conv, self.conv2):
         layer.cache_inference_quant_out = True
         layer.cache_inference_quant_bias = True
Exemplo n.º 16
0
 def __init__(self):
     """Create a bias-free conv with int8 fixed-point quantizers.

     Weight, input and output use the per-tensor fixed-point quantizers;
     the weights are re-initialized uniformly in [-0.01, 0.01].
     """
     super().__init__()
     conv = QuantConv2d(in_channels=IN_CH,
                        out_channels=OUT_CH,
                        kernel_size=KERNEL_SIZE,
                        bias=False,
                        weight_quant=Int8WeightPerTensorFixedPoint,
                        input_quant=Int8ActPerTensorFixedPoint,
                        output_quant=Int8ActPerTensorFixedPoint,
                        return_quant_tensor=True)
     self.conv1 = conv
     conv.weight.data.uniform_(-0.01, 0.01)
Exemplo n.º 17
0
 def __init__(self):
     """Create a bias-free conv with shifted-uint8 float quantizers.

     Weight, input and output use the ``ShiftedUint8*PerTensorFloat``
     quantizers; the weights are re-initialized uniformly in [-1.0, 1.0].
     """
     super().__init__()
     conv = QuantConv2d(in_channels=IN_CH,
                        out_channels=OUT_CH,
                        kernel_size=KERNEL_SIZE,
                        bias=False,
                        weight_quant=ShiftedUint8WeightPerTensorFloat,
                        input_quant=ShiftedUint8ActPerTensorFloat,
                        output_quant=ShiftedUint8ActPerTensorFloat,
                        return_quant_tensor=False)
     self.conv1 = conv
     conv.weight.data.uniform_(-1.0, 1.0)
Exemplo n.º 18
0
def get_quant_conv2d(in_ch, out_ch, bit_width, quant_type):
    """Return a ``QuantConv2d`` configured from the module-level constants.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        bit_width: Weight bit width.
        quant_type: Weight quantization type.
    """
    # Weight quantization settings; everything except the bit width and
    # quant type comes from module-level constants.
    weight_cfg = dict(
        weight_quant_type=quant_type,
        weight_bit_width=bit_width,
        weight_bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
        weight_narrow_range=NARROW_RANGE_ENABLED,
        weight_scaling_impl_type=WEIGHT_SCALING_IMPL_TYPE,
        weight_scaling_const=WEIGHT_SCALING_CONST,
        weight_scaling_per_output_channel=CONV_PER_OUT_CH_SCALING,
        weight_restrict_scaling_type=SCALING_VALUE_TYPE)
    return QuantConv2d(in_channels=in_ch,
                       out_channels=out_ch,
                       kernel_size=KERNEL_SIZE,
                       bias=BIAS_ENABLED,
                       **weight_cfg)
Exemplo n.º 19
0
 def __init__(self):
     """Create a bias-free conv with 7-bit input/output bit widths.

     Input and output use ``ShiftedUint8ActPerTensorFloat``, the weight
     uses ``Int8WeightPerTensorFloat`` and the bias quantizer is
     ``Int16Bias``. Weights are re-initialized uniformly in [-0.01, 0.01].
     """
     super().__init__()
     conv = QuantConv2d(in_channels=IN_CH,
                        out_channels=OUT_CH,
                        kernel_size=KERNEL_SIZE,
                        bias=False,
                        input_bit_width=7,
                        output_bit_width=7,
                        weight_quant=Int8WeightPerTensorFloat,
                        bias_quant=Int16Bias,
                        input_quant=ShiftedUint8ActPerTensorFloat,
                        output_quant=ShiftedUint8ActPerTensorFloat,
                        return_quant_tensor=False)
     self.conv = conv
     conv.weight.data.uniform_(-0.01, 0.01)
Exemplo n.º 20
0
 def test_delayed_quant_module(self):
     """Check a delayed-quantization conv against a float conv.

     A ``QuantConv2d`` created with ``weight_quant_delay_steps=1`` loads
     the state of a plain ``Conv2d``; both outputs on the same random
     input must be element-wise close.
     """
     shared_cfg = dict(in_channels=INPUT_CHANNELS,
                       out_channels=OUTPUT_CHANNELS,
                       kernel_size=KERNEL_SIZE,
                       bias=False)
     reference = Conv2d(**shared_cfg)
     delayed = QuantConv2d(weight_quant_delay_steps=1, **shared_cfg)
     delayed.load_state_dict(reference.state_dict())

     sample = torch.randn(1, INPUT_CHANNELS, 20, 20)
     expected = reference(sample)
     actual = delayed(sample)
     assert expected.isclose(actual).all().item()
Exemplo n.º 21
0
def make_layers(cfg, batch_norm, bit_width):
    """Build the feature extractor described by ``cfg``.

    Args:
        cfg: Iterable of entries. ``'M'`` adds a 2x2 max pool with stride 2;
            any other entry is used as the output channel count of a 3x3
            quantized conv followed by a quantized ReLU.
        batch_norm: When truthy, a ``BatchNorm2d`` is inserted between each
            conv and its activation, and the conv is created without bias.
        bit_width: Bit width used for both weights and activations.

    Returns:
        An ``nn.Sequential`` holding the layers in order. The first conv
        takes 3 input channels.
    """
    modules = []
    prev_channels = 3
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        conv = QuantConv2d(
            prev_channels, entry, kernel_size=3, stride=1, padding=1,
            groups=1, bias=not batch_norm, weight_bit_width=bit_width,
            weight_quant=CommonIntWeightPerChannelQuant)
        relu = QuantReLU(
            act_quant=CommonUintActQuant, bit_width=bit_width,
            return_quant_tensor=True)

        modules.append(conv)
        if batch_norm:
            modules.append(nn.BatchNorm2d(entry))
        modules.append(relu)
        prev_channels = entry
    return nn.Sequential(*modules)
def PreQuantizedConv2d(in_channels,
                       out_channels,
                       kernel_size,
                       config,
                       stride=1,
                       padding=0,
                       dilation=1,
                       groups=1,
                       bias=True):
    """Return a ``QuantConv2d`` with integer weights.

    The weight quantizer is configured with narrow range, per-output-channel
    scaling and the bit width read from ``config.weight_bit_width``. All
    other arguments are forwarded to ``QuantConv2d`` unchanged.
    """
    weight_cfg = dict(weight_quant_type=QuantType.INT,
                      weight_bit_width=config.weight_bit_width,
                      weight_narrow_range=True,
                      weight_scaling_per_output_channel=True)
    conv_cfg = dict(in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    dilation=dilation,
                    groups=groups,
                    bias=bias)
    return QuantConv2d(**conv_cfg, **weight_cfg)
Exemplo n.º 23
0
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            weight_bit_width,
            act_bit_width,
            act_scaling_per_channel,
            bias,
            groups=1,
            bn_eps=1e-5,
            shared_act=None,
            return_quant_tensor=False):
        """Create the conv, batch-norm and activation layers of the block.

        Args:
            in_channels, out_channels, kernel_size, stride, padding, groups,
                bias: Forwarded to the ``QuantConv2d``.
            weight_bit_width: Weight bit width of the convolution.
            act_bit_width: Bit width of the activation built by this block.
            act_scaling_per_channel: ``scaling_per_channel`` of that
                activation.
            bn_eps: ``eps`` of the ``BatchNorm2d`` layer.
            shared_act: Optional activation to use instead of building a new
                ``QuantReLU``.
            return_quant_tensor: Forwarded to the ``QuantReLU`` when one is
                built; not applied to ``shared_act``.
        """
        super(ConvBlock, self).__init__()

        self.conv = QuantConv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=bias,
            weight_quant=CommonIntWeightPerChannelQuant,
            weight_bit_width=weight_bit_width)
        self.bn = nn.BatchNorm2d(eps=bn_eps, num_features=out_channels)

        if shared_act is not None:
            self.activ = shared_act
        else:
            self.activ = QuantReLU(
                act_quant=CommonUintActQuant,
                bit_width=act_bit_width,
                per_channel_broadcastable_shape=(1, out_channels, 1, 1),
                scaling_per_channel=act_scaling_per_channel,
                return_quant_tensor=return_quant_tensor)
Exemplo n.º 24
0
 def quant_weight_scale(module: QuantConv2d):
     """Return the module's weight scale converted by the DPUv1 handler.

     The scale reported by ``module.quant_weight_scale()`` is passed through
     ``DPUv1QuantConv2dHandler.neg_scalar_exponent_from_scale``.
     """
     return DPUv1QuantConv2dHandler.neg_scalar_exponent_from_scale(
         module.quant_weight_scale())
Exemplo n.º 25
0
 def test_module_init(self):
     """Construct a bias-free ``QuantConv2d``; passes if no error is raised."""
     mod = QuantConv2d(in_channels=INPUT_CHANNELS,
                       out_channels=OUTPUT_CHANNELS,
                       bias=False,
                       kernel_size=KERNEL_SIZE)
Exemplo n.º 26
0
 def __init__(self):
     """Build a LeNet from Brevitas layers with every quantizer set to None.

     Each conv/linear layer receives ``None`` for its weight, input, bias
     and output quantizers and for all ``update_*`` arguments; each
     activation receives ``None`` for its input, act and output quantizers
     and its ``update_*`` arguments.
     """
     super(QuantLeNet, self).__init__()

     # Keyword sets repeated verbatim for every layer of the same kind.
     layer_cfg = dict(weight_quant=None,
                      input_quant=None,
                      bias_quant=None,
                      output_quant=None,
                      update_wqi=None,
                      update_bqi=None,
                      update_iqi=None,
                      update_oqi=None)
     relu_cfg = dict(input_quant=None,
                     act_quant=None,
                     output_quant=None,
                     update_iqi=None,
                     update_aqi=None)

     self.conv1 = QuantConv2d(1, 6, 5, **layer_cfg)
     self.relu1 = QuantReLU(**relu_cfg)
     self.conv2 = QuantConv2d(6, 16, 5, **layer_cfg)
     self.relu2 = QuantReLU(**relu_cfg)
     self.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True, **layer_cfg)
     self.relu3 = QuantReLU(**relu_cfg)
     self.fc2 = QuantLinear(120, 84, bias=True, **layer_cfg)
     self.relu4 = QuantReLU(**relu_cfg)
     self.fc3 = QuantLinear(84, 10, bias=False, **layer_cfg)
Exemplo n.º 27
0
def get_8_bits_quantized_lenet():
    """Return a ``QuantLeNet`` whose layers are replaced by LSQ 8-bit ones.

    Every conv/linear layer is rebuilt with ``LSQ_weight_quant_8bits`` as
    weight quantizer and every activation with ``LSQ_input_quant_8bits`` as
    act quantizer; all other quantizer and ``update_*`` arguments are
    ``None``.
    """
    # Keyword sets repeated verbatim for every layer of the same kind.
    layer_cfg = dict(weight_quant=LSQ_weight_quant_8bits,
                     bias_quant=None,
                     input_quant=None,
                     output_quant=None,
                     update_wqi=None,
                     update_bqi=None,
                     update_iqi=None,
                     update_oqi=None)
    relu_cfg = dict(input_quant=None,
                    act_quant=LSQ_input_quant_8bits,
                    output_quant=None,
                    update_iqi=None,
                    update_aqi=None)

    model = QuantLeNet()

    model.conv1 = QuantConv2d(1, 6, 5, **layer_cfg)
    model.relu1 = QuantReLU(**relu_cfg)

    model.conv2 = QuantConv2d(6, 16, 5, **layer_cfg)
    model.relu2 = QuantReLU(**relu_cfg)

    model.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True, **layer_cfg)
    model.relu3 = QuantReLU(**relu_cfg)

    model.fc2 = QuantLinear(120, 84, bias=True, **layer_cfg)
    model.relu4 = QuantReLU(**relu_cfg)

    model.fc3 = QuantLinear(84, 10, bias=False, **layer_cfg)

    return model
Exemplo n.º 28
0
 def quant_weight_scale(module: QuantConv2d):
     """Return the module's weight scale as a detached ``FloatTensor``.

     A 4-D scale is reshaped to ``(1, -1, 1, 1)``; any other shape is
     returned as-is.
     """
     scale = module.quant_weight_scale()
     scale = scale.type(torch.FloatTensor).detach()
     if len(scale.shape) != 4:
         return scale
     return scale.view(1, -1, 1, 1)
Exemplo n.º 29
0
 def int_weight(module: QuantConv2d):
     """Return the detached result of ``module.int_weight(float_datatype=False)``."""
     integer_weight = module.int_weight(float_datatype=False)
     return integer_weight.detach()
Exemplo n.º 30
0
 def quant_weight_bit_width(module: QuantConv2d):
     """Return the module's weight bit width after DPUv1 validation.

     The value from ``module.quant_weight_bit_width()`` is passed through
     ``DPUv1QuantLayerHandler.validate_8b_bit_width``.
     """
     return DPUv1QuantLayerHandler.validate_8b_bit_width(
         module.quant_weight_bit_width())