    def __init__(self, input_width, weight_width, act_width):
        super(QuantLeNet, self).__init__()
        self.quant_inp = QuantIdentity(bit_width=input_width,
                                       min_val=-1.0,
                                       max_val=1.0)

        self.conv1 = QuantConv2d(1, 6, 5, weight_bit_width=weight_width)
        self.conv2 = QuantConv2d(6, 16, 5, weight_bit_width=weight_width)
        self.fc1 = QuantLinear(16 * 4 * 4,
                               120,
                               bias=True,
                               weight_bit_width=weight_width)
        self.fc2 = QuantLinear(120,
                               84,
                               bias=True,
                               weight_bit_width=weight_width)
        self.fc3 = QuantLinear(84,
                               10,
                               bias=False,
                               weight_bit_width=weight_width)

        self.relu1 = QuantReLU(bit_width=act_width, max_val=6)
        self.relu2 = QuantReLU(bit_width=act_width, max_val=6)
        self.relu3 = QuantReLU(bit_width=act_width, max_val=6)
        self.relu4 = QuantReLU(bit_width=act_width, max_val=6)
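The excerpt stops at the layer definitions. A minimal sketch of a matching forward, assuming torch.nn.functional is imported as F and 28x28 MNIST-style inputs (two unpadded 5x5 convolutions with 2x2 max pooling yield the 16 * 4 * 4 size that fc1 expects):

    def forward(self, x):
        x = self.quant_inp(x)
        # 28x28 -> conv1 -> 24x24 -> pool -> 12x12 -> conv2 -> 8x8 -> pool -> 4x4
        x = F.max_pool2d(self.relu1(self.conv1(x)), 2)
        x = F.max_pool2d(self.relu2(self.conv2(x)), 2)
        x = x.view(x.size(0), -1)  # flatten to (batch, 16 * 4 * 4)
        x = self.relu3(self.fc1(x))
        x = self.relu4(self.fc2(x))
        return self.fc3(x)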
Example No. 2
def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
    i_shape = (1, in_features)
    w_shape = (out_features, in_features)
    b_linear = QuantLinear(
        out_features=out_features,
        in_features=in_features,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=w_bits,
        weight_quant_type=QuantType.INT,
        weight_scaling_per_output_channel=True,
    )
    weight_tensor_fp = np.random.uniform(low=-1.0, high=1.0,
                                         size=w_shape).astype(np.float32)
    b_linear.weight.data = torch.from_numpy(weight_tensor_fp)
    b_linear.eval()
    bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = gen_finn_dt_tensor(i_dtype, i_shape)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    expected = b_linear.forward(inp_tensor).detach().numpy()

    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
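The test signature suggests it is driven by pytest parametrization. A hedged sketch of a decorator stack that could exercise it; the value grids and the DataType import path are illustrative assumptions, not the originals:

import pytest
from finn.core.datatype import DataType  # import path is an assumption

@pytest.mark.parametrize("bias", [False, True])
@pytest.mark.parametrize("out_features", [4])
@pytest.mark.parametrize("in_features", [3])
@pytest.mark.parametrize("w_bits", [4])
@pytest.mark.parametrize("i_dtype", [DataType.UINT4])
def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
    ...  # body as above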
Example No. 3
    def __init__(self,
                 num_classes,
                 weight_bit_width,
                 act_bit_width,
                 in_bit_width,
                 in_features=(1, 28, 28)):
        super(FC, self).__init__()

        self.features = ModuleList()
        self.features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=in_bit_width))
        self.features.append(Dropout(p=DROPOUT))
        in_features = reduce(mul, in_features)

        self.features.append(
            QuantLinear(in_features=in_features,
                        out_features=64,
                        bias=False,
                        weight_bit_width=weight_bit_width,
                        weight_quant=CommonWeightQuant))
        self.features.append(BatchNorm1d(num_features=64))
        self.features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
        self.features.append(Dropout(p=DROPOUT))

        self.features.append(
            QuantLinear(in_features=64,
                        out_features=64,
                        bias=False,
                        weight_bit_width=weight_bit_width,
                        weight_quant=CommonWeightQuant))
        self.features.append(BatchNorm1d(num_features=64))
        self.features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
        self.features.append(Dropout(p=DROPOUT))

        self.features.append(
            QuantLinear(in_features=64,
                        out_features=64,
                        bias=False,
                        weight_bit_width=weight_bit_width,
                        weight_quant=CommonWeightQuant))
        self.features.append(BatchNorm1d(num_features=64))
        self.features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
        self.features.append(Dropout(p=DROPOUT))

        self.features.append(
            QuantLinear(in_features=64,
                        out_features=num_classes,
                        bias=False,
                        weight_bit_width=weight_bit_width,
                        weight_quant=CommonWeightQuant))
        self.features.append(TensorNorm())

        for m in self.modules():
            if isinstance(m, QuantLinear):
                torch.nn.init.uniform_(m.weight.data, -1, 1)
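The forward pass is omitted from this excerpt; since every layer is appended to self.features in order, it is presumably little more than a flatten followed by a loop. A sketch, ignoring any input rescaling the original model may apply:

    def forward(self, x):
        # Flatten (1, 28, 28) images into vectors for the first QuantLinear.
        x = x.view(x.shape[0], -1)
        for mod in self.features:
            x = mod(x)
        return x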
Example No. 4
    def __init__(self, num_classes, weight_bit_width, act_bit_width,
                 in_bit_width, in_ch):
        super(CNV, self).__init__()

        self.conv_features = ModuleList()
        self.linear_features = ModuleList()

        self.conv_features.append(
            QuantIdentity(  # for Q1.7 input format
                act_quant=CommonActQuant,
                bit_width=in_bit_width,
                min_val=-1.0,
                max_val=1.0 - 2.0**(-7),
                narrow_range=False,
                restrict_scaling_type=RestrictValueType.POWER_OF_TWO))

        for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
            self.conv_features.append(
                QuantConv2d(kernel_size=KERNEL_SIZE,
                            in_channels=in_ch,
                            out_channels=out_ch,
                            bias=False,
                            weight_quant=CommonWeightQuant,
                            weight_bit_width=weight_bit_width))
            in_ch = out_ch
            self.conv_features.append(BatchNorm2d(in_ch, eps=1e-4))
            self.conv_features.append(
                QuantIdentity(act_quant=CommonActQuant,
                              bit_width=act_bit_width))
            if is_pool_enabled:
                self.conv_features.append(MaxPool2d(kernel_size=2))

        for in_features, out_features in INTERMEDIATE_FC_FEATURES:
            self.linear_features.append(
                QuantLinear(in_features=in_features,
                            out_features=out_features,
                            bias=False,
                            weight_quant=CommonWeightQuant,
                            weight_bit_width=weight_bit_width))
            self.linear_features.append(BatchNorm1d(out_features, eps=1e-4))
            self.linear_features.append(
                QuantIdentity(act_quant=CommonActQuant,
                              bit_width=act_bit_width))

        self.linear_features.append(
            QuantLinear(in_features=LAST_FC_IN_FEATURES,
                        out_features=num_classes,
                        bias=False,
                        weight_quant=CommonWeightQuant,
                        weight_bit_width=weight_bit_width))
        self.linear_features.append(TensorNorm())

        for m in self.modules():
            if isinstance(m, QuantConv2d) or isinstance(m, QuantLinear):
                torch.nn.init.uniform_(m.weight.data, -1, 1)
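As above, the forward is not shown; given the two ModuleLists, a plausible sketch is a pass through the conv stack, a flatten, then the linear stack:

    def forward(self, x):
        for mod in self.conv_features:
            x = mod(x)
        # Flatten the final feature map before the fully connected stack.
        x = x.view(x.shape[0], -1)
        for mod in self.linear_features:
            x = mod(x)
        return x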
Example No. 5
 def __init__(self, weight_bit_width=4, acti_bit_width=8):
     super(QuantLeNet, self).__init__()
     self.conv1 = QuantConv2d(1, 6, 5, padding=2, weight_bit_width=weight_bit_width)
     # self.relu1 = QuantReLU(bit_width=acti_bit_width, max_val=6)
     self.conv2 = QuantConv2d(6, 16, 5, weight_bit_width=weight_bit_width)
     # self.relu2 = QuantReLU(bit_width=acti_bit_width, max_val=6)
     self.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True, weight_bit_width=weight_bit_width)
     # self.relu3 = QuantReLU(bit_width=acti_bit_width, max_val=6)
     self.fc2 = QuantLinear(120, 84, bias=True, weight_bit_width=weight_bit_width)
     # self.relu4 = QuantReLU(bit_width=acti_bit_width, max_val=6)
     self.fc3 = QuantLinear(84, 10, bias=True, weight_bit_width=weight_bit_width)
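With the QuantReLU layers commented out, a matching forward needs its own activations. A sketch using plain ReLUs, assuming torch.nn.functional is imported as F (padding=2 on conv1 keeps 28x28 inputs, so two 2x2 pools reach the 16 * 5 * 5 size fc1 expects):

 def forward(self, x):
     # 28x28 -> conv1 (padded) -> 28x28 -> pool -> 14x14 -> conv2 -> 10x10 -> pool -> 5x5
     x = F.max_pool2d(F.relu(self.conv1(x)), 2)
     x = F.max_pool2d(F.relu(self.conv2(x)), 2)
     x = x.view(x.size(0), -1)  # flatten to (batch, 16 * 5 * 5)
     x = F.relu(self.fc1(x))
     x = F.relu(self.fc2(x))
     return self.fc3(x)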
Example No. 6
 def op_symbolic_kwargs(cls, module: QuantLinear):
     linear_symbolic_kwargs = {
         'input_scale': module.quant_input_scale(),
         'input_zero_point': cls.quant_input_zero_point(module),
         'int_weight': cls.int_weight(module).t(),  # transposed to an (in_features, out_features) layout
         'weight_scale': module.quant_weight_scale(),
         'weight_zero_point': cls.quant_weight_zero_point(module),
         'output_scale': module.quant_output_scale(),
         'output_zero_point': cls.quant_output_zero_point(module),
         'output_dtype': cls.torch_8b_dtype(module.is_quant_output_signed),
         'out_shape': cls.quant_output_shape(module)}
     return linear_symbolic_kwargs
Example No. 7
 def op_symbolic_kwargs(cls, module: QuantLinear):
     linear_symbolic_kwargs = {
         'input_scale': module.quant_input_scale(),
         'input_zero_point': cls.quant_input_zero_point(module),
         'int_weight': cls.int_weight(module),  # kept in PyTorch's (out_features, in_features) layout
         'weight_scale': module.quant_weight_scale(),
         'weight_zero_point': cls.quant_weight_zero_point(module),
         'output_scale': module.quant_output_scale(),
         'output_zero_point': cls.quant_output_zero_point(module),
         'out_shape': cls.quant_output_shape(module),
         'in_features': module.in_features,
         'out_features': module.out_features
     }
     return linear_symbolic_kwargs
Example No. 8
 def test_forward_bias_fp(self):
     mod = QuantLinear(
         out_features=OUTPUT_FEATURES,
         in_features=INPUT_FEATURES,
         bias=True)
     x = torch.rand(size=(3, INPUT_FEATURES))
     assert mod(x) is not None
Example No. 9
def default_wbiol_quant_linear(bias_enabled):
    """
    QuantLinear layer with default quantization settings
    """
    return QuantLinear(out_features=OUTPUT_CH,
                       in_features=IN_CH,
                       bias=bias_enabled)
Example No. 10
 def test_module_init_bias_int(self):
     mod = QuantLinear(
         out_features=OUTPUT_FEATURES,
         in_features=INPUT_FEATURES,
         bias=True,
         bias_quant_type='INT')
     assert mod
Example No. 11
 def test_forward_bias_int(self):
     mod = QuantLinear(
         out_features=OUTPUT_FEATURES,
         in_features=INPUT_FEATURES,
         bias=True,
         bias_quant_type='INT')
     x = QuantTensor(torch.rand(size=(3, INPUT_FEATURES)), torch.tensor(1.0), torch.tensor(3))
     assert mod(x) is not None
Example No. 12
def test_weight_bit_width_weighted_by_size():
    model = QuantLinear(out_features=6,
                        in_features=5,
                        bias=False,
                        weight_bit_width_impl_type='parameter',
                        weight_bit_width=4)
    loss = WeightBitWidthWeightedBySize(model)
    out = model(torch.randn(2, 5, 5))
    assert loss.tot_num_elements == 30
    assert loss.retrieve() == 4.0
Example No. 13
 def __init__(self):
     super().__init__()
     self.quant_inp = QuantIdentity(return_quant_tensor=True)
     self.linear = QuantLinear(out_features=OUT_CH,
                               in_features=IN_CH,
                               bias=True,
                               output_quant=Int8ActPerTensorFloat,
                               bias_quant=Int16Bias,
                               return_quant_tensor=False)
     self.linear.weight.data.uniform_(-0.01, 0.01)
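A plausible one-line forward for this module, assuming the quantized input is meant to feed straight into the linear layer:

 def forward(self, x):
      # quant_inp returns a QuantTensor, which QuantLinear consumes directly
      return self.linear(self.quant_inp(x))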
Example No. 14
def get_quant_linear(in_features, out_features, per_out_ch_scaling, bit_width, quant_type):
    return QuantLinear(bias=BIAS_ENABLED,
                       in_features=in_features,
                       out_features=out_features,
                       weight_quant_type=quant_type,
                       weight_bit_width=bit_width,
                       weight_scaling_const=WEIGHT_SCALING_CONST,
                       weight_bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
                       weight_scaling_per_output_channel=per_out_ch_scaling,
                       weight_scaling_impl_type=WEIGHT_SCALING_IMPL_TYPE,
                       weight_narrow_range=NARROW_RANGE_ENABLED)
Example No. 15
def get_quant_linear(in_features, out_features, per_out_ch_scaling, bit_width, quant_type, stats_op):
    return QuantLinear(bias=BIAS_ENABLED,
                       in_features=in_features,
                       out_features=out_features,
                       weight_quant_type=quant_type,
                       weight_narrow_range=NARROW_RANGE_ENABLED,
                       weight_bit_width=bit_width,
                       weight_bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
                       weight_scaling_per_output_channel=per_out_ch_scaling,
                       weight_scaling_stats_op=stats_op,
                       weight_scaling_stats_sigma=SIGMA)
Example No. 16
 def __init__(self, cfg, batch_norm, bit_width=8, num_classes=1000):
     super(QuantVGG, self).__init__()
     self.features = make_layers(cfg, batch_norm, bit_width)
     self.avgpool = QuantAvgPool2d(kernel_size=(7, 7), stride=1, bit_width=bit_width)
     self.classifier = nn.Sequential(
         QuantLinear(
             512 * 7 * 7, 4096, bias=True,
             weight_quant=CommonIntWeightPerChannelQuant, weight_bit_width=bit_width),
         QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
         nn.Dropout(),
         QuantLinear(
             4096, 4096, bias=True,
             weight_quant=CommonIntWeightPerChannelQuant, weight_bit_width=bit_width),
          QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
         nn.Dropout(),
         QuantLinear(
             4096, num_classes, bias=False,
             weight_quant=CommonIntWeightPerTensorQuant, weight_bit_width=bit_width),
     )
     self._initialize_weights()
Example No. 17
def test_parameter_from_stats_state_dict():
    q_linear1 = QuantLinear(10,
                            5,
                            bias=False,
                            weight_quant_type='binary',
                            weight_scaling_impl_type='parameter',
                            weight_scaling_init=0.1)
    q_linear2 = QuantLinear(10,
                            5,
                            bias=False,
                            weight_quant_type='binary',
                            weight_scaling_impl_type='parameter',
                            weight_scaling_init=0.001)
    q_linear1_old_scale = q_linear1.quant_weight_scale()
    q_linear1.load_state_dict(q_linear2.state_dict())
    q_linear1_new_scale = q_linear1.quant_weight_scale()
    q_linear2_scale = q_linear2.quant_weight_scale()
    assert q_linear1_old_scale != q_linear2_scale
    assert q_linear1_old_scale != q_linear1_new_scale
    assert q_linear1_new_scale == q_linear2_scale
Example No. 18
def PreQuantizedLinear(in_features,
                       out_features,
                       config,
                       bias=True):
    return QuantLinear(in_features,
                       out_features,
                       bias,
                       weight_quant_type=QuantType.INT,
                       weight_narrow_range=True,
                       weight_bit_width=config.weight_bit_width,
                       weight_scaling_per_output_channel=False)
Example No. 19
def test_output_bit_weighted_by_ops():
    model = QuantLinear(out_features=6,
                        in_features=5,
                        bias=False,
                        input_quant=Int8ActPerTensorFloat,
                        weight_bit_width_impl_type='parameter',
                        return_quant_tensor=True)
    loss = QuantLayerOutputBitWidthWeightedByOps(model)
    out = model(torch.randn(2, 4, 5))
    assert loss.tot_num_elements == 24 * 10 / MEGA
    assert loss.retrieve() == out.bit_width
Example No. 20
 def __init__(self):
     super().__init__()
     self.linear = QuantLinear(
         in_features=IN_CH,
         out_features=OUT_CH,
         bias=False,
         weight_quant=ShiftedUint8WeightPerTensorFloat,
         input_quant=ShiftedUint8ActPerTensorFloat,
         output_quant=ShiftedUint8ActPerTensorFloat,
         return_quant_tensor=False)
     self.linear.weight.data.uniform_(-0.01, 0.01)
Example No. 21
 def __init__(self):
     super().__init__()
     self.linear = QuantLinear(
         in_features=IN_CH,
         out_features=OUT_CH,
         bias=True,
         weight_quant=Int8WeightPerTensorFixedPoint,
         bias_quant=Int8BiasPerTensorFixedPointInternalScaling,
         input_quant=Int8ActPerTensorFixedPoint,
         output_quant=Int8ActPerTensorFixedPoint,
         return_quant_tensor=False)
     self.linear.weight.data.uniform_(-0.01, 0.01)
Example No. 22
def make_qlinear(in_features,
                 out_features,
                 bias=True,
                 no_quant=True,
                 **kwargs) -> QuantLinear:
    if no_quant:
        return QuantLinear(in_features=in_features,
                           out_features=out_features,
                           bias=bias,
                           weight_quant=None,
                           input_quant=None,
                           bias_quant=None,
                           output_quant=None,
                           update_wqi=None,
                           update_bqi=None,
                           update_iqi=None,
                           update_oqi=None)
    else:
        return QuantLinear(in_features=in_features,
                           out_features=out_features,
                           bias=bias,
                           weight_bit_width=kwargs['bit_width'])
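A usage sketch of both branches, with hypothetical feature sizes:

# All quantizers disabled: behaves like a plain nn.Linear.
fp_linear = make_qlinear(128, 64)
# Weight quantization enabled; the bit width comes in through **kwargs.
q4_linear = make_qlinear(128, 64, no_quant=False, bit_width=4)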
Example No. 23
 def __init__(self):
     super().__init__()
     self.linear = QuantLinear(
         in_features=IN_CH,
         out_features=OUT_CH,
         bias=True,
         weight_quant=Int8WeightPerTensorFloat,
         input_bit_width=7,
         output_bit_width=7,
         input_quant=ShiftedUint8ActPerTensorFloat,
         output_quant=ShiftedUint8ActPerTensorFloat,
         bias_quant=IntBiasExternalBitWidth,
         return_quant_tensor=False)
     self.linear.weight.data.uniform_(-0.01, 0.01)
Example No. 24
def test_quant_linear(bias, bias_quant, out_features, in_features, w_bits,
                      channel_scaling, i_bits):
    # required to generate quantized inputs; not part of the exported model under test
    quant_inp = QuantIdentity(bit_width=i_bits, return_quant_tensor=True)
    inp_tensor = quant_inp(torch.randn(1, in_features))
    linear = QuantLinear(out_features=out_features,
                         in_features=in_features,
                         bias=bias,
                         bias_quant=bias_quant,
                         weight_bit_width=w_bits,
                         weight_scaling_per_output_channel=channel_scaling)
    linear.eval()
    model = bo.export_finn_onnx(linear,
                                input_t=inp_tensor,
                                export_path='linear.onnx')
    model = ModelWrapper(model)
    model = model.transform(InferShapes())
    # the quantized input tensor passed to FINN should be in integer form
    int_inp_array = inp_tensor.int(float_datatype=True).numpy()
    idict = {model.graph.input[0].name: int_inp_array}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    expected = linear(inp_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
Example No. 25
    def __init__(self,
                 channels,
                 first_stage_stride,
                 bit_width,
                 in_channels=3,
                 num_classes=1000):
        super(MobileNet, self).__init__()
        init_block_channels = channels[0][0]

        self.features = Sequential()
        init_block = ConvBlock(in_channels=in_channels,
                               out_channels=init_block_channels,
                               kernel_size=3,
                               stride=2,
                               weight_bit_width=FIRST_LAYER_BIT_WIDTH,
                               activation_scaling_per_channel=True,
                               act_bit_width=bit_width)
        self.features.add_module('init_block', init_block)
        in_channels = init_block_channels
        for i, channels_per_stage in enumerate(channels[1:]):
            stage = Sequential()
            pw_activation_scaling_per_channel = i < len(channels[1:]) - 1
            for j, out_channels in enumerate(channels_per_stage):
                stride = 2 if (j == 0) and (
                    (i != 0) or first_stage_stride) else 1
                mod = DwsConvBlock(in_channels=in_channels,
                                   out_channels=out_channels,
                                   stride=stride,
                                   bit_width=bit_width,
                                   pw_activation_scaling_per_channel=
                                   pw_activation_scaling_per_channel)
                stage.add_module('unit{}'.format(j + 1), mod)
                in_channels = out_channels
            self.features.add_module('stage{}'.format(i + 1), stage)
        self.final_pool = QuantAvgPool2d(kernel_size=7,
                                         stride=1,
                                         bit_width=bit_width)
        self.output = QuantLinear(in_channels,
                                  num_classes,
                                  bias=True,
                                  bias_quant=IntBias,
                                  weight_quant=CommonIntWeightPerTensorQuant,
                                  weight_bit_width=bit_width)
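The head defined above implies a conventional classifier forward; a sketch, assuming nothing beyond the modules created here:

    def forward(self, x):
        x = self.features(x)
        x = self.final_pool(x)
        x = x.view(x.size(0), -1)  # flatten pooled features for the classifier
        return self.output(x)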
Example No. 26
def test_parameter_from_stats_update():
    config.IGNORE_MISSING_KEYS = True
    linear = nn.Linear(10, 5, bias=False)
    q_linear = QuantLinear(10,
                           5,
                           bias=False,
                           weight_quant_type='binary',
                           weight_scaling_impl_type='parameter_from_stats')
    l_max = linear.weight.abs().max()
    old_scale = q_linear.quant_weight_scale()
    old_ql_max = q_linear.weight.abs().max()
    q_linear.load_state_dict(linear.state_dict())
    new_scale = q_linear.quant_weight_scale()
    new_ql_max = q_linear.weight.abs().max()
    assert old_scale == old_ql_max
    assert new_scale == l_max
    assert new_scale == new_ql_max
Example No. 27
 def test_module_init_scale_impl_type_override(self):
     mod = QuantLinear(
         out_features=OUTPUT_FEATURES,
         in_features=INPUT_FEATURES,
         bias=True, weight_scaling_impl_type='HE')
     assert mod.quant_weight_scale()
Example No. 28
    def __init__(self,
                 channels,
                 init_block_channels,
                 final_block_channels,
                 residuals,
                 shortcuts,
                 kernel_sizes,
                 expansions,
                 quant_type,
                 bit_width,
                 depthwise_bit_width,
                 first_layer_bit_width,
                 hard_tanh_threshold,
                 dropout_rate,
                 dropout_steps,
                 weight_scaling_impl_type,
                 compute_micronet_cost,
                 input_bit_width=8,
                 bn_eps=1e-3,
                 in_channels=3,
                 num_classes=1000):
        super(ProxylessNAS, self).__init__()
        self.compute_micronet_cost = compute_micronet_cost
        self.input_bit_width = torch.tensor(input_bit_width).float().cuda()
        self.num_classes = num_classes
        self.dropout_rate = dropout_rate
        self.dropout_steps = dropout_steps

        self.features = nn.Sequential()
        self.features.add_module(
            "init_block",
            ConvBlock(in_channels=in_channels,
                      out_channels=init_block_channels,
                      kernel_size=3,
                      stride=2,
                      padding=1,
                      groups=1,
                      bn_eps=bn_eps,
                      act_scaling_per_channel=False,
                      weight_scaling_impl_type=weight_scaling_impl_type,
                      bias=False,
                      quant_type=quant_type,
                      act_bit_width=bit_width,
                      weight_bit_width=first_layer_bit_width,
                      compute_micronet_cost=compute_micronet_cost))
        in_channels = init_block_channels
        shared_act = None
        for i, channels_per_stage in enumerate(channels):
            stage = nn.Sequential()
            residuals_per_stage = residuals[i]
            shortcuts_per_stage = shortcuts[i]
            kernel_sizes_per_stage = kernel_sizes[i]
            expansions_per_stage = expansions[i]
            for j, out_channels in enumerate(channels_per_stage):
                residual = (residuals_per_stage[j] == 1)
                shortcut = (shortcuts_per_stage[j] == 1)
                kernel_size = kernel_sizes_per_stage[j]
                expansion = expansions_per_stage[j]
                stride = 2 if (j == 0) and (i != 0) else 1

                if not shortcut:
                    shared_act = QuantHardTanh(
                        bit_width=bit_width,
                        quant_type=quant_type,
                        scaling_per_channel=False,
                        scaling_impl_type=ScalingImplType.PARAMETER,
                        scaling_min_val=MIN_SCALING_VALUE,
                        max_val=hard_tanh_threshold,
                        min_val=-hard_tanh_threshold,
                        restrict_scaling_type=RestrictValueType.LOG_FP,
                        return_quant_tensor=True)

                stage.add_module(
                    "unit{}".format(j + 1),
                    ProxylessUnit(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=kernel_size,
                        stride=stride,
                        bn_eps=bn_eps,
                        expansion=expansion,
                        residual=residual,
                        shortcut=shortcut,
                        bit_width=bit_width,
                        depthwise_bit_width=depthwise_bit_width,
                        quant_type=quant_type,
                        weight_scaling_impl_type=weight_scaling_impl_type,
                        shared_act=shared_act,
                        compute_micronet_cost=compute_micronet_cost))
                in_channels = out_channels

            self.features.add_module("stage{}".format(i + 1), stage)
        self.features.add_module(
            "final_block",
            ConvBlock(in_channels=in_channels,
                      out_channels=final_block_channels,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      groups=1,
                      bn_eps=bn_eps,
                      act_scaling_per_channel=False,
                      quant_type=quant_type,
                      act_bit_width=bit_width,
                      weight_bit_width=bit_width,
                      weight_scaling_impl_type=weight_scaling_impl_type,
                      bias=False,
                      compute_micronet_cost=compute_micronet_cost))
        in_channels = final_block_channels
        self.final_pool = QuantAvgPool2d(kernel_size=7,
                                         stride=1,
                                         quant_type=quant_type,
                                         min_overall_bit_width=bit_width,
                                         max_overall_bit_width=bit_width)

        self.output = QuantLinear(
            in_features=in_channels,
            out_features=num_classes,
            bias=True,
            bias_quant_type=quant_type,
            compute_output_bit_width=quant_type == QuantType.INT,
            compute_output_scale=quant_type == QuantType.INT,
            weight_bit_width=bit_width,
            weight_quant_type=quant_type,
            weight_scaling_min_val=MIN_SCALING_VALUE,
            weight_scaling_per_output_channel=False,
            weight_scaling_stats_op=StatsOp.MAX,
            weight_narrow_range=True,
            weight_restrict_scaling_type=RestrictValueType.LOG_FP,
            weight_scaling_impl_type=weight_scaling_impl_type,
            return_quant_tensor=True)

        self._init_params()
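The forward is again omitted. A minimal sketch along the same lines as the MobileNet example; note that self.dropout_rate and self.dropout_steps suggest the original also applies dropout during training, which this sketch leaves out:

    def forward(self, x):
        x = self.features(x)
        x = self.final_pool(x)
        x = x.view(x.size(0), -1)
        return self.output(x)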
Example No. 29
 def __init__(self):
     super(QuantLeNet, self).__init__()
     self.conv1 = QuantConv2d(1,
                              6,
                              5,
                              weight_quant=None,
                              input_quant=None,
                              bias_quant=None,
                              output_quant=None,
                              update_wqi=None,
                              update_bqi=None,
                              update_iqi=None,
                              update_oqi=None)
     self.relu1 = QuantReLU(input_quant=None,
                            act_quant=None,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)
     self.conv2 = QuantConv2d(6,
                              16,
                              5,
                              weight_quant=None,
                              input_quant=None,
                              bias_quant=None,
                              output_quant=None,
                              update_wqi=None,
                              update_bqi=None,
                              update_iqi=None,
                              update_oqi=None)
     self.relu2 = QuantReLU(input_quant=None,
                            act_quant=None,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)
     self.fc1 = QuantLinear(16 * 5 * 5,
                            120,
                            bias=True,
                            weight_quant=None,
                            input_quant=None,
                            bias_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)
     self.relu3 = QuantReLU(input_quant=None,
                            act_quant=None,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)
     self.fc2 = QuantLinear(120,
                            84,
                            bias=True,
                            weight_quant=None,
                            input_quant=None,
                            bias_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)
     self.relu4 = QuantReLU(input_quant=None,
                            act_quant=None,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)
     self.fc3 = QuantLinear(84,
                            10,
                            bias=False,
                            weight_quant=None,
                            input_quant=None,
                            bias_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)
Example No. 30
def get_8_bits_quantized_lenet():
    model = QuantLeNet()
    model.conv1 = QuantConv2d(1,
                              6,
                              5,
                              weight_quant=LSQ_weight_quant_8bits,
                              bias_quant=None,
                              input_quant=None,
                              output_quant=None,
                              update_wqi=None,
                              update_bqi=None,
                              update_iqi=None,
                              update_oqi=None)
    model.relu1 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)

    model.conv2 = QuantConv2d(6,
                              16,
                              5,
                              weight_quant=LSQ_weight_quant_8bits,
                              bias_quant=None,
                              input_quant=None,
                              output_quant=None,
                              update_wqi=None,
                              update_bqi=None,
                              update_iqi=None,
                              update_oqi=None)
    model.relu2 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)

    model.fc1 = QuantLinear(16 * 5 * 5,
                            120,
                            bias=True,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None,
                            input_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)

    model.relu3 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)

    model.fc2 = QuantLinear(120,
                            84,
                            bias=True,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None,
                            input_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)

    model.relu4 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None,
                            update_aqi=None)

    model.fc3 = QuantLinear(84,
                            10,
                            bias=False,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None,
                            input_quant=None,
                            output_quant=None,
                            update_wqi=None,
                            update_bqi=None,
                            update_iqi=None,
                            update_oqi=None)

    return model
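A quick smoke test of the returned model; a sketch that assumes QuantLeNet defines a forward for the 28x28 LeNet layout used throughout these examples:

import torch

model = get_8_bits_quantized_lenet()
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 1, 28, 28))  # single MNIST-sized input
print(logits.shape)  # expected: torch.Size([1, 10])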