def __init__(self, input_width, weight_width, act_width):
    super(QuantLeNet, self).__init__()
    self.quant_inp = QuantIdentity(bit_width=input_width, min_val=-1.0, max_val=1.0)
    self.conv1 = QuantConv2d(1, 6, 5, weight_bit_width=weight_width)
    self.conv2 = QuantConv2d(6, 16, 5, weight_bit_width=weight_width)
    self.fc1 = QuantLinear(16 * 4 * 4, 120, bias=True, weight_bit_width=weight_width)
    self.fc2 = QuantLinear(120, 84, bias=True, weight_bit_width=weight_width)
    self.fc3 = QuantLinear(84, 10, bias=False, weight_bit_width=weight_width)
    self.relu1 = QuantReLU(bit_width=act_width, max_val=6)
    self.relu2 = QuantReLU(bit_width=act_width, max_val=6)
    self.relu3 = QuantReLU(bit_width=act_width, max_val=6)
    self.relu4 = QuantReLU(bit_width=act_width, max_val=6)
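# Hedged sketch, not part of the original snippet: a forward() matching the layers
# defined above, assuming 28x28 single-channel inputs and functional 2x2 max-pooling
# (28 -> 24 -> 12 -> 8 -> 4, which is where fc1's 16 * 4 * 4 input size comes from).
# Assumes `import torch.nn.functional as F`.
def forward(self, x):
    x = self.quant_inp(x)
    x = F.max_pool2d(self.relu1(self.conv1(x)), 2)  # 28x28 -> 24x24 -> 12x12
    x = F.max_pool2d(self.relu2(self.conv2(x)), 2)  # 12x12 -> 8x8 -> 4x4
    x = x.reshape(x.shape[0], -1)                   # flatten to 16 * 4 * 4
    x = self.relu3(self.fc1(x))
    x = self.relu4(self.fc2(x))
    return self.fc3(x)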
def test_brevitas_qlinear(bias, out_features, in_features, w_bits, i_dtype):
    i_shape = (1, in_features)
    w_shape = (out_features, in_features)
    b_linear = QuantLinear(
        out_features=out_features,
        in_features=in_features,
        bias=bias,
        bias_quant_type=QuantType.FP,
        weight_bit_width=w_bits,
        weight_quant_type=QuantType.INT,
        weight_scaling_per_output_channel=True,
    )
    weight_tensor_fp = np.random.uniform(low=-1.0, high=1.0, size=w_shape).astype(np.float32)
    b_linear.weight.data = torch.from_numpy(weight_tensor_fp)
    b_linear.eval()
    bo.export_finn_onnx(b_linear, i_shape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = gen_finn_dt_tensor(i_dtype, i_shape)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    expected = b_linear.forward(inp_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def __init__(self, num_classes, weight_bit_width, act_bit_width,
             in_bit_width, in_features=(1, 28, 28)):
    super(FC, self).__init__()
    self.features = ModuleList()
    self.features.append(
        QuantIdentity(act_quant=CommonActQuant, bit_width=in_bit_width))
    self.features.append(Dropout(p=DROPOUT))
    in_features = reduce(mul, in_features)
    self.features.append(
        QuantLinear(in_features=in_features,
                    out_features=64,
                    bias=False,
                    weight_bit_width=weight_bit_width,
                    weight_quant=CommonWeightQuant))
    self.features.append(BatchNorm1d(num_features=64))
    self.features.append(
        QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
    self.features.append(Dropout(p=DROPOUT))
    self.features.append(
        QuantLinear(in_features=64,
                    out_features=64,
                    bias=False,
                    weight_bit_width=weight_bit_width,
                    weight_quant=CommonWeightQuant))
    self.features.append(BatchNorm1d(num_features=64))
    self.features.append(
        QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
    self.features.append(Dropout(p=DROPOUT))
    self.features.append(
        QuantLinear(in_features=64,
                    out_features=64,
                    bias=False,
                    weight_bit_width=weight_bit_width,
                    weight_quant=CommonWeightQuant))
    self.features.append(BatchNorm1d(num_features=64))
    self.features.append(
        QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
    self.features.append(Dropout(p=DROPOUT))
    self.features.append(
        QuantLinear(in_features=64,
                    out_features=num_classes,
                    bias=False,
                    weight_bit_width=weight_bit_width,
                    weight_quant=CommonWeightQuant))
    self.features.append(TensorNorm())
    for m in self.modules():
        if isinstance(m, QuantLinear):
            torch.nn.init.uniform_(m.weight.data, -1, 1)
def __init__(self, num_classes, weight_bit_width, act_bit_width,
             in_bit_width, in_ch):
    super(CNV, self).__init__()
    self.conv_features = ModuleList()
    self.linear_features = ModuleList()
    self.conv_features.append(
        QuantIdentity(  # for Q1.7 input format
            act_quant=CommonActQuant,
            bit_width=in_bit_width,
            min_val=-1.0,
            max_val=1.0 - 2.0 ** (-7),
            narrow_range=False,
            restrict_scaling_type=RestrictValueType.POWER_OF_TWO))
    for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
        self.conv_features.append(
            QuantConv2d(kernel_size=KERNEL_SIZE,
                        in_channels=in_ch,
                        out_channels=out_ch,
                        bias=False,
                        weight_quant=CommonWeightQuant,
                        weight_bit_width=weight_bit_width))
        in_ch = out_ch
        self.conv_features.append(BatchNorm2d(in_ch, eps=1e-4))
        self.conv_features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
        if is_pool_enabled:
            self.conv_features.append(MaxPool2d(kernel_size=2))
    for in_features, out_features in INTERMEDIATE_FC_FEATURES:
        self.linear_features.append(
            QuantLinear(in_features=in_features,
                        out_features=out_features,
                        bias=False,
                        weight_quant=CommonWeightQuant,
                        weight_bit_width=weight_bit_width))
        self.linear_features.append(BatchNorm1d(out_features, eps=1e-4))
        self.linear_features.append(
            QuantIdentity(act_quant=CommonActQuant, bit_width=act_bit_width))
    self.linear_features.append(
        QuantLinear(in_features=LAST_FC_IN_FEATURES,
                    out_features=num_classes,
                    bias=False,
                    weight_quant=CommonWeightQuant,
                    weight_bit_width=weight_bit_width))
    self.linear_features.append(TensorNorm())
    for m in self.modules():
        if isinstance(m, (QuantConv2d, QuantLinear)):
            torch.nn.init.uniform_(m.weight.data, -1, 1)
def __init__(self, weight_bit_width=4, acti_bit_width=8):
    super(QuantLeNet, self).__init__()
    self.conv1 = QuantConv2d(1, 6, 5, padding=2, weight_bit_width=weight_bit_width)
    # self.relu1 = QuantReLU(bit_width=acti_bit_width, max_val=6)
    self.conv2 = QuantConv2d(6, 16, 5, weight_bit_width=weight_bit_width)
    # self.relu2 = QuantReLU(bit_width=acti_bit_width, max_val=6)
    self.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True, weight_bit_width=weight_bit_width)
    # self.relu3 = QuantReLU(bit_width=acti_bit_width, max_val=6)
    self.fc2 = QuantLinear(120, 84, bias=True, weight_bit_width=weight_bit_width)
    # self.relu4 = QuantReLU(bit_width=acti_bit_width, max_val=6)
    self.fc3 = QuantLinear(84, 10, bias=True, weight_bit_width=weight_bit_width)
def op_symbolic_kwargs(cls, module: QuantLinear):
    linear_symbolic_kwargs = {
        'input_scale': module.quant_input_scale(),
        'input_zero_point': cls.quant_input_zero_point(module),
        'int_weight': cls.int_weight(module).t(),
        'weight_scale': module.quant_weight_scale(),
        'weight_zero_point': cls.quant_weight_zero_point(module),
        'output_scale': module.quant_output_scale(),
        'output_zero_point': cls.quant_output_zero_point(module),
        'output_dtype': cls.torch_8b_dtype(module.is_quant_output_signed),
        'out_shape': cls.quant_output_shape(module)}
    return linear_symbolic_kwargs
def op_symbolic_kwargs(cls, module: QuantLinear):
    linear_symbolic_kwargs = {
        'input_scale': module.quant_input_scale(),
        'input_zero_point': cls.quant_input_zero_point(module),
        'int_weight': cls.int_weight(module),
        'weight_scale': module.quant_weight_scale(),
        'weight_zero_point': cls.quant_weight_zero_point(module),
        'output_scale': module.quant_output_scale(),
        'output_zero_point': cls.quant_output_zero_point(module),
        'out_shape': cls.quant_output_shape(module),
        'in_features': module.in_features,
        'out_features': module.out_features}
    return linear_symbolic_kwargs
def test_forward_bias_fp(self):
    mod = QuantLinear(
        out_features=OUTPUT_FEATURES,
        in_features=INPUT_FEATURES,
        bias=True)
    x = torch.rand(size=(3, INPUT_FEATURES))
    assert mod(x) is not None
def default_wbiol_quant_linear(bias_enabled):
    """QuantLinear layer with default quantization settings."""
    return QuantLinear(
        out_features=OUTPUT_CH,
        in_features=IN_CH,
        bias=bias_enabled)
def test_module_init_bias_int(self):
    mod = QuantLinear(
        out_features=OUTPUT_FEATURES,
        in_features=INPUT_FEATURES,
        bias=True,
        bias_quant_type='INT')
    assert mod
def test_forward_bias_int(self):
    mod = QuantLinear(
        out_features=OUTPUT_FEATURES,
        in_features=INPUT_FEATURES,
        bias=True,
        bias_quant_type='INT')
    x = QuantTensor(torch.rand(size=(3, INPUT_FEATURES)),
                    torch.tensor(1.0),
                    torch.tensor(3))
    assert mod(x) is not None
def test_weight_bit_width_weighted_by_size():
    model = QuantLinear(out_features=6,
                        in_features=5,
                        bias=False,
                        weight_bit_width_impl_type='parameter',
                        weight_bit_width=4)
    loss = WeightBitWidthWeightedBySize(model)
    out = model(torch.randn(2, 5, 5))
    assert loss.tot_num_elements == 30
    assert loss.retrieve() == 4.0
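# Hedged sketch, not from the original test: a loss like the one above is typically
# used as a regularizer on a learnable bit-width, added to the task loss with a
# trade-off weight. The setup mirrors the test; the squared-output task loss and
# the 0.1 weight are purely illustrative.
model = QuantLinear(out_features=6, in_features=5, bias=False,
                    weight_bit_width_impl_type='parameter', weight_bit_width=4)
bit_loss = WeightBitWidthWeightedBySize(model)
opt = torch.optim.SGD(model.parameters(), lr=1e-3)
task_loss = model(torch.randn(2, 5)).pow(2).mean()  # stand-in task loss
(task_loss + 0.1 * bit_loss.retrieve()).backward()
opt.step()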
def __init__(self):
    super().__init__()
    self.quant_inp = QuantIdentity(return_quant_tensor=True)
    self.linear = QuantLinear(
        out_features=OUT_CH,
        in_features=IN_CH,
        bias=True,
        output_quant=Int8ActPerTensorFloat,
        bias_quant=Int16Bias,
        return_quant_tensor=False)
    self.linear.weight.data.uniform_(-0.01, 0.01)
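# Hedged usage sketch: the enclosing class name is not shown above, so `Model` is a
# stand-in, and IN_CH is the constant used in the module. quant_inp returns a
# QuantTensor, which QuantLinear consumes; because return_quant_tensor=False,
# the output is a plain torch.Tensor.
model = Model()
out = model.linear(model.quant_inp(torch.randn(1, IN_CH)))
assert isinstance(out, torch.Tensor)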
def get_quant_linear(in_features, out_features, per_out_ch_scaling, bit_width, quant_type):
    return QuantLinear(bias=BIAS_ENABLED,
                       in_features=in_features,
                       out_features=out_features,
                       weight_quant_type=quant_type,
                       weight_bit_width=bit_width,
                       weight_scaling_const=WEIGHT_SCALING_CONST,
                       weight_bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
                       weight_scaling_per_output_channel=per_out_ch_scaling,
                       weight_scaling_impl_type=WEIGHT_SCALING_IMPL_TYPE,
                       weight_narrow_range=NARROW_RANGE_ENABLED)
def get_quant_linear(in_features, out_features, per_out_ch_scaling, bit_width,
                     quant_type, stats_op):
    return QuantLinear(bias=BIAS_ENABLED,
                       in_features=in_features,
                       out_features=out_features,
                       weight_quant_type=quant_type,
                       weight_narrow_range=NARROW_RANGE_ENABLED,
                       weight_bit_width=bit_width,
                       weight_bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
                       weight_scaling_per_output_channel=per_out_ch_scaling,
                       weight_scaling_stats_op=stats_op,
                       weight_scaling_stats_sigma=SIGMA)
def __init__(self, cfg, batch_norm, bit_width=8, num_classes=1000):
    super(QuantVGG, self).__init__()
    self.features = make_layers(cfg, batch_norm, bit_width)
    self.avgpool = QuantAvgPool2d(kernel_size=(7, 7), stride=1, bit_width=bit_width)
    self.classifier = nn.Sequential(
        QuantLinear(
            512 * 7 * 7, 4096,
            bias=True,
            weight_quant=CommonIntWeightPerChannelQuant,
            weight_bit_width=bit_width),
        QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
        nn.Dropout(),
        QuantLinear(
            4096, 4096,
            bias=True,
            weight_quant=CommonIntWeightPerChannelQuant,
            weight_bit_width=bit_width),
        QuantReLU(act_quant=CommonUintActQuant, bit_width=bit_width),
        nn.Dropout(),
        QuantLinear(
            4096, num_classes,
            bias=False,
            weight_quant=CommonIntWeightPerTensorQuant,
            weight_bit_width=bit_width),
    )
    self._initialize_weights()
def test_parameter_from_stats_state_dict():
    q_linear1 = QuantLinear(10, 5,
                            bias=False,
                            weight_quant_type='binary',
                            weight_scaling_impl_type='parameter',
                            weight_scaling_init=0.1)
    q_linear2 = QuantLinear(10, 5,
                            bias=False,
                            weight_quant_type='binary',
                            weight_scaling_impl_type='parameter',
                            weight_scaling_init=0.001)
    q_linear1_old_scale = q_linear1.quant_weight_scale()
    q_linear1.load_state_dict(q_linear2.state_dict())
    q_linear1_new_scale = q_linear1.quant_weight_scale()
    q_linear2_scale = q_linear2.quant_weight_scale()
    assert q_linear1_old_scale != q_linear2_scale
    assert q_linear1_old_scale != q_linear1_new_scale
    assert q_linear1_new_scale == q_linear2_scale
def PreQuantizedLinear(in_features, out_features, config, bias=True):
    return QuantLinear(in_features, out_features, bias,
                       weight_quant_type=QuantType.INT,
                       weight_narrow_range=True,
                       weight_bit_width=config.weight_bit_width,
                       weight_scaling_per_output_channel=False)
def test_output_bit_weighted_by_ops():
    model = QuantLinear(out_features=6,
                        in_features=5,
                        bias=False,
                        input_quant=Int8ActPerTensorFloat,
                        weight_bit_width_impl_type='parameter',
                        return_quant_tensor=True)
    loss = QuantLayerOutputBitWidthWeightedByOps(model)
    out = model(torch.randn(2, 4, 5))
    assert loss.tot_num_elements == 24 * 10 / MEGA
    assert loss.retrieve() == out.bit_width
def __init__(self):
    super().__init__()
    self.linear = QuantLinear(
        in_features=IN_CH,
        out_features=OUT_CH,
        bias=False,
        weight_quant=ShiftedUint8WeightPerTensorFloat,
        input_quant=ShiftedUint8ActPerTensorFloat,
        output_quant=ShiftedUint8ActPerTensorFloat,
        return_quant_tensor=False)
    self.linear.weight.data.uniform_(-0.01, 0.01)
def __init__(self):
    super().__init__()
    self.linear = QuantLinear(
        in_features=IN_CH,
        out_features=OUT_CH,
        bias=True,
        weight_quant=Int8WeightPerTensorFixedPoint,
        bias_quant=Int8BiasPerTensorFixedPointInternalScaling,
        input_quant=Int8ActPerTensorFixedPoint,
        output_quant=Int8ActPerTensorFixedPoint,
        return_quant_tensor=False)
    self.linear.weight.data.uniform_(-0.01, 0.01)
def make_qlinear(in_features, out_features, bias=True, no_quant=True, **kwargs) -> QuantLinear:
    if no_quant:
        return QuantLinear(in_features=in_features,
                           out_features=out_features,
                           bias=bias,
                           weight_quant=None,
                           input_quant=None,
                           bias_quant=None,
                           output_quant=None,
                           update_wqi=None,
                           update_bqi=None,
                           update_iqi=None,
                           update_oqi=None)
    else:
        return QuantLinear(in_features=in_features,
                           out_features=out_features,
                           bias=bias,
                           weight_bit_width=kwargs['bit_width'])
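# Hedged usage sketch of the helper above: the default (no_quant=True) builds a
# QuantLinear with every quantizer disabled, i.e. effectively a plain float linear
# layer, while no_quant=False enables weight quantization at the requested bit width.
fp_layer = make_qlinear(in_features=10, out_features=5)
q_layer = make_qlinear(in_features=10, out_features=5, no_quant=False, bit_width=4)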
def __init__(self):
    super().__init__()
    self.linear = QuantLinear(
        in_features=IN_CH,
        out_features=OUT_CH,
        bias=True,
        weight_quant=Int8WeightPerTensorFloat,
        input_bit_width=7,
        output_bit_width=7,
        input_quant=ShiftedUint8ActPerTensorFloat,
        output_quant=ShiftedUint8ActPerTensorFloat,
        bias_quant=IntBiasExternalBitWidth,
        return_quant_tensor=False)
    self.linear.weight.data.uniform_(-0.01, 0.01)
def test_quant_linear(bias, bias_quant, out_features, in_features, w_bits,
                      channel_scaling, i_bits):
    # required to generate quantized inputs, not part of the exported model to test
    quant_inp = QuantIdentity(bit_width=i_bits, return_quant_tensor=True)
    inp_tensor = quant_inp(torch.randn(1, in_features))
    linear = QuantLinear(out_features=out_features,
                         in_features=in_features,
                         bias=bias,
                         bias_quant=bias_quant,
                         weight_bit_width=w_bits,
                         weight_scaling_per_output_channel=channel_scaling)
    linear.eval()
    model = bo.export_finn_onnx(linear, input_t=inp_tensor, export_path='linear.onnx')
    model = ModelWrapper(model)
    model = model.transform(InferShapes())
    # the quantized input tensor passed to FINN should be in integer form
    int_inp_array = inp_tensor.int(float_datatype=True).numpy()
    idict = {model.graph.input[0].name: int_inp_array}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    expected = linear(inp_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
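# Hedged aside on the integer-form input used above: for a Brevitas QuantTensor,
# int(float_datatype=True) returns the integer representation in a float container,
# so value == int * scale should hold for the symmetric (zero zero-point) quantizer
# produced by QuantIdentity here; this is an assumption for other quantizer configs.
quant_inp = QuantIdentity(bit_width=4, return_quant_tensor=True)
qt = quant_inp(torch.randn(1, 8))
assert torch.allclose(qt.value, qt.int(float_datatype=True) * qt.scale)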
def __init__(self, channels, first_stage_stride, bit_width,
             in_channels=3, num_classes=1000):
    super(MobileNet, self).__init__()
    init_block_channels = channels[0][0]
    self.features = Sequential()
    init_block = ConvBlock(in_channels=in_channels,
                           out_channels=init_block_channels,
                           kernel_size=3,
                           stride=2,
                           weight_bit_width=FIRST_LAYER_BIT_WIDTH,
                           activation_scaling_per_channel=True,
                           act_bit_width=bit_width)
    self.features.add_module('init_block', init_block)
    in_channels = init_block_channels
    for i, channels_per_stage in enumerate(channels[1:]):
        stage = Sequential()
        pw_activation_scaling_per_channel = i < len(channels[1:]) - 1
        for j, out_channels in enumerate(channels_per_stage):
            stride = 2 if (j == 0) and ((i != 0) or first_stage_stride) else 1
            mod = DwsConvBlock(
                in_channels=in_channels,
                out_channels=out_channels,
                stride=stride,
                bit_width=bit_width,
                pw_activation_scaling_per_channel=pw_activation_scaling_per_channel)
            stage.add_module('unit{}'.format(j + 1), mod)
            in_channels = out_channels
        self.features.add_module('stage{}'.format(i + 1), stage)
    self.final_pool = QuantAvgPool2d(kernel_size=7, stride=1, bit_width=bit_width)
    self.output = QuantLinear(in_channels, num_classes,
                              bias=True,
                              bias_quant=IntBias,
                              weight_quant=CommonIntWeightPerTensorQuant,
                              weight_bit_width=bit_width)
def test_parameter_from_stats_update():
    config.IGNORE_MISSING_KEYS = True
    linear = nn.Linear(10, 5, bias=False)
    q_linear = QuantLinear(10, 5,
                           bias=False,
                           weight_quant_type='binary',
                           weight_scaling_impl_type='parameter_from_stats')
    l_max = linear.weight.abs().max()
    old_scale = q_linear.quant_weight_scale()
    old_ql_max = q_linear.weight.abs().max()
    q_linear.load_state_dict(linear.state_dict())
    new_scale = q_linear.quant_weight_scale()
    new_ql_max = q_linear.weight.abs().max()
    assert old_scale == old_ql_max
    assert new_scale == l_max
    assert new_scale == new_ql_max
def test_module_init_scale_impl_type_override(self):
    mod = QuantLinear(
        out_features=OUTPUT_FEATURES,
        in_features=INPUT_FEATURES,
        bias=True,
        weight_scaling_impl_type='HE')
    assert mod.quant_weight_scale()
def __init__(self, channels, init_block_channels, final_block_channels,
             residuals, shortcuts, kernel_sizes, expansions, quant_type,
             bit_width, depthwise_bit_width, first_layer_bit_width,
             hard_tanh_threshold, dropout_rate, dropout_steps,
             weight_scaling_impl_type, compute_micronet_cost,
             input_bit_width=8, bn_eps=1e-3, in_channels=3, num_classes=1000):
    super(ProxylessNAS, self).__init__()
    self.compute_micronet_cost = compute_micronet_cost
    self.input_bit_width = torch.tensor(input_bit_width).float().cuda()
    self.num_classes = num_classes
    self.dropout_rate = dropout_rate
    self.dropout_steps = dropout_steps
    self.features = nn.Sequential()
    self.features.add_module(
        "init_block",
        ConvBlock(in_channels=in_channels,
                  out_channels=init_block_channels,
                  kernel_size=3,
                  stride=2,
                  padding=1,
                  groups=1,
                  bn_eps=bn_eps,
                  act_scaling_per_channel=False,
                  weight_scaling_impl_type=weight_scaling_impl_type,
                  bias=False,
                  quant_type=quant_type,
                  act_bit_width=bit_width,
                  weight_bit_width=first_layer_bit_width,
                  compute_micronet_cost=compute_micronet_cost))
    in_channels = init_block_channels
    shared_act = None
    for i, channels_per_stage in enumerate(channels):
        stage = nn.Sequential()
        residuals_per_stage = residuals[i]
        shortcuts_per_stage = shortcuts[i]
        kernel_sizes_per_stage = kernel_sizes[i]
        expansions_per_stage = expansions[i]
        for j, out_channels in enumerate(channels_per_stage):
            residual = (residuals_per_stage[j] == 1)
            shortcut = (shortcuts_per_stage[j] == 1)
            kernel_size = kernel_sizes_per_stage[j]
            expansion = expansions_per_stage[j]
            stride = 2 if (j == 0) and (i != 0) else 1
            if not shortcut:
                shared_act = QuantHardTanh(
                    bit_width=bit_width,
                    quant_type=quant_type,
                    scaling_per_channel=False,
                    scaling_impl_type=ScalingImplType.PARAMETER,
                    scaling_min_val=MIN_SCALING_VALUE,
                    max_val=hard_tanh_threshold,
                    min_val=-hard_tanh_threshold,
                    restrict_scaling_type=RestrictValueType.LOG_FP,
                    return_quant_tensor=True)
            stage.add_module(
                "unit{}".format(j + 1),
                ProxylessUnit(in_channels=in_channels,
                              out_channels=out_channels,
                              kernel_size=kernel_size,
                              stride=stride,
                              bn_eps=bn_eps,
                              expansion=expansion,
                              residual=residual,
                              shortcut=shortcut,
                              bit_width=bit_width,
                              depthwise_bit_width=depthwise_bit_width,
                              quant_type=quant_type,
                              weight_scaling_impl_type=weight_scaling_impl_type,
                              shared_act=shared_act,
                              compute_micronet_cost=compute_micronet_cost))
            in_channels = out_channels
        self.features.add_module("stage{}".format(i + 1), stage)
    self.features.add_module(
        "final_block",
        ConvBlock(in_channels=in_channels,
                  out_channels=final_block_channels,
                  kernel_size=1,
                  stride=1,
                  padding=0,
                  groups=1,
                  bn_eps=bn_eps,
                  act_scaling_per_channel=False,
                  quant_type=quant_type,
                  act_bit_width=bit_width,
                  weight_bit_width=bit_width,
                  weight_scaling_impl_type=weight_scaling_impl_type,
                  bias=False,
                  compute_micronet_cost=compute_micronet_cost))
    in_channels = final_block_channels
    self.final_pool = QuantAvgPool2d(kernel_size=7,
                                     stride=1,
                                     quant_type=quant_type,
                                     min_overall_bit_width=bit_width,
                                     max_overall_bit_width=bit_width)
    self.output = QuantLinear(
        in_features=in_channels,
        out_features=num_classes,
        bias=True,
        bias_quant_type=quant_type,
        compute_output_bit_width=quant_type == QuantType.INT,
        compute_output_scale=quant_type == QuantType.INT,
        weight_bit_width=bit_width,
        weight_quant_type=quant_type,
        weight_scaling_min_val=MIN_SCALING_VALUE,
        weight_scaling_per_output_channel=False,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_narrow_range=True,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_scaling_impl_type=weight_scaling_impl_type,
        return_quant_tensor=True)
    self._init_params()
def __init__(self):
    super(QuantLeNet, self).__init__()
    self.conv1 = QuantConv2d(1, 6, 5,
                             weight_quant=None, input_quant=None,
                             bias_quant=None, output_quant=None,
                             update_wqi=None, update_bqi=None,
                             update_iqi=None, update_oqi=None)
    self.relu1 = QuantReLU(input_quant=None, act_quant=None, output_quant=None,
                           update_iqi=None, update_aqi=None)
    self.conv2 = QuantConv2d(6, 16, 5,
                             weight_quant=None, input_quant=None,
                             bias_quant=None, output_quant=None,
                             update_wqi=None, update_bqi=None,
                             update_iqi=None, update_oqi=None)
    self.relu2 = QuantReLU(input_quant=None, act_quant=None, output_quant=None,
                           update_iqi=None, update_aqi=None)
    self.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True,
                           weight_quant=None, input_quant=None,
                           bias_quant=None, output_quant=None,
                           update_wqi=None, update_bqi=None,
                           update_iqi=None, update_oqi=None)
    self.relu3 = QuantReLU(input_quant=None, act_quant=None, output_quant=None,
                           update_iqi=None, update_aqi=None)
    self.fc2 = QuantLinear(120, 84, bias=True,
                           weight_quant=None, input_quant=None,
                           bias_quant=None, output_quant=None,
                           update_wqi=None, update_bqi=None,
                           update_iqi=None, update_oqi=None)
    self.relu4 = QuantReLU(input_quant=None, act_quant=None, output_quant=None,
                           update_iqi=None, update_aqi=None)
    self.fc3 = QuantLinear(84, 10, bias=False,
                           weight_quant=None, input_quant=None,
                           bias_quant=None, output_quant=None,
                           update_wqi=None, update_bqi=None,
                           update_iqi=None, update_oqi=None)
def get_8_bits_quantized_lenet():
    model = QuantLeNet()
    model.conv1 = QuantConv2d(1, 6, 5,
                              weight_quant=LSQ_weight_quant_8bits,
                              bias_quant=None, input_quant=None, output_quant=None,
                              update_wqi=None, update_bqi=None,
                              update_iqi=None, update_oqi=None)
    model.relu1 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None, update_aqi=None)
    model.conv2 = QuantConv2d(6, 16, 5,
                              weight_quant=LSQ_weight_quant_8bits,
                              bias_quant=None, input_quant=None, output_quant=None,
                              update_wqi=None, update_bqi=None,
                              update_iqi=None, update_oqi=None)
    model.relu2 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None, update_aqi=None)
    model.fc1 = QuantLinear(16 * 5 * 5, 120, bias=True,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None, input_quant=None, output_quant=None,
                            update_wqi=None, update_bqi=None,
                            update_iqi=None, update_oqi=None)
    model.relu3 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None, update_aqi=None)
    model.fc2 = QuantLinear(120, 84, bias=True,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None, input_quant=None, output_quant=None,
                            update_wqi=None, update_bqi=None,
                            update_iqi=None, update_oqi=None)
    model.relu4 = QuantReLU(input_quant=None,
                            act_quant=LSQ_input_quant_8bits,
                            output_quant=None,
                            update_iqi=None, update_aqi=None)
    model.fc3 = QuantLinear(84, 10, bias=False,
                            weight_quant=LSQ_weight_quant_8bits,
                            bias_quant=None, input_quant=None, output_quant=None,
                            update_wqi=None, update_bqi=None,
                            update_iqi=None, update_oqi=None)
    return model