Example #1
def test_brevitas_act_export_qhardtanh_nonscaled(abits, narrow_range, max_val):
    def get_quant_type(bit_width):
        if bit_width is None:
            return QuantType.FP
        elif bit_width == 1:
            return QuantType.BINARY
        else:
            return QuantType.INT

    act_quant_type = get_quant_type(abits)
    min_val = -1.0
    ishape = (1, 10)
    b_act = QuantHardTanh(
        bit_width=abits,
        quant_type=act_quant_type,
        max_val=max_val,
        min_val=min_val,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_impl_type=ScalingImplType.CONST,
        narrow_range=narrow_range,
    )
    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val,
                                   size=ishape).astype(np.float32)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    expected = b_act.forward(inp_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
Example #2
 def thresholds(module: QuantHardTanh, extend_tensor_to_channels=True):
     bit_width = int(module.quant_act_bit_width().item())
     if bit_width != 1:
         if module.is_quant_act_narrow_range:
             # narrow range: quantization is symmetric around zero and
             # represents one fewer level than the full range
             num_distinct_values = 2 ** bit_width - 1
         else:
             num_distinct_values = 2 ** bit_width
         num_thresholds = num_distinct_values - 1
         flat_scale = module.quant_act_scale().view(-1)
         num_scale_channels = flat_scale.shape[0]
         step = torch.abs(flat_scale)
         half_step = step / 2.0
         thresholds = torch.empty(num_scale_channels, num_thresholds)
         # compute the value of the smallest threshold; every generated
         # threshold is offset from it by an integer number of steps
         min_threshold = - half_step - step * ((num_thresholds // 2) - 1)
         if not module.is_quant_act_narrow_range:
             min_threshold -= step
         for c in range(num_scale_channels):
             for t in range(num_thresholds):
                 thresholds[c][t] = min_threshold[c] + step[c] * t
         if extend_tensor_to_channels:
             output_channels = module._cached_inp.shape[1]
             final_shape = (output_channels, num_thresholds)
             if thresholds.shape != final_shape:
                 thresholds = thresholds.expand(final_shape)
         return thresholds
     else:
         thresholds = torch.empty([1, 1])
         thresholds[0] = 0
         return thresholds
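
The helper above only builds the threshold matrix. The sketch below is not part of the original code; it assumes MultiThreshold-style semantics (the quantized level is the number of thresholds an input crosses) and shows how, together with the export bias and scale from the later examples, this reproduces the quantized activation for a hypothetical narrow-range 3-bit case.
import torch

bit_width = 3                                     # hypothetical narrow-range INT3 activation
scale = torch.tensor([0.25])                      # hypothetical per-tensor scale
num_thresholds = (2 ** bit_width - 1) - 1         # 7 levels -> 6 thresholds (narrow range)
half_step = scale / 2.0
min_threshold = -half_step - scale * ((num_thresholds // 2) - 1)
thresholds = min_threshold + scale * torch.arange(num_thresholds)

x = torch.tensor([-0.9, -0.1, 0.3, 0.9])
level = (x.unsqueeze(-1) >= thresholds).sum(dim=-1)   # thresholds crossed per element, 0..6
bias = -(2 ** (bit_width - 1) - 1)                # narrow-range bias, cf. quant_act_bias (Example #7)
print(scale * (level + bias))                     # -> tensor([-0.7500, 0.0000, 0.2500, 0.7500])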
Example #3
 def __init__(self, model_config):
     super(JetSubstructureNeqModel, self).__init__()
     self.model_config = model_config
     self.num_neurons = [model_config["input_length"]] + model_config["hidden_layers"] + [model_config["output_length"]]
     layer_list = []
     for i in range(1, len(self.num_neurons)):
         in_features = self.num_neurons[i-1]
         out_features = self.num_neurons[i]
         bn = nn.BatchNorm1d(out_features)
         if i == 1:
             bn_in = nn.BatchNorm1d(in_features)
             input_bias = ScalarBiasScale(scale=False, bias_init=-0.25)
             input_quant = QuantBrevitasActivation(
                 QuantHardTanh(model_config["input_bitwidth"], max_val=1., narrow_range=False,
                               quant_type=QuantType.INT,
                               scaling_impl_type=ScalingImplType.PARAMETER),
                 pre_transforms=[bn_in, input_bias])
             output_quant = QuantBrevitasActivation(
                 QuantReLU(bit_width=model_config["hidden_bitwidth"], max_val=1.61,
                           quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER),
                 pre_transforms=[bn])
             mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["input_fanin"])
             layer = SparseLinearNeq(in_features, out_features,
                                     input_quant=input_quant, output_quant=output_quant,
                                     sparse_linear_kws={'mask': mask})
             layer_list.append(layer)
         elif i == len(self.num_neurons)-1:
             output_bias_scale = ScalarBiasScale(bias_init=0.33)
             output_quant = QuantBrevitasActivation(
                 QuantHardTanh(bit_width=model_config["output_bitwidth"], max_val=1.33,
                               narrow_range=False, quant_type=QuantType.INT,
                               scaling_impl_type=ScalingImplType.PARAMETER),
                 pre_transforms=[bn], post_transforms=[output_bias_scale])
             mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["output_fanin"])
             layer = SparseLinearNeq(in_features, out_features,
                                     input_quant=layer_list[-1].output_quant,
                                     output_quant=output_quant,
                                     sparse_linear_kws={'mask': mask}, apply_input_quant=False)
             layer_list.append(layer)
         else:
             output_quant = QuantBrevitasActivation(
                 QuantReLU(bit_width=model_config["hidden_bitwidth"], max_val=1.61,
                           quant_type=QuantType.INT, scaling_impl_type=ScalingImplType.PARAMETER),
                 pre_transforms=[bn])
             mask = RandomFixedSparsityMask2D(in_features, out_features, fan_in=model_config["hidden_fanin"])
             layer = SparseLinearNeq(in_features, out_features,
                                     input_quant=layer_list[-1].output_quant,
                                     output_quant=output_quant,
                                     sparse_linear_kws={'mask': mask}, apply_input_quant=False)
             layer_list.append(layer)
     self.module_list = nn.ModuleList(layer_list)
     self.is_verilog_inference = False
     self.latency = 1
     self.verilog_dir = None
     self.top_module_filename = None
     self.dut = None
     self.logfile = None
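
For orientation, the constructor above only reads a handful of keys from model_config. A hypothetical configuration of that shape (key names taken from the code above; the values are purely illustrative and not from the source):
model_config = {
    "input_length": 16,              # number of input features
    "hidden_layers": [64, 32, 32],   # widths of the hidden SparseLinearNeq layers
    "output_length": 5,              # number of outputs
    "input_bitwidth": 2,
    "hidden_bitwidth": 2,
    "output_bitwidth": 2,
    "input_fanin": 3,                # fan-in of the sparsity masks per layer type
    "hidden_fanin": 3,
    "output_fanin": 3,
}
model = JetSubstructureNeqModel(model_config)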
Example #4
 def quant_act_scale(module: QuantHardTanh):
     bit_width = int(module.quant_act_bit_width().item())
     quant_act_scale = module.quant_act_scale().type(torch.FloatTensor).detach()
     if bit_width != 1:
         return quant_act_scale
     else:
         assert quant_act_scale.view(-1).shape[0] == 1, "Unsupported BIPOLAR per channel scale"
         assert quant_act_scale.flatten().item() == 1.0, "Unsupported BIPOLAR scale != 1"
         return quant_act_scale * 2
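
A short note on the bipolar branch (my reading of it, not stated in the source): with a single threshold at zero the counted level is 0 or 1, and the doubled scale combined with the -0.5 bias from quant_act_bias (Example #7) recovers the bipolar outputs of ±scale.
scale = 1.0                              # the asserts above require a scalar scale of exactly 1.0
for level in (0, 1):                     # level = number of thresholds crossed
    print(2 * scale * (level - 0.5))     # -> -1.0 and 1.0, the bipolar activation values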
Example #5
    def __init__(self,
                 num_classes=10,
                 weight_bit_width=None,
                 act_bit_width=None,
                 in_bit_width=None,
                 in_ch=3):
        super(CNV, self).__init__()

        weight_quant_type = get_quant_type(weight_bit_width)
        act_quant_type = get_quant_type(act_bit_width)
        in_quant_type = get_quant_type(in_bit_width)
        max_in_val = 1 - 2**(-7)  # for Q1.7 input format
        self.conv_features = ModuleList()
        self.linear_features = ModuleList()

        self.conv_features.append(
            QuantHardTanh(bit_width=in_bit_width,
                          quant_type=in_quant_type,
                          max_val=max_in_val,
                          restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
                          scaling_impl_type=ScalingImplType.CONST))

        for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
            self.conv_features.append(
                get_quant_conv2d(in_ch=in_ch,
                                 out_ch=out_ch,
                                 bit_width=weight_bit_width,
                                 quant_type=weight_quant_type))
            in_ch = out_ch
            self.conv_features.append(BatchNorm2d(in_ch, eps=1e-4))
            self.conv_features.append(
                get_act_quant(act_bit_width, act_quant_type))
            if is_pool_enabled:
                self.conv_features.append(MaxPool2d(kernel_size=2))

        for in_features, out_features in INTERMEDIATE_FC_FEATURES:
            self.linear_features.append(
                get_quant_linear(
                    in_features=in_features,
                    out_features=out_features,
                    per_out_ch_scaling=INTERMEDIATE_FC_PER_OUT_CH_SCALING,
                    bit_width=weight_bit_width,
                    quant_type=weight_quant_type))
            self.linear_features.append(BatchNorm1d(out_features, eps=1e-4))
            self.linear_features.append(
                get_act_quant(act_bit_width, act_quant_type))

        self.linear_features.append(
            get_quant_linear(in_features=LAST_FC_IN_FEATURES,
                             out_features=num_classes,
                             per_out_ch_scaling=LAST_FC_PER_OUT_CH_SCALING,
                             bit_width=weight_bit_width,
                             quant_type=weight_quant_type))
        self.linear_features.append(TensorNorm())

        for m in self.modules():
            if isinstance(m, QuantConv2d) or isinstance(m, QuantLinear):
                torch.nn.init.uniform_(m.weight.data, -1, 1)
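
As a side note on the max_in_val constant above: Q1.7 is an 8-bit signed fixed-point format with 7 fractional bits, so representable inputs run from -1.0 to 1 - 2**-7 in steps of 2**-7. A quick illustrative check (not from the source):
step = 2 ** -7
print(step, 1 - step)              # 0.0078125 0.9921875
print(-128 * step, 127 * step)     # the same range derived from the int8 codes -128..127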
Example #6
def get_act_quant(act_bit_width, act_quant_type):
    return QuantHardTanh(quant_type=act_quant_type,
                         bit_width=act_bit_width,
                         bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
                         min_val=HARD_TANH_MIN,
                         max_val=HARD_TANH_MAX,
                         scaling_impl_type=ACT_SCALING_IMPL_TYPE,
                         restrict_scaling_type=SCALING_VALUE_TYPE,
                         scaling_per_channel=ACT_PER_OUT_CH_SCALING,
                         narrow_range=NARROW_RANGE_ENABLED)
Example #7
 def quant_act_bias(module: QuantHardTanh):
     bit_width = int(module.quant_act_bit_width().item())
     if bit_width == 1:
         return torch.tensor(-0.5).type(torch.FloatTensor)
     else:
         if module.is_quant_act_narrow_range:
             min_non_scaled_val = - (2 ** (bit_width - 1) - 1)
         else:
             min_non_scaled_val = - 2 ** (bit_width - 1)
         return torch.tensor(min_non_scaled_val).type(torch.FloatTensor)
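
The returned value is simply the smallest integer level of the quantizer before scaling. A quick illustrative check for 4-bit activations (my numbers, consistent with the level counts produced by the thresholds() helper in Example #2):
bit_width = 4
print(-(2 ** (bit_width - 1) - 1))   # narrow range: -7, levels 0..14 shift to -7..7
print(-(2 ** (bit_width - 1)))       # full range:   -8, levels 0..15 shift to -8..7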
Example #8
 def quant_type(
         module: QuantHardTanh,
         supported_int_bit_width_range: Tuple[int,...] = (2, 33)):
     bit_width = int(module.quant_act_bit_width().item())
     if bit_width == 1:
         return "BIPOLAR"
     elif bit_width in range(*supported_int_bit_width_range):
         # note: even though this particular config is intx (signed)
         # quantization, we set the export mode for MultiThreshold as
         # UINTX, since the signed bias is added as a separate node
         return f"UINT{bit_width}"
     else:
         raise RuntimeError(f"Unsupported input bit width {bit_width} for export")
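
The note in the code describes the same decomposition used in the sketches above: MultiThreshold emits an unsigned level count, and the signed result is recovered by the separate bias (add) and scale (mul) nodes. A minimal numeric illustration, assuming a 4-bit narrow-range activation with a scale of 0.25:
scale, bias = 0.25, -7                   # cf. quant_act_scale / quant_act_bias above
for uint_level in (0, 7, 14):            # possible UINT4 MultiThreshold outputs
    print(scale * (uint_level + bias))   # -> -1.75, 0.0, 1.75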
Example #9
def get_act_quant(act_bit_width, act_quant_type):
    if act_quant_type == QuantType.INT:
        act_scaling_impl_type = ScalingImplType.PARAMETER
    else:
        act_scaling_impl_type = ScalingImplType.CONST
    return QuantHardTanh(quant_type=act_quant_type,
                         bit_width=act_bit_width,
                         bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
                         min_val=HARD_TANH_MIN,
                         max_val=HARD_TANH_MAX,
                         scaling_impl_type=act_scaling_impl_type,
                         restrict_scaling_type=RestrictValueType.LOG_FP,
                         scaling_per_channel=ACT_PER_OUT_CH_SCALING,
                         narrow_range=NARROW_RANGE_ENABLED)
Example #10
    def __init__(self,
                 channels,
                 init_block_channels,
                 final_block_channels,
                 residuals,
                 shortcuts,
                 kernel_sizes,
                 expansions,
                 quant_type,
                 bit_width,
                 depthwise_bit_width,
                 first_layer_bit_width,
                 hard_tanh_threshold,
                 dropout_rate,
                 dropout_steps,
                 weight_scaling_impl_type,
                 compute_micronet_cost,
                 input_bit_width=8,
                 bn_eps=1e-3,
                 in_channels=3,
                 num_classes=1000):
        super(ProxylessNAS, self).__init__()
        self.compute_micronet_cost = compute_micronet_cost
        self.input_bit_width = torch.tensor(input_bit_width).float().cuda()
        self.num_classes = num_classes
        self.dropout_rate = dropout_rate
        self.dropout_steps = dropout_steps

        self.features = nn.Sequential()
        self.features.add_module(
            "init_block",
            ConvBlock(in_channels=in_channels,
                      out_channels=init_block_channels,
                      kernel_size=3,
                      stride=2,
                      padding=1,
                      groups=1,
                      bn_eps=bn_eps,
                      act_scaling_per_channel=False,
                      weight_scaling_impl_type=weight_scaling_impl_type,
                      bias=False,
                      quant_type=quant_type,
                      act_bit_width=bit_width,
                      weight_bit_width=first_layer_bit_width,
                      compute_micronet_cost=compute_micronet_cost))
        in_channels = init_block_channels
        shared_act = None
        for i, channels_per_stage in enumerate(channels):
            stage = nn.Sequential()
            residuals_per_stage = residuals[i]
            shortcuts_per_stage = shortcuts[i]
            kernel_sizes_per_stage = kernel_sizes[i]
            expansions_per_stage = expansions[i]
            for j, out_channels in enumerate(channels_per_stage):
                residual = (residuals_per_stage[j] == 1)
                shortcut = (shortcuts_per_stage[j] == 1)
                kernel_size = kernel_sizes_per_stage[j]
                expansion = expansions_per_stage[j]
                stride = 2 if (j == 0) and (i != 0) else 1

                if not shortcut:
                    shared_act = QuantHardTanh(
                        bit_width=bit_width,
                        quant_type=quant_type,
                        scaling_per_channel=False,
                        scaling_impl_type=ScalingImplType.PARAMETER,
                        scaling_min_val=MIN_SCALING_VALUE,
                        max_val=hard_tanh_threshold,
                        min_val=-hard_tanh_threshold,
                        restrict_scaling_type=RestrictValueType.LOG_FP,
                        return_quant_tensor=True)

                stage.add_module(
                    "unit{}".format(j + 1),
                    ProxylessUnit(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_size=kernel_size,
                        stride=stride,
                        bn_eps=bn_eps,
                        expansion=expansion,
                        residual=residual,
                        shortcut=shortcut,
                        bit_width=bit_width,
                        depthwise_bit_width=depthwise_bit_width,
                        quant_type=quant_type,
                        weight_scaling_impl_type=weight_scaling_impl_type,
                        shared_act=shared_act,
                        compute_micronet_cost=compute_micronet_cost))
                in_channels = out_channels

            self.features.add_module("stage{}".format(i + 1), stage)
        self.features.add_module(
            "final_block",
            ConvBlock(in_channels=in_channels,
                      out_channels=final_block_channels,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      groups=1,
                      bn_eps=bn_eps,
                      act_scaling_per_channel=False,
                      quant_type=quant_type,
                      act_bit_width=bit_width,
                      weight_bit_width=bit_width,
                      weight_scaling_impl_type=weight_scaling_impl_type,
                      bias=False,
                      compute_micronet_cost=compute_micronet_cost))
        in_channels = final_block_channels
        self.final_pool = QuantAvgPool2d(kernel_size=7,
                                         stride=1,
                                         quant_type=quant_type,
                                         min_overall_bit_width=bit_width,
                                         max_overall_bit_width=bit_width)

        self.output = QuantLinear(
            in_features=in_channels,
            out_features=num_classes,
            bias=True,
            bias_quant_type=quant_type,
            compute_output_bit_width=quant_type == QuantType.INT,
            compute_output_scale=quant_type == QuantType.INT,
            weight_bit_width=bit_width,
            weight_quant_type=quant_type,
            weight_scaling_min_val=MIN_SCALING_VALUE,
            weight_scaling_per_output_channel=False,
            weight_scaling_stats_op=StatsOp.MAX,
            weight_narrow_range=True,
            weight_restrict_scaling_type=RestrictValueType.LOG_FP,
            weight_scaling_impl_type=weight_scaling_impl_type,
            return_quant_tensor=True)

        self._init_params()
Example #11
 def quant_type(module: QuantHardTanh):
     return finn_datatype(module.quant_act_bit_width(),
                          module.is_quant_act_signed)
Example #12
def test_brevitas_act_export_qhardtanh_scaled(abits, narrow_range, min_val,
                                              max_val, scaling_impl_type):
    def get_quant_type(bit_width):
        if bit_width is None:
            return QuantType.FP
        elif bit_width == 1:
            return QuantType.BINARY
        else:
            return QuantType.INT

    act_quant_type = get_quant_type(abits)
    ishape = (1, 15)
    b_act = QuantHardTanh(
        bit_width=abits,
        quant_type=act_quant_type,
        max_val=max_val,
        min_val=min_val,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_impl_type=scaling_impl_type,
        narrow_range=narrow_range,
    )
    if scaling_impl_type == ScalingImplType.PARAMETER:
        checkpoint = {
            "act_quant_proxy.fused_activation_quant_proxy.\
tensor_quant.scaling_impl.learned_value":
            torch.tensor(0.49).type(torch.FloatTensor)
        }
        b_act.load_state_dict(checkpoint)

    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val,
                                   size=ishape).astype(np.float32)
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    b_act.eval()
    expected = b_act.forward(inp_tensor).detach().numpy()
    if not np.isclose(produced, expected, atol=1e-3).all():
        print(
            "abits: ",
            abits,
            " | narrow_range: ",
            narrow_range,
            " | min_val: ",
            min_val,
            " | max_val: ",
            max_val,
        )
        print("layer scale: ",
              b_act.quant_act_scale().type(torch.FloatTensor).detach())
        print("export scale: ", b_act.export_act_scale)
        if abits < 5:
            print(
                "thres:",
                ", ".join(["{:8.4f}".format(x)
                           for x in b_act.export_thres[0]]),
            )
        print("input:",
              ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]]))
        print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
        print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))

    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
Example #13
 def test_module_init_min_max(self):
     mod = QuantHardTanh(min_val=-1.0, max_val=1.0)