def test_brevitas_act_export_qhardtanh_nonscaled(abits, narrow_range, max_val):
    def get_quant_type(bit_width):
        if bit_width is None:
            return QuantType.FP
        elif bit_width == 1:
            return QuantType.BINARY
        else:
            return QuantType.INT

    act_quant_type = get_quant_type(abits)
    min_val = -1.0
    ishape = (1, 10)
    b_act = QuantHardTanh(
        bit_width=abits,
        quant_type=act_quant_type,
        max_val=max_val,
        min_val=min_val,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_impl_type=ScalingImplType.CONST,
        narrow_range=narrow_range,
    )
    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
        np.float32
    )
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    expected = b_act.forward(inp_tensor).detach().numpy()
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def thresholds(module: QuantHardTanh, extend_tensor_to_channels=True):
    bit_width = int(module.quant_act_bit_width().item())
    if bit_width != 1:
        if module.is_quant_act_narrow_range:
            # assuming narrow range, symmetric quantization around zero
            # when using narrow range, we represent one element less
            num_distinct_values = 2 ** bit_width - 1
        else:
            num_distinct_values = 2 ** bit_width
        num_thresholds = num_distinct_values - 1
        flat_scale = module.quant_act_scale().view(-1)
        num_scale_channels = flat_scale.shape[0]
        step = torch.abs(flat_scale)
        half_step = step / 2.0
        thresholds = torch.empty(num_scale_channels, num_thresholds)
        # compute the value of the smallest threshold, we'll neg-bias all
        # generated thresholds by this much
        min_threshold = -half_step - step * ((num_thresholds // 2) - 1)
        if not module.is_quant_act_narrow_range:
            min_threshold -= step
        for c in range(num_scale_channels):
            for t in range(num_thresholds):
                thresholds[c][t] = min_threshold[c] + step[c] * t
        if extend_tensor_to_channels:
            output_channels = module._cached_inp.shape[1]
            final_shape = (output_channels, num_thresholds)
            if thresholds.shape != final_shape:
                thresholds = thresholds.expand(final_shape)
        return thresholds
    else:
        thresholds = torch.empty([1, 1])
        thresholds[0] = 0
        return thresholds
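# To make the threshold arithmetic above concrete, here is a standalone sketch
# (hypothetical, not part of the export handler) that reproduces the computation
# for a single scale channel. For a 2-bit activation with unit scale, narrow
# range yields two thresholds at -0.5 and 0.5; full range adds a third at -1.5.
def toy_thresholds(bit_width: int, scale: float, narrow_range: bool):
    num_distinct_values = 2 ** bit_width - (1 if narrow_range else 0)
    num_thresholds = num_distinct_values - 1
    step = abs(scale)
    # smallest threshold, centered so the quantization levels straddle zero
    min_threshold = -step / 2.0 - step * ((num_thresholds // 2) - 1)
    if not narrow_range:
        min_threshold -= step
    return [min_threshold + step * t for t in range(num_thresholds)]

assert toy_thresholds(2, 1.0, True) == [-0.5, 0.5]
assert toy_thresholds(2, 1.0, False) == [-1.5, -0.5, 0.5]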
def __init__(self, model_config):
    super(JetSubstructureNeqModel, self).__init__()
    self.model_config = model_config
    self.num_neurons = (
        [model_config["input_length"]]
        + model_config["hidden_layers"]
        + [model_config["output_length"]]
    )
    layer_list = []
    for i in range(1, len(self.num_neurons)):
        in_features = self.num_neurons[i - 1]
        out_features = self.num_neurons[i]
        bn = nn.BatchNorm1d(out_features)
        if i == 1:
            # first layer: quantize the batch-normalized, biased raw inputs
            bn_in = nn.BatchNorm1d(in_features)
            input_bias = ScalarBiasScale(scale=False, bias_init=-0.25)
            input_quant = QuantBrevitasActivation(
                QuantHardTanh(
                    model_config["input_bitwidth"],
                    max_val=1.0,
                    narrow_range=False,
                    quant_type=QuantType.INT,
                    scaling_impl_type=ScalingImplType.PARAMETER,
                ),
                pre_transforms=[bn_in, input_bias],
            )
            output_quant = QuantBrevitasActivation(
                QuantReLU(
                    bit_width=model_config["hidden_bitwidth"],
                    max_val=1.61,
                    quant_type=QuantType.INT,
                    scaling_impl_type=ScalingImplType.PARAMETER,
                ),
                pre_transforms=[bn],
            )
            mask = RandomFixedSparsityMask2D(
                in_features, out_features, fan_in=model_config["input_fanin"]
            )
            layer = SparseLinearNeq(
                in_features,
                out_features,
                input_quant=input_quant,
                output_quant=output_quant,
                sparse_linear_kws={"mask": mask},
            )
            layer_list.append(layer)
        elif i == len(self.num_neurons) - 1:
            # last layer: hard-tanh output quantizer with a trainable bias/scale
            output_bias_scale = ScalarBiasScale(bias_init=0.33)
            output_quant = QuantBrevitasActivation(
                QuantHardTanh(
                    bit_width=model_config["output_bitwidth"],
                    max_val=1.33,
                    narrow_range=False,
                    quant_type=QuantType.INT,
                    scaling_impl_type=ScalingImplType.PARAMETER,
                ),
                pre_transforms=[bn],
                post_transforms=[output_bias_scale],
            )
            mask = RandomFixedSparsityMask2D(
                in_features, out_features, fan_in=model_config["output_fanin"]
            )
            layer = SparseLinearNeq(
                in_features,
                out_features,
                input_quant=layer_list[-1].output_quant,
                output_quant=output_quant,
                sparse_linear_kws={"mask": mask},
                apply_input_quant=False,
            )
            layer_list.append(layer)
        else:
            # hidden layers reuse the previous layer's output quantizer as input
            output_quant = QuantBrevitasActivation(
                QuantReLU(
                    bit_width=model_config["hidden_bitwidth"],
                    max_val=1.61,
                    quant_type=QuantType.INT,
                    scaling_impl_type=ScalingImplType.PARAMETER,
                ),
                pre_transforms=[bn],
            )
            mask = RandomFixedSparsityMask2D(
                in_features, out_features, fan_in=model_config["hidden_fanin"]
            )
            layer = SparseLinearNeq(
                in_features,
                out_features,
                input_quant=layer_list[-1].output_quant,
                output_quant=output_quant,
                sparse_linear_kws={"mask": mask},
                apply_input_quant=False,
            )
            layer_list.append(layer)
    self.module_list = nn.ModuleList(layer_list)
    self.is_verilog_inference = False
    self.latency = 1
    self.verilog_dir = None
    self.top_module_filename = None
    self.dut = None
    self.logfile = None
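# A hypothetical model_config for the constructor above; the key names are taken
# from the code, while the values are illustrative (16 inputs and 5 output
# classes match the jet substructure classification task).
jsc_config = {
    "input_length": 16,
    "output_length": 5,
    "hidden_layers": [64, 32, 32],
    "input_bitwidth": 2,
    "hidden_bitwidth": 2,
    "output_bitwidth": 2,
    "input_fanin": 3,
    "hidden_fanin": 3,
    "output_fanin": 3,
}
# model = JetSubstructureNeqModel(jsc_config)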
def quant_act_scale(module: QuantHardTanh):
    bit_width = int(module.quant_act_bit_width().item())
    quant_act_scale = module.quant_act_scale().type(torch.FloatTensor).detach()
    if bit_width != 1:
        return quant_act_scale
    else:
        assert (
            quant_act_scale.view(-1).shape[0] == 1
        ), "Unsupported BIPOLAR per channel scale"
        assert quant_act_scale.flatten().item() == 1.0, "Unsupported BIPOLAR scale != 1"
        return quant_act_scale * 2
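# The factor of two reflects the BIPOLAR level set {-1, +1}: adjacent levels sit
# 2 * scale apart rather than 1 * scale apart, so a unit-scale 1-bit quantizer
# exports with scale 2. An illustrative check (not part of the handler):
bipolar_levels = [-1.0, 1.0]
assert bipolar_levels[1] - bipolar_levels[0] == 2.0  # level step is 2 * scale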
def __init__(self, num_classes=10, weight_bit_width=None, act_bit_width=None,
             in_bit_width=None, in_ch=3):
    super(CNV, self).__init__()

    weight_quant_type = get_quant_type(weight_bit_width)
    act_quant_type = get_quant_type(act_bit_width)
    in_quant_type = get_quant_type(in_bit_width)
    max_in_val = 1 - 2 ** (-7)  # for Q1.7 input format
    self.conv_features = ModuleList()
    self.linear_features = ModuleList()

    self.conv_features.append(
        QuantHardTanh(
            bit_width=in_bit_width,
            quant_type=in_quant_type,
            max_val=max_in_val,
            restrict_scaling_type=RestrictValueType.POWER_OF_TWO,
            scaling_impl_type=ScalingImplType.CONST,
        )
    )

    for out_ch, is_pool_enabled in CNV_OUT_CH_POOL:
        self.conv_features.append(
            get_quant_conv2d(
                in_ch=in_ch,
                out_ch=out_ch,
                bit_width=weight_bit_width,
                quant_type=weight_quant_type,
            )
        )
        in_ch = out_ch
        self.conv_features.append(BatchNorm2d(in_ch, eps=1e-4))
        self.conv_features.append(get_act_quant(act_bit_width, act_quant_type))
        if is_pool_enabled:
            self.conv_features.append(MaxPool2d(kernel_size=2))

    for in_features, out_features in INTERMEDIATE_FC_FEATURES:
        self.linear_features.append(
            get_quant_linear(
                in_features=in_features,
                out_features=out_features,
                per_out_ch_scaling=INTERMEDIATE_FC_PER_OUT_CH_SCALING,
                bit_width=weight_bit_width,
                quant_type=weight_quant_type,
            )
        )
        self.linear_features.append(BatchNorm1d(out_features, eps=1e-4))
        self.linear_features.append(get_act_quant(act_bit_width, act_quant_type))

    self.linear_features.append(
        get_quant_linear(
            in_features=LAST_FC_IN_FEATURES,
            out_features=num_classes,
            per_out_ch_scaling=LAST_FC_PER_OUT_CH_SCALING,
            bit_width=weight_bit_width,
            quant_type=weight_quant_type,
        )
    )
    self.linear_features.append(TensorNorm())

    for m in self.modules():
        if isinstance(m, (QuantConv2d, QuantLinear)):
            torch.nn.init.uniform_(m.weight.data, -1, 1)
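# The Q1.7 bound above is plain fixed-point arithmetic: one sign bit plus seven
# fractional bits give a resolution of 2 ** -7 and a largest representable value
# one step below 1.0.
frac_bits = 7
lsb = 2 ** (-frac_bits)      # 0.0078125, the Q1.7 resolution
assert 1 - lsb == 0.9921875  # the max_in_val used in the constructor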
def get_act_quant(act_bit_width, act_quant_type):
    return QuantHardTanh(
        quant_type=act_quant_type,
        bit_width=act_bit_width,
        bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
        min_val=HARD_TANH_MIN,
        max_val=HARD_TANH_MAX,
        scaling_impl_type=ACT_SCALING_IMPL_TYPE,
        restrict_scaling_type=SCALING_VALUE_TYPE,
        scaling_per_channel=ACT_PER_OUT_CH_SCALING,
        narrow_range=NARROW_RANGE_ENABLED,
    )
def quant_act_bias(module: QuantHardTanh):
    bit_width = int(module.quant_act_bit_width().item())
    if bit_width == 1:
        return torch.tensor(-0.5).type(torch.FloatTensor)
    else:
        if module.is_quant_act_narrow_range:
            # narrow range drops the most negative integer, e.g. [-7, 7] for 4 bits
            min_non_scaled_val = -(2 ** (bit_width - 1) - 1)
        else:
            min_non_scaled_val = -(2 ** (bit_width - 1))
        return torch.tensor(min_non_scaled_val).type(torch.FloatTensor)
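# The returned bias is simply the smallest integer of the signed range; a
# hypothetical standalone check mirroring the logic above:
def min_int(bit_width: int, narrow_range: bool) -> int:
    if narrow_range:
        return -(2 ** (bit_width - 1) - 1)  # e.g. -7 for 4 bits
    return -(2 ** (bit_width - 1))          # e.g. -8 for 4 bits

assert min_int(4, True) == -7 and min_int(4, False) == -8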
def quant_type(
    module: QuantHardTanh, supported_int_bit_width_range: Tuple[int, ...] = (2, 33)
):
    bit_width = int(module.quant_act_bit_width().item())
    if bit_width == 1:
        return "BIPOLAR"
    elif bit_width in range(*supported_int_bit_width_range):
        # note: even though this particular config is intx (signed)
        # quantization, we set the export mode for MultiThreshold as
        # UINTX, since the signed bias is added as a separate node
        return f"UINT{bit_width}"
    else:
        raise RuntimeError(f"Unsupported input bit width {bit_width} for export")
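# Taken together with quant_act_bias above: a signed full-range 4-bit activation
# exports as UINT4 MultiThreshold outputs in [0, 15] plus a separate bias node
# adding -8, which recovers the signed range [-8, 7]. Illustrative only:
uint_outputs = range(0, 16)  # what the UINT4 MultiThreshold produces
signed = [u + (-8) for u in uint_outputs]
assert signed[0] == -8 and signed[-1] == 7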
def get_act_quant(act_bit_width, act_quant_type):
    if act_quant_type == QuantType.INT:
        act_scaling_impl_type = ScalingImplType.PARAMETER
    else:
        act_scaling_impl_type = ScalingImplType.CONST
    return QuantHardTanh(
        quant_type=act_quant_type,
        bit_width=act_bit_width,
        bit_width_impl_type=BIT_WIDTH_IMPL_TYPE,
        min_val=HARD_TANH_MIN,
        max_val=HARD_TANH_MAX,
        scaling_impl_type=act_scaling_impl_type,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_per_channel=ACT_PER_OUT_CH_SCALING,
        narrow_range=NARROW_RANGE_ENABLED,
    )
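# Usage is identical for both get_act_quant variants; for example (assuming the
# module-level constants such as HARD_TANH_MIN are defined as in this file):
act_int = get_act_quant(4, QuantType.INT)     # gets learned (PARAMETER) scaling
act_bin = get_act_quant(1, QuantType.BINARY)  # gets constant (CONST) scaling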
def __init__(self, channels, init_block_channels, final_block_channels, residuals,
             shortcuts, kernel_sizes, expansions, quant_type, bit_width,
             depthwise_bit_width, first_layer_bit_width, hard_tanh_threshold,
             dropout_rate, dropout_steps, weight_scaling_impl_type,
             compute_micronet_cost, input_bit_width=8, bn_eps=1e-3, in_channels=3,
             num_classes=1000):
    super(ProxylessNAS, self).__init__()
    self.compute_micronet_cost = compute_micronet_cost
    self.input_bit_width = torch.tensor(input_bit_width).float().cuda()
    self.num_classes = num_classes
    self.dropout_rate = dropout_rate
    self.dropout_steps = dropout_steps

    self.features = nn.Sequential()
    self.features.add_module(
        "init_block",
        ConvBlock(
            in_channels=in_channels,
            out_channels=init_block_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            bn_eps=bn_eps,
            act_scaling_per_channel=False,
            weight_scaling_impl_type=weight_scaling_impl_type,
            bias=False,
            quant_type=quant_type,
            act_bit_width=bit_width,
            weight_bit_width=first_layer_bit_width,
            compute_micronet_cost=compute_micronet_cost,
        ),
    )
    in_channels = init_block_channels
    shared_act = None
    for i, channels_per_stage in enumerate(channels):
        stage = nn.Sequential()
        residuals_per_stage = residuals[i]
        shortcuts_per_stage = shortcuts[i]
        kernel_sizes_per_stage = kernel_sizes[i]
        expansions_per_stage = expansions[i]
        for j, out_channels in enumerate(channels_per_stage):
            residual = residuals_per_stage[j] == 1
            shortcut = shortcuts_per_stage[j] == 1
            kernel_size = kernel_sizes_per_stage[j]
            expansion = expansions_per_stage[j]
            stride = 2 if (j == 0) and (i != 0) else 1
            if not shortcut:
                # a unit without a shortcut starts a new residual chain; every
                # unit in the chain reuses this activation so their scales match
                shared_act = QuantHardTanh(
                    bit_width=bit_width,
                    quant_type=quant_type,
                    scaling_per_channel=False,
                    scaling_impl_type=ScalingImplType.PARAMETER,
                    scaling_min_val=MIN_SCALING_VALUE,
                    max_val=hard_tanh_threshold,
                    min_val=-hard_tanh_threshold,
                    restrict_scaling_type=RestrictValueType.LOG_FP,
                    return_quant_tensor=True,
                )
            stage.add_module(
                "unit{}".format(j + 1),
                ProxylessUnit(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    bn_eps=bn_eps,
                    expansion=expansion,
                    residual=residual,
                    shortcut=shortcut,
                    bit_width=bit_width,
                    depthwise_bit_width=depthwise_bit_width,
                    quant_type=quant_type,
                    weight_scaling_impl_type=weight_scaling_impl_type,
                    shared_act=shared_act,
                    compute_micronet_cost=compute_micronet_cost,
                ),
            )
            in_channels = out_channels
        self.features.add_module("stage{}".format(i + 1), stage)
    self.features.add_module(
        "final_block",
        ConvBlock(
            in_channels=in_channels,
            out_channels=final_block_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            bn_eps=bn_eps,
            act_scaling_per_channel=False,
            quant_type=quant_type,
            act_bit_width=bit_width,
            weight_bit_width=bit_width,
            weight_scaling_impl_type=weight_scaling_impl_type,
            bias=False,
            compute_micronet_cost=compute_micronet_cost,
        ),
    )
    in_channels = final_block_channels
    self.final_pool = QuantAvgPool2d(
        kernel_size=7,
        stride=1,
        quant_type=quant_type,
        min_overall_bit_width=bit_width,
        max_overall_bit_width=bit_width,
    )
    self.output = QuantLinear(
        in_features=in_channels,
        out_features=num_classes,
        bias=True,
        bias_quant_type=quant_type,
        compute_output_bit_width=quant_type == QuantType.INT,
        compute_output_scale=quant_type == QuantType.INT,
        weight_bit_width=bit_width,
        weight_quant_type=quant_type,
        weight_scaling_min_val=MIN_SCALING_VALUE,
        weight_scaling_per_output_channel=False,
        weight_scaling_stats_op=StatsOp.MAX,
        weight_narrow_range=True,
        weight_restrict_scaling_type=RestrictValueType.LOG_FP,
        weight_scaling_impl_type=weight_scaling_impl_type,
        return_quant_tensor=True,
    )
    self._init_params()
def quant_type(module: QuantHardTanh):
    return finn_datatype(module.quant_act_bit_width(), module.is_quant_act_signed)
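# finn_datatype is defined elsewhere in the export code; presumably it maps a
# (bit width, signedness) pair onto a FINN DataType. A hypothetical sketch of
# such a mapping (the real helper returns a DataType object, not a string):
def finn_datatype_sketch(bit_width: int, signed: bool) -> str:
    if bit_width == 1:
        return "BIPOLAR" if signed else "BINARY"
    return ("INT" if signed else "UINT") + str(bit_width)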
def test_brevitas_act_export_qhardtanh_scaled(
    abits, narrow_range, min_val, max_val, scaling_impl_type
):
    def get_quant_type(bit_width):
        if bit_width is None:
            return QuantType.FP
        elif bit_width == 1:
            return QuantType.BINARY
        else:
            return QuantType.INT

    act_quant_type = get_quant_type(abits)
    ishape = (1, 15)
    b_act = QuantHardTanh(
        bit_width=abits,
        quant_type=act_quant_type,
        max_val=max_val,
        min_val=min_val,
        restrict_scaling_type=RestrictValueType.LOG_FP,
        scaling_impl_type=scaling_impl_type,
        narrow_range=narrow_range,
    )
    if scaling_impl_type == ScalingImplType.PARAMETER:
        checkpoint = {
            "act_quant_proxy.fused_activation_quant_proxy."
            "tensor_quant.scaling_impl.learned_value": torch.tensor(0.49).type(
                torch.FloatTensor
            )
        }
        b_act.load_state_dict(checkpoint)
    bo.export_finn_onnx(b_act, ishape, export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    model = model.transform(InferShapes())
    inp_tensor = np.random.uniform(low=min_val, high=max_val, size=ishape).astype(
        np.float32
    )
    idict = {model.graph.input[0].name: inp_tensor}
    odict = oxe.execute_onnx(model, idict, True)
    produced = odict[model.graph.output[0].name]
    inp_tensor = torch.from_numpy(inp_tensor).float()
    b_act.eval()
    expected = b_act.forward(inp_tensor).detach().numpy()
    if not np.isclose(produced, expected, atol=1e-3).all():
        print(
            "abits: ", abits,
            " | narrow_range: ", narrow_range,
            " | min_val: ", min_val,
            " | max_val: ", max_val,
        )
        print("layer scale: ", b_act.quant_act_scale().type(torch.FloatTensor).detach())
        print("export scale: ", b_act.export_act_scale)
        if abits < 5:
            print(
                "thres:",
                ", ".join(["{:8.4f}".format(x) for x in b_act.export_thres[0]]),
            )
            print("input:", ", ".join(["{:8.4f}".format(x) for x in inp_tensor[0]]))
            print("prod :", ", ".join(["{:8.4f}".format(x) for x in produced[0]]))
            print("expec:", ", ".join(["{:8.4f}".format(x) for x in expected[0]]))
    assert np.isclose(produced, expected, atol=1e-3).all()
    os.remove(export_onnx_path)
def test_module_init_min_max(self):
    mod = QuantHardTanh(min_val=-1.0, max_val=1.0)