def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool,
        weight_quant: Union[WeightQuantProxyProtocol, Type[Injector]] = Int8WeightPerTensorFloat,
        bias_quant: Union[BiasQuantProxyProtocol, Type[Injector]] = FloatBias,
        input_quant: Union[ActQuantProxyProtocol, Type[Injector]] = None,
        output_quant: Union[ActQuantProxyProtocol, Type[Injector]] = None,
        return_quant_tensor: bool = False,
        **kwargs) -> None:
    Linear.__init__(self, in_features, out_features, bias)
    QuantWBIOL.__init__(
        self,
        weight=self.weight,
        bias=self.bias,
        weight_quant=weight_quant,
        bias_quant=bias_quant,
        input_quant=input_quant,
        output_quant=output_quant,
        return_quant_tensor=return_quant_tensor,
        **kwargs)
def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool = True,
        rpu_config: Optional[RPUConfigAlias] = None,
        realistic_read_write: bool = False,
        weight_scaling_omega: Optional[float] = None,
):
    # Call super() after tile creation, including ``reset_parameters``.
    Linear.__init__(self, in_features, out_features, bias=bias)

    # Create tiles
    if rpu_config is None:
        rpu_config = SingleRPUConfig()

    AnalogModuleBase.__init__(
        self,
        in_features,
        out_features,
        bias,
        realistic_read_write,
        rpu_config.mapping
    )
    if self.analog_bias:
        raise ModuleError("AnalogLinearMapped only supports digital bias.")

    # More than one tile may need to be created. If so, divide the weight
    # matrix into equal pieces along the input dimension, using as many
    # tiles as needed.
    max_input_size = rpu_config.mapping.max_input_size
    max_output_size = rpu_config.mapping.max_output_size

    self.in_sizes = self.get_split_sizes(in_features, max_input_size)
    self.out_sizes = self.get_split_sizes(out_features, max_output_size)

    self.analog_tile_array = []
    for i, in_tile_size in enumerate(self.in_sizes):
        in_tiles = []
        for j, out_tile_size in enumerate(self.out_sizes):
            tile = rpu_config.tile_class(out_tile_size,
                                         in_tile_size,
                                         rpu_config,
                                         bias=self.analog_bias)
            self.register_analog_tile(tile, name=f"{i}_{j}")
            in_tiles.append(tile)
        self.analog_tile_array.append(in_tiles)

    # Set weights from the reset_parameters call
    self.set_weights(self.weight, self.bias, remap_weights=True,
                     weight_scaling_omega=weight_scaling_omega)

    # Unregister weight/bias as parameters but keep the fields for syncing
    self.unregister_parameter('weight')
    if self.analog_bias:
        self.unregister_parameter('bias')
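# Usage sketch (not part of the original source): a hypothetical instantiation of the
# mapped analog linear layer defined above, assuming an aihwkit-style API where the
# class is exposed as AnalogLinearMapped and the mapping limits live on a
# MappingParameter. The import paths and sizes below are illustrative assumptions.
import torch
from aihwkit.nn import AnalogLinearMapped
from aihwkit.simulator.configs import SingleRPUConfig
from aihwkit.simulator.configs.utils import MappingParameter

# Restrict each physical tile to 256x256, so a 1024x512 layer is split across tiles.
rpu_config = SingleRPUConfig(
    mapping=MappingParameter(max_input_size=256, max_output_size=256))
mapped_fc = AnalogLinearMapped(in_features=1024, out_features=512, bias=True,
                               rpu_config=rpu_config)
y = mapped_fc(torch.randn(8, 1024))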
def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool,
        weight_quant: Optional[WeightQuantType] = Int8WeightPerTensorFloat,
        bias_quant: Optional[BiasQuantType] = None,
        input_quant: Optional[ActQuantType] = None,
        output_quant: Optional[ActQuantType] = None,
        return_quant_tensor: bool = False,
        **kwargs) -> None:
    Linear.__init__(self, in_features, out_features, bias)
    QuantWBIOL.__init__(
        self,
        weight_quant=weight_quant,
        bias_quant=bias_quant,
        input_quant=input_quant,
        output_quant=output_quant,
        return_quant_tensor=return_quant_tensor,
        **kwargs)
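# Usage sketch (not part of the original source): a hypothetical instantiation of the
# quantized linear layer defined above, assuming a Brevitas-style API where the class
# is exposed as QuantLinear and Int8WeightPerTensorFloat is an importable weight
# quantizer. Shapes and feature sizes are illustrative.
import torch
from brevitas.nn import QuantLinear
from brevitas.quant import Int8WeightPerTensorFloat

quant_fc = QuantLinear(
    in_features=512,
    out_features=10,
    bias=True,
    weight_quant=Int8WeightPerTensorFloat,
    return_quant_tensor=False)
out = quant_fc(torch.randn(4, 512))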
def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool = True,
        rpu_config: Optional[RPUConfigAlias] = None,
        realistic_read_write: bool = False,
        weight_scaling_omega: Optional[float] = None,
):
    # Call super() after tile creation, including ``reset_parameters``.
    Linear.__init__(self, in_features, out_features, bias=bias)

    # Create tile
    if rpu_config is None:
        rpu_config = SingleRPUConfig()

    AnalogModuleBase.__init__(
        self,
        in_features,
        out_features,
        bias,
        realistic_read_write,
        weight_scaling_omega,
        rpu_config.mapping
    )
    self.analog_tile = self._setup_tile(rpu_config)

    # Register tile
    self.register_analog_tile(self.analog_tile)

    # Set weights from the reset_parameters call
    self.set_weights(self.weight, self.bias)

    # Unregister weight/bias as a parameter but keep it as a
    # field (needed for syncing still)
    self.unregister_parameter('weight')
    if self.analog_bias:
        self.unregister_parameter('bias')
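# Usage sketch (not part of the original source): a hypothetical instantiation of the
# single-tile analog linear layer defined above, assuming an aihwkit-style API where
# the class is exposed as AnalogLinear; passing no rpu_config falls back to
# SingleRPUConfig() as in the constructor above. Sizes are illustrative.
import torch
from aihwkit.nn import AnalogLinear
from aihwkit.simulator.configs import SingleRPUConfig

analog_fc = AnalogLinear(
    in_features=256,
    out_features=64,
    bias=True,
    rpu_config=SingleRPUConfig(),
    realistic_read_write=False)
y = analog_fc(torch.randn(8, 256))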
def __init__(self,
             in_features,
             out_features,
             bias=True,
             cast_func=void_cast_func,
             n_train_sample=1):
    BitCenterLayer.__init__(
        self,
        fp_functional=F.linear,
        lp_functional=bit_center_linear,
        bias=bias,
        cast_func=cast_func,
        n_train_sample=n_train_sample)
    Linear.__init__(
        self,
        in_features=in_features,
        out_features=out_features,
        bias=bias)
    # weight_delta is the delta tensor in the algorithm, while weight_lp is the
    # cached low-precision version of the weight offset.
    self.setup_bit_center_vars()
    # Make sure the variables are on GPU, as fp16 is only supported on GPU.
    self.cuda()
    self.reset_parameters_bit_center()
    # Register a backward hook to update the gradient cache for the output grad.
    self.register_backward_hook(self.update_grad_output_cache)
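# Usage sketch (not part of the original source): hypothetical construction of the
# bit-centering linear layer defined above. The class name BitCenterLinear and the
# availability of void_cast_func are assumptions based on the signature; a CUDA
# device is required because the constructor calls self.cuda().
bc_fc = BitCenterLinear(
    in_features=128,
    out_features=32,
    bias=True,
    cast_func=void_cast_func,
    n_train_sample=1024)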
def __init__(self,
             in_features: int,
             out_features: int,
             bias: bool,
             bias_quant_type: QuantType = QuantType.FP,
             bias_narrow_range: bool = False,
             bias_bit_width: int = None,
             weight_quant_override: WeightQuantProxy = None,
             weight_quant_type: QuantType = QuantType.FP,
             weight_narrow_range: bool = False,
             weight_bit_width_impl_override: Union[BitWidthParameter, BitWidthConst] = None,
             weight_bit_width_impl_type: BitWidthImplType = BitWidthImplType.CONST,
             weight_restrict_bit_width_type: RestrictValueType = RestrictValueType.INT,
             weight_bit_width: int = 32,
             weight_min_overall_bit_width: Optional[int] = 2,
             weight_max_overall_bit_width: Optional[int] = None,
             weight_scaling_override: Optional[Module] = None,
             weight_scaling_impl_type: ScalingImplType = ScalingImplType.STATS,
             weight_scaling_const: Optional[float] = None,
             weight_scaling_stats_op: StatsOp = StatsOp.MAX,
             weight_scaling_per_output_channel: bool = False,
             weight_scaling_min_val: float = SCALING_MIN_VAL,
             weight_ternary_threshold: float = 0.5,
             weight_restrict_scaling_type: RestrictValueType = RestrictValueType.LOG_FP,
             weight_scaling_stats_sigma: float = 3.0,
             weight_override_pretrained_bit_width: bool = False,
             compute_output_scale: bool = False,
             compute_output_bit_width: bool = False,
             return_quant_tensor: bool = False) -> None:
    QuantLayer.__init__(
        self,
        compute_output_scale=compute_output_scale,
        compute_output_bit_width=compute_output_bit_width,
        return_quant_tensor=return_quant_tensor)
    Linear.__init__(
        self,
        in_features=in_features,
        out_features=out_features,
        bias=bias)
    if weight_quant_type == QuantType.FP and compute_output_bit_width:
        raise Exception("Computing the output bit width requires enabling quantization")
    if bias_quant_type != QuantType.FP and not (compute_output_scale and compute_output_bit_width):
        raise Exception("Quantizing the bias requires computing the output scale and output bit width")

    self.per_elem_ops = 2 * in_features
    self.weight_reg = WeightReg()

    if weight_quant_override is not None:
        self.weight_quant = weight_quant_override
        self.weight_quant.add_tracked_tensor(self.weight)
    else:
        weight_scaling_stats_input_concat_dim = 1
        if weight_scaling_per_output_channel:
            weight_stats_input_view_shape_impl = StatsInputViewShapeImpl.OVER_OUTPUT_CHANNELS
            weight_scaling_shape = (self.out_features, 1)
            weight_scaling_stats_reduce_dim = 1
        else:
            weight_stats_input_view_shape_impl = StatsInputViewShapeImpl.OVER_TENSOR
            weight_scaling_shape = SCALING_SCALAR_SHAPE
            weight_scaling_stats_reduce_dim = None

        self.weight_quant = WeightQuantProxy(
            bit_width=weight_bit_width,
            quant_type=weight_quant_type,
            narrow_range=weight_narrow_range,
            scaling_override=weight_scaling_override,
            restrict_scaling_type=weight_restrict_scaling_type,
            scaling_const=weight_scaling_const,
            scaling_stats_op=weight_scaling_stats_op,
            scaling_impl_type=weight_scaling_impl_type,
            scaling_stats_reduce_dim=weight_scaling_stats_reduce_dim,
            scaling_shape=weight_scaling_shape,
            bit_width_impl_type=weight_bit_width_impl_type,
            bit_width_impl_override=weight_bit_width_impl_override,
            restrict_bit_width_type=weight_restrict_bit_width_type,
            min_overall_bit_width=weight_min_overall_bit_width,
            max_overall_bit_width=weight_max_overall_bit_width,
            tracked_parameter_list_init=self.weight,
            ternary_threshold=weight_ternary_threshold,
            scaling_stats_input_view_shape_impl=weight_stats_input_view_shape_impl,
            scaling_stats_input_concat_dim=weight_scaling_stats_input_concat_dim,
            scaling_stats_sigma=weight_scaling_stats_sigma,
            scaling_min_val=weight_scaling_min_val,
            override_pretrained_bit_width=weight_override_pretrained_bit_width)
    self.bias_quant = BiasQuantProxy(
        quant_type=bias_quant_type,
        narrow_range=bias_narrow_range,
        bit_width=bias_bit_width)
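# Usage sketch (not part of the original source): a hypothetical call to the
# constructor above, enabling integer weight quantization at 4 bits with
# per-output-channel statistics-based scaling. All keyword names and enum members
# come from the signature above; the class name QuantLinear is an assumption, and
# no bias quantization is requested, so output scale/bit width need not be computed.
quant_fc = QuantLinear(
    in_features=784,
    out_features=256,
    bias=True,
    weight_quant_type=QuantType.INT,
    weight_bit_width=4,
    weight_scaling_impl_type=ScalingImplType.STATS,
    weight_scaling_per_output_channel=True,
    compute_output_scale=False,
    compute_output_bit_width=False)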