def _quantize(cls, params, in_qs, stats, **kwargs):
    """Build a pow2 (Q-format) symmetric quantization record for a filter node.

    Chooses fixed-point formats (QType) for the input, weights, biases,
    accumulator and output of ``params`` from the recorded value ranges in
    ``stats``.  If the 32-bit accumulator cannot hold the integer part of
    the accumulated range at the chosen fractional precision, weight
    precision is reduced to make room.

    Args:
        params: the filter node (or fused filter) being quantized.
        in_qs: quantization of the node's inputs; only ``in_qs[0]`` is read.
        stats: statistics for this node; must contain ``'range_acc'`` and
            ``'range_out'``.
        kwargs: must supply ``'G'`` (the graph); may supply ``'fusion'``
            (enclosing fusion node) and ``'all_stats'`` (per-node stats,
            read when the filter is fused with an activation), plus the
            options consumed by ``cls.get_pow2_opts``.

    Returns:
        ``SymmetricScalableFilterQuantizationRecord`` when ``params`` has
        multiplicative biases, otherwise
        ``SymmetricFilterQuantizationRecord``.
    """
    force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
    # force_out_qs is falsy or a list; a forced output QType wins below.
    force_out_q = force_out_qs and force_out_qs[0]
    fusion = kwargs.get('fusion', None)
    G = kwargs['G']
    weights_node, biases_node = cls.get_weights_and_biases_nodes(
        G, fusion if fusion else params)
    range_acc = stats['range_acc']
    # True when the filter is fused with a following activation.
    conv_active = fusion and fusion.fusion_type in [
        'conv_active_pool', 'conv_active']
    # Container dtype used for the weight/bias/mul-bias arrays.
    int_dtype = out_dtype
    if conv_active:
        # Take stats from activation after the convolution
        range_out = kwargs['all_stats'][NodeId(
            fusion, fusion.contained_nodes()[1])]['range_out'][0]
        # The conv feeds the activation at full accumulator width.
        out_dtype = np.int32
    else:
        range_out = stats['range_out'][0]
    in_q = in_qs[0]
    # Accumulator / calculation width in bits.
    calc_width = 32
    if force_out_q:
        o_q = force_out_q
    else:
        o_q = QType.from_min_max_pow2(range_out['min'],
                                      range_out['max'],
                                      dtype=out_dtype)
    weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                      dtype=int_dtype)
    # Fractional bits of the input*weight product (Q formats add).
    calc_q = in_q.q + weights_q.q
    # bits(...) is a helper defined elsewhere; presumably it returns the
    # integer bits needed to represent the given range — TODO confirm.
    acc_bits = bits(range_acc['max'], range_acc['min'])
    act_bits = bits(range_out['min'], range_out['max'])
    act_acc_bits = max(acc_bits, act_bits)
    # Integer bits left in the accumulator at this fractional precision.
    calc_int_bits = calc_width - calc_q
    if calc_int_bits < act_acc_bits:
        # we don't have enough space for the integer portion so reduce the precision of
        # the weights
        missing_bits = act_acc_bits - calc_int_bits
        # TODO - This needs improving
        assert weights_q.q >= missing_bits, "no space in weights to reduce precision"
        LOG.warning(
            'reducing weight precision in %s to satisfy quantization constraints', params.name)
        weights_q.q = weights_q.q - missing_bits
        calc_q = in_q.q + weights_q.q
        calc_int_bits = calc_width - calc_q
    # calc and accumulator share one QType object (deliberate aliasing).
    c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)
    if conv_active:
        # Fused case: the conv's own output is the raw accumulator format.
        o_q = c_q
    if not params.has_bias or np.all(biases_node.dqvalue == 0):
        # NOTE(review): biases_q aliases o_q here, so the clamp below can
        # also lower o_q.q in the non-fused case — confirm this is intended.
        biases_q = o_q
    else:
        biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                         dtype=int_dtype)
    # make sure that the biases are not stored more precisely than the accumulator.
    # It's pointless and will cause a negative shift
    if biases_q.q > acc_q.q:
        biases_q.q = acc_q.q
    if isinstance(params, MultiplicativeBiasParameters):
        if params.has_mul_bias:
            mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                         dtype=int_dtype)
        else:
            mb_q = None
        return SymmetricScalableFilterQuantizationRecord(
            in_qs=[in_q, weights_q, biases_q],
            out_qs=[o_q],
            calc_q=c_q,
            acc_q=acc_q,
            mul_biases_q=mb_q)
    else:
        return SymmetricFilterQuantizationRecord(
            in_qs=[in_q, weights_q, biases_q],
            out_qs=[o_q],
            calc_q=c_q,
            acc_q=acc_q)
def _quantize(cls, params, in_qs, stats, **kwargs):
    """Build a pow2 (Q-format) symmetric QRec for a filter node.

    Newer variant of the pow2 filter quantizer: snaps the input scale to a
    power of two, sizes the bias container from the ``pow2_biases`` option,
    and, when the 31-bit accumulator lacks integer headroom, splits the
    precision reduction proportionally between the input and the weights.

    Args:
        params: the filter node (or fused filter) being quantized.
        in_qs: quantization of the node's inputs; only ``in_qs[0]`` is read
            (deep-copied, since its ``q`` may be reduced below).
        stats: statistics for this node; ``'range_out'`` is required,
            ``'range_acc'`` falls back to the output range when absent.
        kwargs: must supply ``'G'`` and ``'opts'`` (with key
            ``'pow2_biases'``); may supply ``'fusion'`` and ``'all_stats'``
            (read when fused with an activation), plus the options consumed
            by ``cls.get_pow2_opts``.

    Returns:
        ``QRec.symmetric(...)`` with input/weights/biases in_qs, output,
        calc and accumulator QTypes, or ``None`` when a forced output
        format cannot be satisfied.
    """
    force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
    force_out_q = force_out_qs and force_out_qs[0]
    fusion = kwargs.get('fusion', None)
    # Bias container selector: 0 = same as params dtype, 8/16 = int8/int16,
    # anything else = int32.
    pow2_biases = kwargs.get('opts')['pow2_biases']
    G = kwargs['G']
    weights_node, biases_node = cls.get_weights_and_biases_nodes(
        G, fusion if fusion else params)
    # Fall back to the output range if no accumulator stats were recorded.
    range_acc = stats.get('range_acc', stats['range_out'][0])
    # True when the filter is fused with a following activation.
    conv_active = fusion and fusion.fusion_type in [
        'conv_active_pool', 'conv_active']
    int_dtype = np.int32
    cls.check_valid_ranges(params, stats, idx=0, dirs='out')
    if conv_active:
        # Take stats from activation after the convolution
        range_out = kwargs['all_stats'][NodeId(
            fusion, fusion.contained_nodes()[1])]['range_out'][0]
        # The conv feeds the activation at full accumulator width.
        out_dtype = np.int32
    else:
        out_dtype = params_dtype
        range_out = stats['range_out'][0]
    # Deep copy: in_q.q may be mutated below, and scale_to_pow2 snaps the
    # incoming scale to a power-of-two Q format.
    in_q = deepcopy(in_qs[0]).scale_to_pow2()
    # Accumulator / calculation width in bits.
    calc_width = 31
    o_q = QType.from_min_max_pow2(range_out['min'],
                                  range_out['max'],
                                  dtype=out_dtype)
    if force_out_q:
        # Can't produce the forced output format at this precision;
        # signal failure to the caller.
        if o_q.scale > force_out_q.scale:
            return None
    weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                      dtype=params_dtype)
    # Fractional bits of the input*weight product (Q formats add).
    calc_q = in_q.q + weights_q.q
    # calc_bits(...) is a helper defined elsewhere; presumably it returns
    # the integer bits needed to represent the given range — TODO confirm.
    acc_bits = calc_bits(range_acc['max'], range_acc['min'])
    act_bits = calc_bits(range_out['min'], range_out['max'])
    act_acc_bits = max(acc_bits, act_bits)
    # Integer bits left in the accumulator at this fractional precision.
    calc_int_bits = calc_width - calc_q
    if calc_int_bits < act_acc_bits:
        # we don't have enough space for the integer portion so reduce the precision of
        # the weights and input
        missing_bits = act_acc_bits - calc_int_bits
        if missing_bits > calc_q * 0.75:
            # NOTE(review): "loose" should read "lose" in this message.
            raise ValueError(
                f'Quantizing {params.name} at this precision will loose more than 75% of fractional part')
        # Split the reduction proportionally to each side's share of
        # calc_q, never dropping below zero fractional bits.
        prec_inp = min(math.floor(0.5 + missing_bits * in_q.q / calc_q), in_q.q)
        prec_w = min(math.floor(0.5 + missing_bits * weights_q.q / calc_q), weights_q.q)
        # Rounding may leave a remainder; charge it to the weights.
        left = missing_bits - prec_inp - prec_w
        if left > 0:
            prec_w += left
        LOG.warning(
            'reducing weight and input precision (%s, %s) in %s to satisfy quantization constraints',
            prec_w, prec_inp, params.name)
        weights_q.q -= prec_w
        in_q.q -= prec_inp
        calc_q = in_q.q + weights_q.q
        calc_int_bits = calc_width - calc_q
    # calc and accumulator share one QType object (deliberate aliasing).
    c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)
    if conv_active:
        # Fused case: the conv's own output is the raw accumulator format.
        o_q = c_q
    if pow2_biases == 0:
        biases_dtype = params_dtype
    elif pow2_biases == 8:
        biases_dtype = np.int8
    elif pow2_biases == 16:
        biases_dtype = np.int16
    else:
        biases_dtype = np.int32
    biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                     dtype=biases_dtype)
    # make sure that the biases are not stored more precisely than the accumulator.
    # It's pointless and will cause a negative shift
    if biases_q.q > acc_q.q:
        biases_q.q = acc_q.q
    if isinstance(params, MultiplicativeBiasParameters) and params.has_mul_bias:
        mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                     dtype=int_dtype)
    else:
        mb_q = None
    return QRec.symmetric(in_qs=[in_q, weights_q, biases_q],
                          out_qs=[o_q],
                          calc_q=c_q,
                          acc_q=acc_q,
                          mul_biases_q=mb_q)