# Example #1
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Pow2 quantizer for a two-input node with optional bias input.

        Scales both inputs to power-of-two Q formats, derives a 32 bit
        bias Q from their sum and picks the output Q from recorded
        statistics (or the forced output format if one is set).

        Returns a QRec with 2 or 3 input qtypes depending on whether a
        bias input is present.
        """
        force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)

        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        # Defaults from this node's own stats. Previously these were only
        # set in the else branch, leaving them unbound when a fusion's
        # contained activation was not a ReLU.
        out_dtype = params_dtype
        range_out = stats['range_out'][0]
        if fusion:
            activation = fusion.contained_nodes()[1]
            if isinstance(activation, ReluActivationParameters):
                # Take stats from activation after the convolution
                range_out = kwargs['all_stats'][NodeId(
                    fusion, activation)]['range_out'][0]
                out_dtype = np.int32

        in_q1 = deepcopy(in_qs[0]).scale_to_pow2()
        # BUG FIX: second input previously copied in_qs[0]
        in_q2 = deepcopy(in_qs[1]).scale_to_pow2()
        # bias accumulates the product so its Q is the sum of the input Qs
        biases_q = QType.Pow2(32, in_q1.q + in_q2.q, True)

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_pow2(range_out['min'],
                                          range_out['max'],
                                          dtype=out_dtype)
        if len(in_qs) == 3:
            return QRec.symmetric(in_qs=[in_q1, in_q2, biases_q], out_qs=[o_q])
        return QRec.symmetric(in_qs=[in_q1, in_q2], out_qs=[o_q])
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Scaled (mult) quantizer for a softmax-style node.

        Output is fixed-range [-1, 1): Q7 int8 when the input is int8 and
        8 bit output is requested (via the 'softmax_out_8bits' option or a
        forced int8 output), Q15 int16 otherwise. Returns None when a
        forced output format cannot be honoured.
        """
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        opts = kwargs['opts']
        if force_out_q:
            # a forced scale or zero point cannot be honoured here
            if force_out_q.forced_scale or force_out_q.forced_zero_point:
                return None
            if in_qs[0].dtype == np.int8:
                allowed_dtypes = [np.int8, np.int16]
            else:
                allowed_dtypes = [np.int16]
            if force_out_q.forced_dtype and force_out_q.dtype not in allowed_dtypes:
                return None

        in_qs = cls.force_symmetric_and_dtype(in_qs)
        if in_qs is None:
            return None
        # force the input to be POW2 scaled
        pow2_scale = np.power(2, np.ceil(np.log2(in_qs[0].scale)))
        in_q = QType(min_val=in_qs[0].min_val,
                     max_val=in_qs[0].max_val,
                     dtype=in_qs[0].dtype,
                     scale=pow2_scale,
                     forced=True)
        want_8bit_out = (opts.get('softmax_out_8bits', None) or
                         (force_out_q and force_out_q.dtype == np.int8))
        if in_q.dtype == np.int8 and want_8bit_out:
            # params.at_options.softmax_out_8bits = 1
            o_q = QType(min_val=-1, max_val=1, dtype=np.int8, scale=2**(-7))
        else:
            o_q = QType(min_val=-1, max_val=1, dtype=np.int16, scale=2**(-15))
            # pure 16 bit in/out uses the symmetric record
            if in_q.dtype == np.int16 and o_q.dtype == np.int16:
                return QRec.symmetric(in_qs=[in_q], out_qs=[o_q])

        return QRec.scaled(in_qs=[in_q], out_qs=[o_q])
# Example #3
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantizer for MFCC / mel-spectrogram nodes.

        The kernel fixes all internal and output Q formats, so any forced
        output is rejected (returns None). The output Q depends on
        params.mel_type ('melspectrogram', 'logmelspectrogram' or MFCC).
        """
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        if force_out_qs and force_out_qs[0]:
            # output format is dictated by the kernel - cannot be forced
            return None

        (in_q, win_q, fft_twiddles_q, swap_table_q, rfft_twiddles_q,
         fft_out_q, _) = cls.get_spectrogram_in_out_q(in_qs[0], params)
        melcoeff_q = QType.Pow2(bits=16, signed=True, q=MFCC_COEFF_Q)
        mel_sparsity_table_q = QType.Pow2(bits=16, signed=False, q=0)
        dctmat_q = QType.Pow2(bits=16, signed=True, q=DCT_TWIDDLE_Q)
        mel_type = params.mel_type
        if mel_type == "melspectrogram":
            out_q = QType.Pow2(bits=32, signed=True, q=16)
        elif mel_type == "logmelspectrogram":
            out_q = QType.Pow2(bits=16, signed=True, q=15 - params.quant_norm)
        else:
            # MFCC output: DCT shifts the Q down by the twiddle precision
            out_q = QType.Pow2(bits=16,
                               signed=True,
                               q=15 - params.quant_norm - DCT_TWIDDLE_Q)

        return QRec.symmetric(in_qs=[
            in_q, win_q, fft_twiddles_q, swap_table_q, rfft_twiddles_q,
            mel_sparsity_table_q, melcoeff_q, dctmat_q
        ],
                              out_qs=[out_q],
                              fft_out_q=fft_out_q)
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Pow2 quantizer for activation nodes.

        sigmoid/tanh get a fixed Q7/Q15 output; the relu family derives
        the integer bits it needs from the activation parameters or the
        recorded output range. Returns None for unsupported activations
        or unsatisfiable forced outputs.
        """
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        # a standalone activation never accepts a 32 bit (accumulator) input
        if not fusion and in_qs[0].dtype == np.int32:
            return None

        act = params.activation
        if act == "sigmoid" or act == "tanh":
            # output range is fixed: Q7 for int8, Q15 for int16
            if force_out_q is None:
                fixed_q = 7 if out_dtype == np.int8 else 15
                return QRec.symmetric(in_qs=[in_qs[0]],
                                      out_qs=[QType(q=fixed_q, dtype=out_dtype)])
            fixed_q = 7 if force_out_q.dtype == np.int8 else 15
            if force_out_q.q != fixed_q:
                return None
            return QRec.symmetric(in_qs=[in_qs[0]], out_qs=[force_out_q])

        if act == "relu6":
            int_bits = calc_bits(6)
        elif act == "relun":
            relun = params.activation_params
            if isinstance(relun, list):
                # per-channel relu-n: size for the largest clip value
                relun = max(relun)
            int_bits = calc_bits(relun)
        elif act in ("relu", "hswish", "hsigmoid", "leaky", "htanh"):
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            int_bits = calc_bits(stats['range_out'][0]['max'],
                                 stats['range_out'][0]['min'])
        else:
            LOG.error(
                f'no support for activation {params.activation} in POW2 quantizer'
            )
            return None

        in_q = in_qs[0]
        if force_out_q is None:
            out_q = QType(q=max(cls.get_pow2_bits(**kwargs) - int_bits, 0),
                          dtype=out_dtype)
        else:
            # forced format must leave enough integer headroom
            if force_out_q.bits - force_out_q.q < int_bits:
                return None
            out_q = force_out_q
        return QRec.symmetric(in_qs=[in_q], out_qs=[out_q])
# Example #5
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Pow2 quantizer with a fixed, forced Q15 int16 output.

     The input is scaled to a power-of-two Q and marked forced; any
     forced output that differs from Q15/16 bit is rejected (None).
     """
     force_out_qs, _ = cls.get_pow2_opts(**kwargs)
     forced_out = force_out_qs and force_out_qs[0]
     out_q = QType.Pow2(16, 15, True, forced=True)
     if forced_out and forced_out != out_q:
         # only the fixed Q15 format can be produced
         return None
     in_q = deepcopy(in_qs[0]).scale_to_pow2()
     in_q.set_forced()
     return QRec.symmetric(in_qs=[in_q], out_qs=[out_q])
# Example #6
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantizer for spectrogram nodes.

        All Q formats come from get_spectrogram_in_out_q; the kernel
        fixes the output so a forced output is rejected (None).
        """
        force_out_qs, _ = cls.get_mult_opts(**kwargs)
        if force_out_qs and force_out_qs[0]:
            # output format is dictated by the kernel - cannot be forced
            return None

        (in_q, win_q, fft_twiddles_q, swap_table_q,
         rfft_twiddles_q, fft_out_q, out_q) = cls.get_spectrogram_in_out_q(
             in_qs[0], params)
        return QRec.symmetric(
            in_qs=[in_q, win_q, fft_twiddles_q, swap_table_q, rfft_twiddles_q],
            out_qs=[out_q],
            fft_out_q=fft_out_q)
# Example #7
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Pow2 quantizer for a node with no inputs (constant output).

        Uses the forced output format when present, otherwise derives a
        pow2 QType from the recorded output range. The result is marked
        as constant.
        """
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        if not force_out_q:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            rng = stats['range_out'][0]
            o_q = QType.from_min_max_pow2(rng['min'],
                                          rng['max'],
                                          dtype=out_dtype)
        else:
            o_q = force_out_q
        o_q.is_constant = True
        return QRec.symmetric(in_qs=None, out_qs=[o_q])
# Example #8
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Pow2 quantizer that passes inputs through unchanged.

        The output Q comes from the recorded range unless forced; a
        forced pow2 format with fewer integer bits than the stats need
        is accepted but logged as a potential clip.
        """
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        rng = stats['range_out'][0]
        o_q = QType.from_min_max_pow2(rng['min'], rng['max'], dtype=out_dtype)
        if force_out_q:
            may_clip = (force_out_q.is_pow2 and
                        force_out_q.bits - force_out_q.q < o_q.bits - o_q.q)
            if may_clip:
                LOG.warning('%s is being forced to output in Q%s and may clip',
                            params.name, force_out_q.q)
            o_q = force_out_q

        return QRec.symmetric(in_qs=in_qs, out_qs=[o_q])
# Example #9
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Quantizer for an explicit Quantize node.

        Records the incoming qtype on the params. Going forwards the
        node's stored to_qtype is the output; going backwards the forced
        output format is adopted as the new to_qtype.
        """
        params = cast(QuantizeParameters, params)
        force_out_qs = kwargs.get('force_out_qs', None)
        force_out_q = force_out_qs and force_out_qs[0]
        # keep the in_qtype for informational purposes
        params.from_qtype = in_qs[0]
        if not kwargs.get('backwards'):
            # forwards: copy the out_qtype into the qrec
            return QRec.symmetric(in_qs=in_qs,
                                  out_qs=[deepcopy(params.to_qtype)])
        # backwards: the output must have been forced by the consumer
        assert force_out_q, f'going backwards at {params.name} but output is not forced'
        # change our output to match the force
        params.to_qtype = deepcopy(force_out_q)
        return QRec(in_qs=deepcopy(in_qs), out_qs=[deepcopy(force_out_q)])
# Example #10
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Pow2 quantizer for expression-fusion nodes.

        Preloads the expression quantizer with the input qtypes and the
        chosen (or forced) output qtypes, then quantizes the expression's
        function collection at Q15. Raises ValueError when no expression
        stats are available.
        """
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)

        if stats is None or 'expression' not in stats:
            raise ValueError(
                f'no valid range information is present for {params.name}')

        # expressions need a symmetric input
        # this is done on the mult8 version but probably isn't necessary here
        # in_qs = cls.force_symmetric(in_qs)

        symbol_control = SymbolStats(stats['expression'])
        # map each expression variable to its qtype so the expression
        # quantizer is forced onto the right scales - inputs first
        prequant = {params.input_symbols[i]: q for i, q in enumerate(in_qs)}
        # then the outputs
        o_qs = []
        for idx, sym_name in enumerate(params.output_symbols):
            forced = force_out_qs[idx] if force_out_qs else None
            if forced:
                o_q = forced
            else:
                cls.check_valid_ranges(params, stats, idx=idx, dirs='out')
                rng = stats['range_out'][idx]
                o_q = QType.from_min_max_pow2(rng['min'],
                                              rng['max'],
                                              dtype=out_dtype)
            prequant[sym_name] = o_q
            o_qs.append(o_q)

        qfunc_col = params.func_col.quantize(Q15ScaledQuantization,
                                             symbol_control,
                                             quantize_inputs=False,
                                             qtypes=prequant)

        return QRec.symmetric(in_qs=in_qs, out_qs=o_qs, qfunc_col=qfunc_col)
# Example #11
 def _quantize(cls, params, in_qs, stats, **kwargs):
     """Pass-through quantizer: output qtypes are supplied by the caller."""
     return QRec.symmetric(in_qs=in_qs, out_qs=kwargs['out_qs'])
# Example #12
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Pow2 quantizer for a filter node (weights + biases).

        Picks power-of-two Q formats for input, weights, biases,
        accumulator and output from the recorded statistics, reducing
        input/weight precision when the accumulator would overflow its
        integer headroom. Returns a QRec, or None when a forced output
        scale cannot represent the required range.
        """
        force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        # 0 means "use the params dtype"; otherwise an explicit bias bit width
        pow2_biases = kwargs.get('opts')['pow2_biases']
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        # fall back to the output range when no accumulator stats were recorded
        range_acc = stats.get('range_acc', stats['range_out'][0])
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        int_dtype = np.int32
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            out_dtype = params_dtype
            range_out = stats['range_out'][0]

        in_q = deepcopy(in_qs[0]).scale_to_pow2()
        # NOTE(review): 31 here vs 32 in the sibling quantizer - presumably
        # reserves the accumulator sign bit; confirm intent
        calc_width = 31

        o_q = QType.from_min_max_pow2(range_out['min'],
                                      range_out['max'],
                                      dtype=out_dtype)
        if force_out_q:
            # a forced output with a smaller scale than the stats require
            # cannot represent the range - reject
            if o_q.scale > force_out_q.scale:
                return None

        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=params_dtype)
        # Q of the input*weight product feeding the accumulator
        calc_q = in_q.q + weights_q.q

        # integer bits needed to hold both accumulator and activation ranges
        acc_bits = calc_bits(range_acc['max'], range_acc['min'])
        act_bits = calc_bits(range_out['min'], range_out['max'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the precision of
            # the weights and input
            missing_bits = act_acc_bits - calc_int_bits
            if missing_bits > calc_q * 0.75:
                raise ValueError(
                    f'Quantizing {params.name} at this precision will loose more than 75% of fractional part'
                )

            # split the reduction between input and weights in proportion to
            # their current fractional precision (rounded to nearest)
            prec_inp = min(math.floor(0.5 + missing_bits * in_q.q / calc_q),
                           in_q.q)
            prec_w = min(math.floor(0.5 + missing_bits * weights_q.q / calc_q),
                         weights_q.q)
            left = missing_bits - prec_inp - prec_w
            if left > 0:
                # rounding left some bits unassigned - take them from the weights
                prec_w += left
            LOG.warning(
                'reducing weight and input precision (%s, %s) in %s to satisfy quantization constraints',
                prec_w, prec_inp, params.name)
            weights_q.q -= prec_w
            in_q.q -= prec_inp
            calc_q = in_q.q + weights_q.q
            calc_int_bits = calc_width - calc_q

        # calculation and accumulator share one Q format
        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            # a fused activation consumes the raw accumulator
            o_q = c_q

        if pow2_biases == 0:
            biases_dtype = params_dtype
        elif pow2_biases == 8:
            biases_dtype = np.int8
        elif pow2_biases == 16:
            biases_dtype = np.int16
        else:
            biases_dtype = np.int32

        biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                         dtype=biases_dtype)
        # make sure that the biases are not stored more precisely than the
        # accumulator. It's pointless and will cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params,
                      MultiplicativeBiasParameters) and params.has_mul_bias:
            mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                         dtype=int_dtype)
        else:
            mb_q = None
        return QRec.symmetric(in_qs=[in_q, weights_q, biases_q],
                              out_qs=[o_q],
                              calc_q=c_q,
                              acc_q=acc_q,
                              mul_biases_q=mb_q)
# Example #13
    def _quantize(cls, params, in_qs, stats, **kwargs):
        """Pow2 quantizer for a filter node (weights + biases), 32 bit variant.

        Like the sibling quantizer but uses the full 32 bit calculation
        width, requires 'range_acc' stats to be present, and when the
        accumulator lacks integer headroom reduces only the weight
        precision (asserting there is enough to give up).
        """
        force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        # 0 means "use the params dtype"; otherwise an explicit bias bit width
        pow2_biases = kwargs.get('opts')['pow2_biases']
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        # accumulator range stats are required here (KeyError if missing)
        range_acc = stats['range_acc']
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        int_dtype = np.int32
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            out_dtype = params_dtype
            range_out = stats['range_out'][0]

        in_q = deepcopy(in_qs[0]).scale_to_pow2()
        calc_width = 32

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_pow2(range_out['min'],
                                          range_out['max'],
                                          dtype=out_dtype)
        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=params_dtype)
        # Q of the input*weight product feeding the accumulator
        calc_q = in_q.q + weights_q.q

        # integer bits needed to hold both accumulator and activation ranges
        acc_bits = calc_bits(range_acc['max'], range_acc['min'])
        act_bits = calc_bits(range_out['min'], range_out['max'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the precision of
            # the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            # NOTE(review): assert is stripped under -O; consider raising instead
            assert weights_q.q >= missing_bits, "no space in weights to reduce precision"
            LOG.warning(
                'reducing weight precision in %s to satisfy quantization constraints',
                params.name)
            weights_q.q = weights_q.q - missing_bits
            calc_q = in_q.q + weights_q.q
            calc_int_bits = calc_width - calc_q

        # calculation and accumulator share one Q format
        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            # a fused activation consumes the raw accumulator
            o_q = c_q

        if pow2_biases == 0:
            biases_dtype = params_dtype
        elif pow2_biases == 8:
            biases_dtype = np.int8
        elif pow2_biases == 16:
            biases_dtype = np.int16
        else:
            biases_dtype = np.int32

        biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                         dtype=biases_dtype)
        # make sure that the biases are not stored more precisely than the
        # accumulator. It's pointless and will cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params,
                      MultiplicativeBiasParameters) and params.has_mul_bias:
            mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                         dtype=int_dtype)
        else:
            mb_q = None
        return QRec.symmetric(in_qs=[in_q, weights_q, biases_q],
                              out_qs=[o_q],
                              calc_q=c_q,
                              acc_q=acc_q,
                              mul_biases_q=mb_q)