Example #1
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)

        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        out_dtype = params_dtype
        range_out = stats['range_out'][0]
        if fusion:
            activation = fusion.contained_nodes()[1]
            if isinstance(activation, ReluActivationParameters):
                # Take stats from the activation after the convolution
                range_out = kwargs['all_stats'][NodeId(
                    fusion, activation)]['range_out'][0]
                out_dtype = np.int32

        in_q1 = deepcopy(in_qs[0]).scale_to_pow2()
        in_q2 = deepcopy(in_qs[1]).scale_to_pow2()
        biases_q = QType.Pow2(32, in_q1.q + in_q2.q, True)

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_pow2(range_out['min'],
                                          range_out['max'],
                                          dtype=out_dtype)
        if len(in_qs) == 3:
            return QRec.symmetric(in_qs=[in_q1, in_q2, biases_q], out_qs=[o_q])
        return QRec.symmetric(in_qs=[in_q1, in_q2], out_qs=[o_q])
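
Both branches above come down to picking a power-of-two Q format that covers a recorded min/max range. A minimal standalone sketch of that idea (pow2_qtype_from_range is a hypothetical helper for illustration, not nntool's QType.from_min_max_pow2):

    import math

    def pow2_qtype_from_range(min_val, max_val, bits=16):
        # Pick the number of fractional bits (q) so a signed
        # `bits`-wide integer covers [min_val, max_val]
        max_abs = max(abs(min_val), abs(max_val))
        # bits needed left of the binary point, at least 0
        int_bits = max(math.ceil(math.log2(max_abs)), 0) if max_abs > 0 else 0
        # one bit for the sign, the rest is fractional precision
        return bits, bits - 1 - int_bits

    print(pow2_qtype_from_range(-1.5, 3.2))  # (16, 13): Q13 spans roughly +/-4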
Example #2
    def _get_in_qs_from_stats(cls, params, stats, in_qs, **kwargs):
        # Build a pow2 qtype for each input from its recorded range,
        # skipping inputs whose preferred dtype is None
        return [
            QType.from_min_max_pow2(stats['range_in'][idx]['min'],
                                    stats['range_in'][idx]['max'],
                                    dtype=dtype) if dtype is not None else None
            for idx, dtype in enumerate(
                cls.get_prefered_input_dtypes(params, **kwargs))
        ]
Example #3
    def force_pow2(cls, in_qs, idx=None):
        # Re-derive any qtype that is not already a power of two as a
        # forced pow2 qtype, optionally only at index `idx`
        return [
            QType.from_min_max_pow2(
                in_q.min_val, in_q.max_val, dtype=in_q.dtype, forced=True)
            if in_q and
            (idx is None or idx == in_q_idx) and not in_q.is_pow2 else in_q
            for in_q_idx, in_q in enumerate(in_qs)
        ]
Example #4
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_pow2(stats['range_out'][0]['min'],
                                          stats['range_out'][0]['max'],
                                          dtype=out_dtype)

        return SymmetricConstantQuantizationRecord(in_qs=None, out_qs=[o_q])
Example #5
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        if force_out_q:
            o_q = force_out_q
        else:
            cls.check_valid_ranges(params, stats, idx=0, dirs='out')
            o_q = QType.from_min_max_pow2(stats['range_out'][0]['min'],
                                          stats['range_out'][0]['max'],
                                          dtype=out_dtype)
        o_q.is_constant = True
        return QRec.symmetric(in_qs=None, out_qs=[o_q])
Example #6
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        o_q = QType.from_min_max_pow2(stats['range_out'][0]['min'],
                                      stats['range_out'][0]['max'],
                                      dtype=out_dtype)
        if force_out_q:
            if force_out_q.bits - force_out_q.q < o_q.bits - o_q.q:
                LOG.warning('%s is being forced to output in Q%s and may clip',
                            params.name, force_out_q.q)
            o_q = force_out_q

        return SymmetricQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
Example #7
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        o_q = QType.from_min_max_pow2(stats['range_out'][0]['min'],
                                      stats['range_out'][0]['max'],
                                      dtype=out_dtype)
        if force_out_q:
            if force_out_q.is_pow2 and force_out_q.bits - force_out_q.q < o_q.bits - o_q.q:
                LOG.warning('%s is being forced to output in Q%s and may clip',
                            params.name, force_out_q.q)
            o_q = force_out_q

        return QRec.symmetric(in_qs=in_qs, out_qs=[o_q])
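
The warning condition in Examples #6 and #7 compares integer bits: a Q format with `bits` total and `q` fractional bits leaves `bits - q` bits for sign plus integer part, so forcing a format with fewer integer bits than the stats call for can saturate large values. A worked check of the same test (may_clip is an illustrative name, not nntool's API):

    def may_clip(forced_bits, forced_q, stat_bits, stat_q):
        # fewer integer bits in the forced format than the stats need
        return forced_bits - forced_q < stat_bits - stat_q

    # stats ask for Q4 in 16 bits (12 integer bits); forcing Q12 leaves
    # only 4 integer bits, so anything above ~8 would clip
    print(may_clip(16, 12, 16, 4))  # True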
Example #8
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)

        if stats is None or 'expression' not in stats:
            raise ValueError(
                f'no valid range information is present for {params.name}')

        # expressions need a symmetric input
        # this is done on the mult8 version but probably isn't necessary here
        # in_qs = cls.force_symmetric(in_qs)

        symbol_control = SymbolStats(stats['expression'])
        # preload the input and output quantization
        # This will force variables to the right scales in the expression quantizer
        # first the input
        prequant = {
            params.input_symbols[idx]: in_q
            for idx, in_q in enumerate(in_qs)
        }
        # now the output
        o_qs = []
        for idx, sym_name in enumerate(params.output_symbols):
            if force_out_qs and force_out_qs[idx]:
                o_q = force_out_qs[idx]
            else:
                cls.check_valid_ranges(params, stats, idx=idx, dirs='out')
                o_q = QType.from_min_max_pow2(stats['range_out'][idx]['min'],
                                              stats['range_out'][idx]['max'],
                                              dtype=out_dtype)
            prequant[sym_name] = o_q
            o_qs.append(o_q)

        qfunc_col = params.func_col.quantize(Q15ScaledQuantization,
                                             symbol_control,
                                             quantize_inputs=False,
                                             qtypes=prequant)

        return QRec.symmetric(in_qs=in_qs, out_qs=o_qs, qfunc_col=qfunc_col)
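
The prequant mapping above pins each expression symbol to a known QType before the expression quantizer runs, so it cannot re-scale the graph's inputs and outputs. A stripped-down sketch of the same dictionary build, with strings standing in for QType objects (purely illustrative):

    input_symbols = ['x0', 'x1']
    output_symbols = ['y0']
    in_qs = ['Q15', 'Q12']   # stand-ins for the incoming QTypes

    prequant = dict(zip(input_symbols, in_qs))
    for sym in output_symbols:
        prequant[sym] = 'Q15'  # from stats, or a forced output qtype
    print(prequant)  # {'x0': 'Q15', 'x1': 'Q12', 'y0': 'Q15'}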
Example #9
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, out_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        range_acc = stats['range_acc']
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        int_dtype = out_dtype
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            range_out = stats['range_out'][0]

        in_q = in_qs[0]
        calc_width = 32

        if force_out_q:
            o_q = force_out_q
        else:
            o_q = QType.from_min_max_pow2(range_out['min'],
                                          range_out['max'],
                                          dtype=out_dtype)
        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=int_dtype)
        calc_q = in_q.q + weights_q.q

        acc_bits = bits(range_acc['max'], range_acc['min'])
        act_bits = bits(range_out['min'], range_out['max'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the precision of
            # the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            assert weights_q.q >= missing_bits, "no space in weights to reduce precision"
            LOG.warning(
                'reducing weight precision in %s to satisfy quantization constraints',
                params.name)
            weights_q.q = weights_q.q - missing_bits
            calc_q = in_q.q + weights_q.q
            calc_int_bits = calc_width - calc_q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            o_q = c_q

        if not params.has_bias or np.all(biases_node.dqvalue == 0):
            biases_q = o_q
        else:
            biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                             dtype=int_dtype)
        # make sure that the biases are not stored more precisely than the
        # accumulator. It's pointless and will cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params, MultiplicativeBiasParameters):
            if params.has_mul_bias:
                mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                             dtype=int_dtype)
            else:
                mb_q = None
            return SymmetricScalableFilterQuantizationRecord(
                in_qs=[in_q, weights_q, biases_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q,
                mul_biases_q=mb_q)
        else:
            return SymmetricFilterQuantizationRecord(
                in_qs=[in_q, weights_q, biases_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q)
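
The fixed-point bookkeeping in Example #9 rests on one identity: multiplying a Qa value by a Qb value gives a Q(a+b) product. The accumulator therefore carries calc_q = in_q.q + weights_q.q fractional bits, and whatever is left of the 32-bit width must hold the integer part. A worked run of the adjustment, with plain ints standing in for the QType fields:

    in_q_q, weights_q_q = 12, 14
    calc_width = 32
    calc_q = in_q_q + weights_q_q        # product has 26 fractional bits
    calc_int_bits = calc_width - calc_q  # 6 bits left for sign + integer

    # the accumulator/activation range needs 9 integer bits, so drop
    # fractional bits from the weights to make room
    act_acc_bits = 9
    missing_bits = act_acc_bits - calc_int_bits  # 3 bits short
    weights_q_q -= missing_bits                  # weights: Q14 -> Q11
    print(calc_width - (in_q_q + weights_q_q))   # 9 integer bits now fit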
Example #10
    def _quantize(cls, params, in_qs, stats, **kwargs):
        force_out_qs, params_dtype = cls.get_pow2_opts(**kwargs)
        force_out_q = force_out_qs and force_out_qs[0]

        fusion = kwargs.get('fusion', None)
        pow2_biases = kwargs.get('opts')['pow2_biases']
        G = kwargs['G']
        weights_node, biases_node = cls.get_weights_and_biases_nodes(
            G, fusion if fusion else params)

        range_acc = stats.get('range_acc', stats['range_out'][0])
        conv_active = fusion and fusion.fusion_type in [
            'conv_active_pool', 'conv_active'
        ]
        int_dtype = np.int32
        cls.check_valid_ranges(params, stats, idx=0, dirs='out')
        if conv_active:
            # Take stats from activation after the convolution
            range_out = kwargs['all_stats'][NodeId(
                fusion,
                fusion.contained_nodes()[1])]['range_out'][0]
            out_dtype = np.int32
        else:
            out_dtype = params_dtype
            range_out = stats['range_out'][0]

        in_q = deepcopy(in_qs[0]).scale_to_pow2()
        calc_width = 31

        o_q = QType.from_min_max_pow2(range_out['min'],
                                      range_out['max'],
                                      dtype=out_dtype)
        if force_out_q:
            if o_q.scale > force_out_q.scale:
                return None

        weights_q = QType.from_array_pow2(arr=weights_node.dqvalue,
                                          dtype=params_dtype)
        calc_q = in_q.q + weights_q.q

        acc_bits = calc_bits(range_acc['max'], range_acc['min'])
        act_bits = calc_bits(range_out['min'], range_out['max'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the precision of
            # the weights and input
            missing_bits = act_acc_bits - calc_int_bits
            if missing_bits > calc_q * 0.75:
                raise ValueError(
                    f'Quantizing {params.name} at this precision will lose '
                    f'more than 75% of the fractional part'
                )

            prec_inp = min(math.floor(0.5 + missing_bits * in_q.q / calc_q),
                           in_q.q)
            prec_w = min(math.floor(0.5 + missing_bits * weights_q.q / calc_q),
                         weights_q.q)
            left = missing_bits - prec_inp - prec_w
            if left > 0:
                prec_w += left
            LOG.warning(
                'reducing weight and input precision (%s, %s) in %s to satisfy quantization constraints',
                prec_w, prec_inp, params.name)
            weights_q.q -= prec_w
            in_q.q -= prec_inp
            calc_q = in_q.q + weights_q.q
            calc_int_bits = calc_width - calc_q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        if conv_active:
            o_q = c_q

        if pow2_biases == 0:
            biases_dtype = params_dtype
        elif pow2_biases == 8:
            biases_dtype = np.int8
        elif pow2_biases == 16:
            biases_dtype = np.int16
        else:
            biases_dtype = np.int32

        biases_q = QType.from_array_pow2(arr=biases_node.dqvalue,
                                         dtype=biases_dtype)
        # make sure that the biases are not stored more precisely than the
        # accumulator. It's pointless and will cause a negative shift
        if biases_q.q > acc_q.q:
            biases_q.q = acc_q.q

        if isinstance(params,
                      MultiplicativeBiasParameters) and params.has_mul_bias:
            mb_q = QType.from_array_pow2(arr=params.mul_biases,
                                         dtype=int_dtype)
        else:
            mb_q = None
        return QRec.symmetric(in_qs=[in_q, weights_q, biases_q],
                              out_qs=[o_q],
                              calc_q=c_q,
                              acc_q=acc_q,
                              mul_biases_q=mb_q)
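
Example #10 differs from #9 in how it frees up integer bits: the missing bits are split between the input and the weights in proportion to how many fractional bits each contributes to calc_q, rather than taken from the weights alone. The same arithmetic in isolation (illustrative values):

    import math

    in_q, weights_q = 8, 16
    calc_q = in_q + weights_q    # 24 fractional bits in the product
    missing_bits = 6

    # round-to-nearest proportional shares
    prec_inp = min(math.floor(0.5 + missing_bits * in_q / calc_q), in_q)
    prec_w = min(math.floor(0.5 + missing_bits * weights_q / calc_q), weights_q)
    # any bit lost to rounding is taken from the weights
    left = missing_bits - prec_inp - prec_w
    if left > 0:
        prec_w += left

    print(prec_inp, prec_w)  # 2 4: input Q8 -> Q6, weights Q16 -> Q12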