# Example 1
 def calculate_output_q(self,
                        node: Parameters,
                        astats,
                        in_qs,
                        force_width=None,
                        force_out=None):
     """Calculate the output quantization for a node.

     Args:
         node: the graph node (unused here; kept for a uniform handler signature).
         astats: activation statistics; 'range_out'[0] must provide 'max'/'min'.
         in_qs: input quantization types, passed through to the record.
         force_width: default output bit width when no width is forced.
         force_out: optional constraint object with `.bits` and/or `.q` set.

     Returns:
         SymmetricQuantizationRecord with the given in_qs and the computed out q.
     """
     del node  # unused; signature kept consistent with other handlers
     if force_out and force_out.bits:
         if force_out.q:
             # Both width and fixed-point position are forced.
             o_q = QType(bits=force_out.bits,
                         q=force_out.q,
                         signed=True)
         else:
             # Only the width is forced; derive q from the observed range.
             o_q = QType.from_min_max(
                 max_val=astats['range_out'][0]['max'],
                 min_val=astats['range_out'][0]['min'],
                 bits=force_out.bits)
     elif force_out and force_out.q:
         # Only q is forced: size from stats at the default width, then pin q.
         o_q = QType.from_min_max(max_val=astats['range_out'][0]['max'],
                                  min_val=astats['range_out'][0]['min'],
                                  bits=force_width)
         o_q.q = force_out.q
     else:
         # No usable constraint (including force_out with neither bits nor q
         # set, which previously left o_q unbound): derive q from stats.
         o_q = QType.from_min_max(max_val=astats['range_out'][0]['max'],
                                  min_val=astats['range_out'][0]['min'],
                                  bits=force_width)
     return SymmetricQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
# Example 2
 def quantize_fusion(self,
                     G: NNGraph,
                     node: ConvFusionParameters,
                     in_qs,
                     force_out=None) -> SymmetricQuantizationRecord:
     """Quantize a conv+activation fusion node.

     For 'conv_active' fusions, quantizes the contained convolution (output
     left at accumulator precision) and then the activation, honouring any
     forced output width/q. Other fusion types fall back to the default.

     Args:
         G: the graph containing the node.
         node: the fusion node whose contained nodes are quantized.
         in_qs: input quantization types for the fusion.
         force_out: optional constraint object with `.bits` and/or `.q`.

     Returns:
         For 'conv_active': (SymmetricQuantizationRecord, per-node record dict).
         Otherwise whatever default_quantize_fusion returns.

     Raises:
         NotImplementedError: if the forced q would require a left shift in
             the kernel (forced q above the achievable or conv output q).
     """
     if node.fusion_type != 'conv_active':
         return self.default_quantize_fusion(G,
                                             node,
                                             in_qs,
                                             force_out=force_out)

     result = OrderedDict()
     nodes = node.contained_nodes()
     conv_node = nodes[0]
     conv_astats = self._activation_stats.get(NodeId(node, conv_node))
     # Quantize the convolution with its output kept at accumulator
     # precision; the activation will produce the final output q.
     conv_qrec = self.calculate_filter_q(conv_node,
                                         conv_astats,
                                         in_q=in_qs[0],
                                         force_width=self._force_width,
                                         out_as_acc=True)
     result[NodeId(node, conv_node)] = conv_qrec
     act_node = nodes[1]
     act_astats = self._activation_stats.get(NodeId(node, act_node))
     if force_out and force_out.bits:
         # Maximum q representable by this activation at the forced width.
         act_max_q = self.compute_activation_out_maxq(
             act_node, force_out.bits)
         if force_out.q is not None:
             if (act_max_q is not None and force_out.q > act_max_q
                 ) or force_out.q > conv_qrec.out_qs[0].q:
                 # We cannot shift left in the kernel
                 # TODO - This should try to increase the input q and perhaps the width
                 # Unlikely to happen
                 raise NotImplementedError()
             act_o_q = QType(bits=force_out.bits,
                             q=force_out.q,
                             signed=True)
         else:
             # Width forced but not q: derive q from the observed range at
             # the forced width, then clamp so the kernel never shifts left.
             # (Previously act_o_q was read here before being assigned.)
             act_o_q = QType.from_min_max(
                 max_val=act_astats['range_out'][0]['max'],
                 min_val=act_astats['range_out'][0]['min'],
                 bits=force_out.bits)
             act_o_q.q = min(act_o_q.q, conv_qrec.out_qs[0].q)
             if act_max_q is not None:
                 act_o_q.q = min(act_max_q, act_o_q.q)
     else:
         act_o_q = QType.from_min_max(
             max_val=act_astats['range_out'][0]['max'],
             min_val=act_astats['range_out'][0]['min'],
             bits=self._force_width)
         act_o_q.q = min(act_o_q.q, conv_qrec.out_qs[0].q)
         if force_out and force_out.q:
             # act_max_q was previously unbound on this path; compute it at
             # the default width before validating the forced q.
             act_max_q = self.compute_activation_out_maxq(
                 act_node, self._force_width)
             if (act_max_q is not None and force_out.q > act_max_q
                 ) or force_out.q > conv_qrec.out_qs[0].q:
                 # We cannot shift left in the kernel
                 # TODO - This should try to increase the input q and perhaps the width
                 # Unlikely to happen
                 raise NotImplementedError()
             act_o_q.q = force_out.q
     act_qrec = SymmetricQuantizationRecord(in_qs=conv_qrec.out_qs,
                                            out_qs=[act_o_q])
     result[NodeId(node, act_node)] = act_qrec
     return SymmetricQuantizationRecord(in_qs=in_qs,
                                        out_qs=act_qrec.out_qs), result
# Example 3
    def calculate_filter_q(self,
                           node: Parameters,
                           astats,
                           in_q: QType,
                           force_width=None,
                           force_out=None,
                           out_as_acc=False):
        """Calculate quantization for a filter (conv / linear) node.

        Chooses weight, bias, accumulator and output q values so that the
        32-bit accumulator has enough integer bits for the observed
        accumulator/output ranges, reducing weight precision if necessary.

        Args:
            node: filter node providing `weights`, `biases`, `has_bias` and
                optionally mul-bias attributes.
            astats: statistics with 'range_acc' and 'range_out'[0] min/max.
            in_q: quantization of the input.
            force_width: default bit width for weights/biases/output.
            force_out: optional constraint object with `.bits` and/or `.q`.
            out_as_acc: if True the output stays at accumulator precision
                (used when an activation follows in a fusion).

        Returns:
            SymmetricScalableFilterQuantizationRecord for nodes with
            multiplicative biases, otherwise SymmetricFilterQuantizationRecord.

        Raises:
            NotImplementedError: if a forced output q would require a left
                shift in the kernel.
        """
        w_q = QType.from_array(arr=node.weights, bits=force_width, signed=True)

        calc_width = 32
        calc_q = in_q.q + w_q.q

        acc_bits = bits(astats['range_acc']['max'], astats['range_acc']['min'])
        act_bits = bits(astats['range_out'][0]['max'],
                        astats['range_out'][0]['min'])
        act_acc_bits = max(acc_bits, act_bits)

        calc_int_bits = calc_width - calc_q
        if calc_int_bits < act_acc_bits:
            # we don't have enough space for the integer portion so reduce the
            # precision of the weights
            missing_bits = act_acc_bits - calc_int_bits
            # TODO - This needs improving
            assert w_q.q >= missing_bits, "no space in weights to reduce precision"
            w_q.q = w_q.q - missing_bits
            calc_q = in_q.q + w_q.q
            calc_int_bits = calc_width - calc_q

        c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

        # b_q stays None for bias-less nodes; previously it was left unbound
        # and crashed in the clamp below and the record constructors.
        b_q = None

        if out_as_acc:
            o_q = c_q
            if node.has_bias:
                b_q = QType.from_array(node.biases,
                                       bits=force_width,
                                       signed=True)
        else:
            # The output size is requested to be force_out_width size
            if force_out and force_out.bits:
                # The output fixed point position is also forced
                if force_out.q:
                    if (force_out.bits - force_out.q) < act_acc_bits:
                        # clipping so cannot completely satisfy
                        o_q = QType(bits=force_out.bits,
                                    q=force_out.bits - act_acc_bits,
                                    signed=True)
                    else:
                        if force_out.q > calc_q:
                            # We cannot shift left in the kernel
                            # TODO - This should try to increase the input q
                            # Unlikely to happen
                            raise NotImplementedError()
                        # We can satisfy the force
                        o_q = QType(bits=force_out.bits,
                                    q=force_out.q,
                                    signed=True)
                else:
                    # Only the width is forced
                    o_q = QType.from_min_max(astats['range_out'][0]['max'],
                                             astats['range_out'][0]['min'],
                                             bits=force_out.bits)
            else:
                # The output width is not forced so calculate the output q normally
                o_q = QType.from_min_max(astats['range_out'][0]['max'],
                                         astats['range_out'][0]['min'],
                                         bits=force_width)
                if force_out and force_out.q:
                    # The output fixed point position is forced
                    if force_out.q > calc_q:
                        # We cannot shift left in the kernel
                        # TODO - This should try to increase the input q
                        # Unlikely to happen
                        raise NotImplementedError()
                    o_q.q = force_out.q

            if node.has_bias:
                # Biases are applied at output precision on this path.
                b_q = o_q
        # make sure that the biases are not stored more precisely than the
        # accumulator. It's pointless and will cause a negative shift
        if b_q is not None and b_q.q > acc_q.q:
            b_q.q = acc_q.q

        if isinstance(node, MultiplicativeBiasParameters):
            if node.has_mul_bias:
                mb_q = QType.from_array(node.mul_biases, bits=force_width)
            else:
                mb_q = None
            qrec = SymmetricScalableFilterQuantizationRecord(
                in_qs=[in_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q,
                biases_q=b_q,
                weights_q=w_q,
                mul_biases_q=mb_q,
                constants_are_quantized=False)
        else:
            qrec = SymmetricFilterQuantizationRecord(
                in_qs=[in_q],
                out_qs=[o_q],
                calc_q=c_q,
                acc_q=acc_q,
                biases_q=b_q,
                weights_q=w_q,
                constants_are_quantized=False)

        LOG.debug("filter %s qrec %s", node.name, qrec)
        return qrec