Пример #1
0
    def average_execute(cls, params, in_tensors, qrec: QuantizationRecordBase,
                        **kwargs):
        """Average-pool the first prepared input tensor using float32 maths.

        Every output position receives the sum of one filter-sized input
        window multiplied by ``1 / (filter.h * filter.w)``. When padding is
        requested the input is zero padded first, and the padded zeros take
        part in the window sum, so the divisor is always the full filter area.

        Args:
            params: pooling parameters; ``in_dims``, ``out_dims``, ``filter``,
                ``stride`` and ``padding`` are read.
            in_tensors: input tensors handed to ``qrec.prepare_inputs``; only
                the first prepared tensor is used.
            qrec: quantization record; a ``Float32QuantizationRecord`` is
                created when ``None`` is passed.
            **kwargs: accepted but not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the pooled tensor.
        """
        if qrec is None:
            qrec = Float32QuantizationRecord()

        src = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]
        kernel = params.filter
        window_area = kernel.h * kernel.w

        f32 = qrec.dtype(ktype="float32")
        scale = np.array(1.0 / window_area, dtype=f32)
        pooled = np.zeros(out_dims.shape, dtype=f32)

        padding = params.padding
        if padding.h + padding.w > 0:
            src = np.pad(src,
                         padding.numpy_pad_shape(in_dims),
                         mode='constant',
                         constant_values=0.0)
            pad_h, pad_w = padding.h, padding.w
        else:
            pad_h, pad_w = 0, 0

        # enumerate() supplies the output coordinate for each window origin
        for out_h, h_idx in enumerate(
                range(0, in_dims.h - kernel.h + pad_h + 1, params.stride.h)):
            for out_w, w_idx in enumerate(
                    range(0, in_dims.w - kernel.w + pad_w + 1,
                          params.stride.w)):
                dst_index = out_dims.srange(h=out_h, w=out_w)
                window_index = in_dims.srange(
                    c=[0, out_dims.c, 1],
                    h=[h_idx, h_idx + kernel.h, 1],
                    w=[w_idx, w_idx + kernel.w, 1])

                # collapse the spatial axes of the window, keep the rest
                hw_axes = (out_dims.keys.index('h'),
                           out_dims.keys.index('w'))
                window_sum = np.sum(src[window_index], dtype=f32, axis=hw_axes)
                window_sum = window_sum.reshape(pooled[dst_index].shape)
                pooled[dst_index] = np.multiply(window_sum, scale)

        return qrec.get_outputs(params, [pooled], ktype="float32")
Пример #2
0
    def execute(cls, params,
                in_tensors,
                qrec: QuantizationRecordBase,
                **kwargs):
        """Evaluate a fully connected layer one output channel at a time.

        The input is flattened to ``in_dims.size()`` elements. For each output
        channel the matching weight slice is flattened the same way, the dot
        product of the two is taken and added to an accumulator that starts
        from the biases (or zeros when ``params.has_bias`` is false). The
        accumulator is then passed through ``qrec.apply_multiplicative_bias``.

        Args:
            params: layer parameters; ``in_dims``, ``out_dims``, ``filter`` and
                ``has_bias`` are read.
            in_tensors: tensors handed to ``qrec.prepare_inputs``; the prepared
                list is read as ``[input, weights, biases]``.
            qrec: quantization record. ``None`` or a
                ``Float32QuantizationRecord`` is replaced by a
                ``Float32ScalableFilterQuantizationRecord``.
            **kwargs: an optional ``details`` mapping. When present its
                ``'min_acc'`` and ``'max_acc'`` entries are reset and then
                track the smallest and largest values seen in the products
                and in the accumulator.

        Returns:
            The value of ``qrec.get_outputs`` for the accumulator tensor.
        """
        details = kwargs.get('details')
        # qrec is set by default to Float32QuantizationRecord if None
        if qrec is None or isinstance(qrec, Float32QuantizationRecord):
            qrec = Float32ScalableFilterQuantizationRecord()

        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]

        prepared_in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float32")
        in_tensor = prepared_in_tensors[0]
        weights = prepared_in_tensors[1]
        biases = prepared_in_tensors[2]

        if details is not None:
            details['min_acc'] = float("Infinity")
            details['max_acc'] = float("-Infinity")

        if params.has_bias:
            acc_tensor = np.ones(out_dims.shape, dtype=qrec.dtype(ktype="float32")) * biases
        else:
            acc_tensor = np.zeros(out_dims.shape,
                                  dtype=qrec.dtype(ktype="float32"))

        in_tensor = in_tensor.reshape((in_dims.size()))
        filt = params.filter.get_filter_dims()
        for out_c in range(out_dims.c):
            # flatten this channel's weights to line up with the flat input
            w_slice = weights[filt.srange(out_c=out_c)].reshape((in_dims.size()))

            res = np.dot(in_tensor, w_slice)

            if details is not None:
                details['min_acc'] = min(np.sum(res[res < 0]), details['min_acc'])
                # the running maximum must grow with max(); using min() here
                # let later channels drag an already recorded maximum down
                details['max_acc'] = max(np.sum(res[res > 0]), details['max_acc'])

            acc_tensor[out_c] += res

            if details is not None:
                details['min_acc'] = min(np.min(acc_tensor[out_c]), details['min_acc'])
                details['max_acc'] = max(np.max(acc_tensor[out_c]), details['max_acc'])

        acc_tensor = qrec.apply_multiplicative_bias(
            params, acc_tensor, 0, ktype="float32")

        return qrec.get_outputs(params, [acc_tensor], ktype="float32")
Пример #3
0
    def average_execute(cls, params, in_tensors, qrec: QuantizationRecordBase):
        """Average-pool the first prepared input tensor for the symmetric kernel.

        Window sums are scaled by the integer factor ``(1 << 16) // filter
        area`` and stored in an int32 accumulator. The accumulator is then
        passed through ``at_norm(..., 16)`` and clipped with the first output
        quantizer before being handed to ``qrec.get_outputs``. Padding, when
        requested, uses ``qrec.in_qs[0].pad_zero_point`` as the fill value.

        Args:
            params: pooling parameters; ``in_dims``, ``out_dims``, ``filter``,
                ``stride`` and ``padding`` are read.
            in_tensors: input tensors handed to ``qrec.prepare_inputs``; only
                the first prepared tensor is used.
            qrec: quantization record; ``in_qs``, ``out_qs``, ``dtype``,
                ``prepare_inputs`` and ``get_outputs`` are used. ``None`` is
                not handled here.

        Returns:
            The value of ``qrec.get_outputs`` for the normalised, clipped
            tensor.
        """
        src = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")[0]
        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]
        kernel = params.filter
        window_area = kernel.h * kernel.w

        # integer reciprocal of the window area with 16 fractional bits
        scale = (1 << 16) // window_area

        acc = np.zeros(out_dims.shape, dtype=np.int32)

        padding = params.padding
        if padding.h + padding.w > 0:
            src = np.pad(src,
                         padding.numpy_pad_shape(in_dims),
                         mode='constant',
                         constant_values=qrec.in_qs[0].pad_zero_point)
            pad_h, pad_w = padding.h, padding.w
        else:
            pad_h, pad_w = 0, 0

        # enumerate() supplies the output coordinate for each window origin
        for out_h, h_idx in enumerate(
                range(0, in_dims.h - kernel.h + pad_h + 1, params.stride.h)):
            for out_w, w_idx in enumerate(
                    range(0, in_dims.w - kernel.w + pad_w + 1,
                          params.stride.w)):
                dst_index = out_dims.srange(h=out_h, w=out_w)
                window_index = in_dims.srange(
                    c=[0, out_dims.c, 1],
                    h=[h_idx, h_idx + kernel.h, 1],
                    w=[w_idx, w_idx + kernel.w, 1])

                hw_axes = (out_dims.keys.index('h'),
                           out_dims.keys.index('w'))
                # NOTE(review): the sum dtype is requested with
                # ktype="float32" although this is the symmetric kernel and
                # the result is stored in an int32 array - confirm intended.
                window_sum = np.sum(src[window_index],
                                    dtype=qrec.dtype(ktype="float32"),
                                    axis=hw_axes)
                window_sum = window_sum.reshape(acc[dst_index].shape)
                acc[dst_index] = np.multiply(window_sum, scale)

        out_q = qrec.out_qs[0]
        clipped = out_q.clip(at_norm(acc, 16), out_q.dtype)
        return qrec.get_outputs(params, [clipped], ktype="symmetric")
Пример #4
0
    def average_execute(cls, params, in_tensors, qrec: QuantizationRecordBase,
                        **kwargs):
        """Average the first prepared input tensor over ``params.axis``.

        The tensor is summed over the requested axes and divided by the number
        of elements that were folded into each output value. The result is
        reshaped to ``params.out_dims[0].shape``.

        Args:
            params: reduction parameters; ``axis``, ``keep_dims`` and
                ``out_dims`` are read.
            in_tensors: input tensors handed to ``qrec.prepare_inputs``; only
                the first prepared tensor is used.
            qrec: quantization record; a ``Float32QuantizationRecord`` is
                created when ``None`` is passed.
            **kwargs: accepted but not used by this kernel.

        Returns:
            The value of ``qrec.get_outputs`` for the averaged tensor.
        """
        if qrec is None:
            qrec = Float32QuantizationRecord()
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]

        sum_by_chan = np.sum(in_tensor,
                             dtype=qrec.dtype(ktype="float32"),
                             axis=tuple(params.axis),
                             keepdims=params.keep_dims)

        # Index the shape with each axis directly so that negative axes, which
        # np.sum accepts, contribute to the divisor too. Starting from 1 keeps
        # an empty axis list well defined. Plain Python ints are used so the
        # division below does not change the result dtype.
        sz = 1
        for axis in params.axis:
            sz *= in_tensor.shape[axis]

        return qrec.get_outputs(
            params, [(sum_by_chan / sz).reshape(params.out_dims[0].shape)],
            ktype="float32")
Пример #5
0
    def execute(cls, params,
                in_tensors,
                qrec: QuantizationRecordBase,
                **kwargs):
        """Run the image formatter on the first input tensor and dequantize it.

        The tensor goes through the format change selected by
        ``params.format_change`` and the normalisation selected by
        ``params.norm_func``. With a float quantization record (or none) an
        int8/int16 result is converted to float and divided by the magnitude
        of the integer type's minimum. With any other record the first output
        quantizer dequantizes the result, or the result is returned unchanged
        when the record has no output quantizers.

        Args:
            params: formatter parameters; ``in_dims``, ``out_dims``,
                ``format_change`` and ``norm_func`` are read.
            in_tensors: input tensors; only the first one is used.
            qrec: quantization record; a ``Float32QuantizationRecord`` is
                created when ``None`` is passed.
            **kwargs: accepted but not used by this kernel.

        Returns:
            A single-element list holding the output tensor.

        Raises:
            ValueError: a float record is in use and the formatted tensor is
                neither int8 nor int16.
        """
        in_dim = params.in_dims[0]
        out_dim = params.out_dims[0]
        res = in_tensors[0]
        res = FORMAT_CHANGES[params.format_change](res, in_dim, out_dim)
        res = NORMALIZATIONS[params.norm_func](res)

        # A missing record used to reach qrec.dtype() below and fail with an
        # AttributeError; fall back to a float32 record instead.
        if qrec is None:
            qrec = Float32QuantizationRecord()

        if isinstance(qrec, (Float32QuantizationRecord,
                             Float16QuantizationRecord,
                             Bfloat16QuantizationRecord)):
            # Check the dtype before asking numpy for integer limits so that
            # unsupported outputs raise the message below, not np.iinfo's.
            if res.dtype != np.int8 and res.dtype != np.int16:
                raise ValueError("unsure how to dequantize this output from imageformatter")
            iinfo = np.iinfo(res.dtype)
            res = res.astype(qrec.dtype(ktype="float32")) / -iinfo.min
            return [res]
        return [qrec.out_qs[0].dequantize(res) if qrec.out_qs else res]
Пример #6
0
    def execute(cls, params, in_tensors, qrec: QuantizationRecordBase,
                **kwargs):
        """Float32 convolution accumulated one filter tap at a time.

        The input is reordered to (h, w, c) and the weights to
        (out_c, h, w, in_c). For every output channel and every filter tap the
        strided, dilated slab of input that the tap touches is multiplied by
        the tap's weights, summed over the channel axis and added to that
        output channel. Grouped convolution is handled by moving the input
        channel offset forward once per ``out_c // groups`` output channels.

        Args:
            params: convolution parameters; ``in_dims``, ``out_dims``,
                ``filter``, ``padding``, ``stride``, ``dilation``, ``groups``
                and ``has_bias`` are read.
            in_tensors: tensors handed to ``qrec.prepare_inputs``; the prepared
                list is read as ``[input, weights, biases]``.
            qrec: quantization record. ``None`` or a
                ``Float32QuantizationRecord`` is replaced by a
                ``Float32ScalableFilterQuantizationRecord``.
            **kwargs: an optional ``details`` mapping. When present it is
                filled with ``min_acc``/``max_acc`` (accumulator range) and
                ``min_pre_mul_bias``/``max_pre_mul_bias`` (range of the result
                before ``apply_multiplicative_bias``).

        Returns:
            The value of ``qrec.get_outputs`` for the result, transposed with
            ``out_dims.transpose_from_order(['c', 'h', 'w'])``.
        """
        stats = kwargs.get('details')
        # plain float32 records are swapped for the scalable filter variant
        if qrec is None or isinstance(qrec, Float32QuantizationRecord):
            qrec = Float32ScalableFilterQuantizationRecord()
        in_dims = params.in_dims[0]
        out_dims = params.out_dims[0]
        prepared = qrec.prepare_inputs(params, in_tensors, ktype="float32")
        in_tensor = prepared[0]
        weights = prepared[1]
        biases = prepared[2]

        if stats is not None:
            stats['min_acc'] = float("Infinity")
            stats['max_acc'] = float("-Infinity")
            stats['min_pre_mul_bias'] = float("Infinity")
            stats['max_pre_mul_bias'] = float("-Infinity")

        in_tensor = in_tensor.transpose(
            in_dims.transpose_to_order(['h', 'w', 'c']))
        padding = params.padding
        if padding.h + padding.w > 0:
            # pad the two spatial axes only; remaining axes are left alone
            spatial_pad = ([padding.t, padding.b], [padding.l, padding.r])
            other_pad = ([0, 0], ) * (np.ndim(in_tensor) - 2)
            in_tensor = np.pad(in_tensor,
                               spatial_pad + other_pad,
                               mode='constant',
                               constant_values=0.0)
            pad_w = padding.w
            pad_h = padding.h
        else:
            pad_w = pad_h = 0

        weights = weights.transpose(
            params.filter.transpose_to_order(['out_c', 'h', 'w', 'in_c']))

        filt_h = params.filter.h
        filt_w = params.filter.w
        in_h = in_dims.h
        in_w = in_dims.w
        n_out_c = params.filter.out_c

        in_c_per_group = in_dims.c // params.groups
        out_c_per_group = n_out_c // params.groups
        group_offset = 0
        chans_in_group = 0

        # extent covered by the filter once dilation gaps are included
        span_h = (params.dilation.h - 1) * (filt_h - 1) + filt_h
        span_w = (params.dilation.w - 1) * (filt_w - 1) + filt_w

        n_out_w = ((in_w - span_w + pad_w)) // params.stride.w + 1
        n_out_h = ((in_h - span_h + pad_h)) // params.stride.h + 1

        f32 = qrec.dtype(ktype="float32")
        if params.has_bias:
            # start every output channel from its bias value
            per_channel = biases.reshape(n_out_c, 1, 1)
            result = np.broadcast_to(
                per_channel, (n_out_c, n_out_h, n_out_w)).copy().astype(f32)
        else:
            result = np.zeros((n_out_c, n_out_h, n_out_w), dtype=f32)

        stop_h = pad_h + in_h - span_h + 1
        stop_w = pad_w + in_w - span_w + 1
        for chan in range(out_dims.c):
            for tap_h in range(filt_h):
                row0 = tap_h * params.dilation.h
                for tap_w in range(filt_w):
                    col0 = tap_w * params.dilation.w

                    # every input element this filter tap is multiplied with
                    slab = in_tensor[row0:stop_h + row0:params.stride.h,
                                     col0:stop_w + col0:params.stride.w,
                                     group_offset:group_offset +
                                     in_c_per_group:1]
                    products = np.multiply(slab,
                                           weights[chan, tap_h, tap_w],
                                           dtype=f32)

                    # fold the input channels, then accumulate over taps
                    result[chan] += products.sum(axis=-1)

                    if stats is not None:
                        stats['min_acc'] = min(np.min(result[chan]),
                                               stats['min_acc'])
                        stats['max_acc'] = max(np.max(result[chan]),
                                               stats['max_acc'])

            # move to the next group's input channels when this one is done
            chans_in_group += 1
            if chans_in_group >= out_c_per_group:
                chans_in_group = 0
                group_offset += in_c_per_group

        if stats is not None:
            stats['min_pre_mul_bias'] = min(np.min(result),
                                            stats['min_pre_mul_bias'])
            stats['max_pre_mul_bias'] = max(np.max(result),
                                            stats['max_pre_mul_bias'])

        result = qrec.apply_multiplicative_bias(params,
                                                result,
                                                axis=0,
                                                ktype="float32")

        result = result.transpose(
            out_dims.transpose_from_order(['c', 'h', 'w']))

        return qrec.get_outputs(params, [result], ktype="float32")