Пример #1
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Run ``cls.FUNC`` over the first prepared input in float mode.

        A missing quantization record is replaced by ``AllFloatQRec()``. The
        result is cast to the dtype of the first output quantization when the
        record's kernel type starts with ``'float'`` and to ``np.float32``
        otherwise, then returned through ``qrec.get_outputs``.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        first_input = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        if qrec.ktype.startswith('float'):
            result_dtype = qrec.out_qs[0].dtype
        else:
            result_dtype = np.float32
        result = cls.FUNC(first_input).astype(result_dtype)
        return qrec.get_outputs(params, [result], ktype="float")
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Compute ``min(max(x + params.offset, 0), 6) / 6`` on the first input.

        The constants 0 and 6 are built with the dtype of the first input
        quantization when the record's kernel type starts with ``'float'``
        and with ``np.float32`` otherwise. A missing quantization record is
        replaced by ``AllFloatQRec()``.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        first_input = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        if qrec.ktype.startswith('float'):
            in_dtype = qrec.in_qs[0].dtype
        else:
            in_dtype = np.float32
        shifted = first_input + params.offset
        clamped = np.minimum(np.maximum(shifted, in_dtype(0)), in_dtype(6))
        return qrec.get_outputs(params, [clamped / in_dtype(6)],
                                ktype="float")
Пример #3
0
 def execute(cls, params,
             in_tensors,
             qrec: QRec,
             **kwargs):
     """Apply ``softmax_func`` along ``params.axis`` to the first input.

     Floating point overflow is turned into an exception while the inputs are
     prepared and the softmax is evaluated. The result is cast to the dtype of
     the first output quantization when the record's kernel type starts with
     ``'float'`` and to ``np.float32`` otherwise.

     Raises:
         FloatingPointError: if numpy detects an overflow inside the guarded
             section.
     """
     if qrec is None:
         qrec = AllFloatQRec()
     # np.errstate restores the previous numpy error handling on exit, even
     # when the guarded code raises; a bare seterr()/seterr() pair would leave
     # over='raise' active globally after a failure
     with np.errstate(over='raise'):
         in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
         out_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
             'float') else np.float32
         out_tensor = softmax_func(in_tensor,
                                   axis=params.axis).astype(out_dtype)
     return qrec.get_outputs(params, [out_tensor], ktype="float")
Пример #4
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Average pool the first input with a sliding ``params.filter`` window.

        The input is zero padded when ``params.padding`` is non zero. Every
        window is summed over its h and w axes in the calculation dtype and
        multiplied by ``1 / (filter.h * filter.w)``, so padded zeros count
        towards the average. The calculation dtype is the dtype of the first
        output quantization for float kernel types and ``np.float32``
        otherwise.
        """
        qrec = AllFloatQRec() if qrec is None else qrec

        pooled_in = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        in_dims, out_dims = tuple(
            dims[0] for dims in cls.calc_transposed_dims(params))
        window_elems = params.filter.h * params.filter.w

        if qrec.ktype.startswith('float'):
            calc_dtype = qrec.out_qs[0].dtype
        else:
            calc_dtype = np.float32

        # reciprocal of the window size held in the calculation dtype
        scale = np.array(1.0 / window_elems, dtype=calc_dtype)
        pooled_out = np.zeros(out_dims.shape, dtype=calc_dtype)

        pad_h = pad_w = 0
        if params.padding.h + params.padding.w > 0:
            pooled_in = np.pad(pooled_in,
                               params.padding.numpy_pad_shape(in_dims),
                               mode='constant',
                               constant_values=0.0)
            pad_h, pad_w = params.padding.h, params.padding.w

        row_starts = range(0, in_dims.h - params.filter.h + pad_h + 1,
                           params.stride.h)
        for out_row, top in enumerate(row_starts):
            col_starts = range(0, in_dims.w - params.filter.w + pad_w + 1,
                               params.stride.w)
            for out_col, left in enumerate(col_starts):
                # destination element(s) and the source window feeding them
                dst = out_dims.srange(h=out_row, w=out_col)
                src = in_dims.srange(
                    c=[0, out_dims.c, 1],
                    h=[top, top + params.filter.h, 1],
                    w=[left, left + params.filter.w, 1])

                reduce_axes = (out_dims.keys.index('h'),
                               out_dims.keys.index('w'))
                window_sum = np.sum(pooled_in[src],
                                    dtype=calc_dtype,
                                    axis=reduce_axes)
                window_sum = window_sum.reshape(pooled_out[dst].shape)
                pooled_out[dst] = np.multiply(window_sum, scale)

        return qrec.get_outputs(params, [pooled_out], ktype="float")
Пример #5
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Clamp the first input between the bounds held by ``params``.

        Values below ``params.lower_bound`` are raised to it. When
        ``params.upper_bound`` is not ``None`` values above it are lowered to
        it as well. A missing quantization record is replaced by
        ``AllFloatQRec()``.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        first_input = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        has_upper = params.upper_bound is not None
        clamped = np.maximum(first_input, params.lower_bound)
        if has_upper:
            clamped = np.minimum(clamped, params.upper_bound)
        return qrec.get_outputs(params, [clamped], ktype="float")
Пример #6
0
    def execute_piecewise(cls, params, in_tensors, qrec: QRec, op, **kwargs):
        """Combine the first two prepared inputs with the binary callable ``op``.

        When ``params`` is a ``Broadcastable`` whose ``is_broadcasted`` flag
        is set, the inputs first go through ``params.broadcast_inputs``. Extra
        keyword arguments are discarded.
        """
        del kwargs
        qrec = AllFloatQRec() if qrec is None else qrec
        operands = qrec.prepare_inputs(params, in_tensors, ktype="float")
        needs_broadcast = (isinstance(params, Broadcastable)
                           and params.is_broadcasted)
        if needs_broadcast:
            operands = params.broadcast_inputs(operands)

        lhs, rhs = operands[0], operands[1]
        return qrec.get_outputs(params, [op(lhs, rhs)], ktype="float")
Пример #7
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Emit the constant held by ``params``; the inputs are ignored.

        When ``params.qtype`` is set the value comes from ``params.dqvalue``.
        Otherwise ``params.value`` is used, copied first if the parameters are
        flagged as mutated. The value is then passed through the ``quantize``
        method of the first output quantization.
        """
        del in_tensors
        qrec = AllFloatQRec() if qrec is None else qrec

        if params.qtype is not None:
            # the stored value has a quantization: take the dequantized form
            constant = params.dqvalue
        elif params.is_mutated:
            # hand out a copy, otherwise numpy may modify the stored value
            constant = params.value.copy()
        else:
            constant = params.value
        quantized = qrec.out_qs[0].quantize(constant)
        return qrec.get_outputs(params, [quantized], ktype="float")
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Clamp the first input between bounds cast to the input dtype.

        The bounds from ``params`` are converted with the dtype of the first
        input quantization when the record's kernel type starts with
        ``'float'`` and with ``np.float32`` otherwise. The upper bound is only
        applied when ``params.upper_bound`` is not ``None``.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        first_input = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        if qrec.ktype.startswith('float'):
            in_dtype = qrec.in_qs[0].dtype
        else:
            in_dtype = np.float32
        has_upper = params.upper_bound is not None
        clamped = np.maximum(first_input, in_dtype(params.lower_bound))
        if has_upper:
            clamped = np.minimum(clamped, in_dtype(params.upper_bound))
        return qrec.get_outputs(params, [clamped], ktype="float")
Пример #9
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Matrix multiply the first two inputs and add an optional third.

        When more than two inputs are prepared, the third one is added to the
        product. If that input has a single dimension it gets a trailing axis
        first, so it broadcasts against the last-but-one axis of the product.
        Without a third input zero is added.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        operands = qrec.prepare_inputs(params, in_tensors, ktype="float")
        bias_term = 0
        if len(operands) > 2:
            bias_term = operands[2]
            if len(bias_term.shape) == 1:
                bias_term = np.expand_dims(bias_term, -1)
        product = np.matmul(operands[0], operands[1])
        return qrec.get_outputs(params, [product + bias_term], ktype="float")
Пример #10
0
    def average_execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Average the first input over the axes listed in ``params.axis``.

        The sum is accumulated in the dtype of the first output quantization
        for float kernel types and in ``np.float32`` otherwise, divided by the
        number of elements reduced per output value and reshaped to the shape
        of ``params.out_dims[0]``.

        Negative axes are resolved against the input rank before the element
        count is computed, and an empty axis list yields a divisor of 1, so
        every axis specification accepted by ``np.sum`` is also accepted here.
        """
        if qrec is None:
            qrec = AllFloatQRec()
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]

        calc_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
            'float') else np.float32
        sum_by_chan = np.sum(in_tensor,
                             dtype=calc_dtype,
                             axis=tuple(params.axis),
                             keepdims=params.keep_dims)

        # np.sum accepts negative axes; map them to positive indexes so the
        # matching dimensions are found in in_tensor.shape
        rank = len(in_tensor.shape)
        reduced_axes = {
            axis + rank if axis < 0 else axis
            for axis in params.axis
        }
        # the initial value 1 keeps reduce() from raising on an empty selection
        # and keeps sz a plain Python int
        sz = reduce(
            lambda x, y: x * y,
            (dim for idx, dim in enumerate(in_tensor.shape)
             if idx in reduced_axes),
            1)

        return qrec.get_outputs(
            params, [(sum_by_chan / sz).reshape(params.out_dims[0].shape)],
            ktype="float")
Пример #11
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Evaluate tanh on the first input with a dtype dependent kernel.

        The working dtype is the dtype of the first input quantization for
        float kernel types and ``np.float32`` otherwise. For ``np.float32``
        ``np.tanh`` is used. For any other dtype ``tanh_lut_float`` is used
        when the record cache holds ``kernel_type == "lut"`` and
        ``np_fasttanh`` otherwise.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        first_input = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        if qrec.ktype.startswith('float'):
            in_dtype = qrec.in_qs[0].dtype
        else:
            in_dtype = np.float32

        if in_dtype == np.float32:
            activated = np.tanh(first_input).astype(in_dtype)
        elif qrec.cache.get('kernel_type') == "lut":
            activated = tanh_lut_float(first_input, in_dtype)
        else:
            activated = np_fasttanh(first_input, dtype=in_dtype, doalt=True)
        return qrec.get_outputs(params, [activated], ktype="float")
Пример #12
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Fetch graph input ``params.index`` and fit it to ``params.dims``.

        A tensor with the expected number of elements is reshaped. If it also
        has the expected rank, its shape must match the expected shape once
        dimensions of size 1 are ignored. A tensor with a different number of
        elements is passed to ``resize``. ``params.transpose_out`` is applied
        afterwards when set.

        Raises:
            ValueError: if rank and element count match but the shapes differ
                in a dimension larger than 1.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        graph_input = in_tensors[params.index]
        if graph_input.size != params.dims.size():
            graph_input = resize(graph_input, params.dims.shape)
        else:
            if len(graph_input.shape) == len(params.dims.shape):
                # compare the shapes with unit dimensions dropped
                actual = tuple(dim for dim in graph_input.shape if dim > 1)
                wanted = tuple(dim for dim in params.dims.shape if dim > 1)
                if actual != wanted:
                    in_tensor = graph_input
                    raise ValueError(
                        f'{params.name} received input of shape {in_tensor.shape} but expecting {params.dims.shape}'
                    )
            graph_input = graph_input.reshape(params.dims.shape)
        if params.transpose_out:
            graph_input = np.transpose(graph_input, params.transpose_out)
        return qrec.get_outputs(params, [graph_input], ktype="float")
Пример #13
0
 def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
     """Decode box offsets with ``cls.decoder`` and filter them with ``cls.nms``.

     The prepared inputs are read as offsets, scores and anchors, in that
     order. The outputs are the boxes, classes and scores returned by the NMS
     step plus a one element array counting the class entries that differ
     from zero.
     """
     qrec = AllFloatQRec() if qrec is None else qrec
     prepared = qrec.prepare_inputs(params, in_tensors, ktype="float")
     box_offsets, class_scores, box_anchors = (prepared[0], prepared[1],
                                               prepared[2])
     decoded, kept_scores = cls.decoder(params,
                                        qrec,
                                        box_offsets,
                                        box_anchors,
                                        class_scores,
                                        anchors_type='centers')
     boxes, box_scores, box_classes = cls.nms(params, qrec, decoded,
                                              kept_scores)
     detection_count = np.array([sum(box_classes != 0)])
     return qrec.get_outputs(
         params, [boxes, box_classes, box_scores, detection_count],
         ktype="float")
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Fetch graph input ``params.index``, fit it to ``params.dims``, cast it.

        A tensor with the expected number of elements is reshaped. If it also
        has the expected rank, its shape must match the expected shape once
        dimensions of size 1 are ignored. A tensor with a different number of
        elements is passed to ``resize``. The result is cast to the dtype of
        the first output quantization for float kernel types; otherwise to
        ``params.imported_dtype`` when that is truthy, else ``np.float32``.

        Raises:
            ValueError: if rank and element count match but the shapes differ
                in a dimension larger than 1.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        graph_input = in_tensors[params.index]
        if graph_input.size != params.dims.size():
            graph_input = resize(graph_input, params.dims.shape)
        else:
            if len(graph_input.shape) == len(params.dims.shape):
                # compare the shapes with unit dimensions dropped
                actual = tuple(dim for dim in graph_input.shape if dim > 1)
                wanted = tuple(dim for dim in params.dims.shape if dim > 1)
                if actual != wanted:
                    in_tensor = graph_input
                    raise ValueError(
                        f'{params.name} received input of shape {in_tensor.shape} but expecting {params.dims.shape}'
                    )
            graph_input = graph_input.reshape(params.dims.shape)

        if qrec.ktype.startswith('float'):
            out_dtype = qrec.out_qs[0].dtype
        else:
            out_dtype = params.imported_dtype or np.float32
        return qrec.get_outputs(params, [graph_input.astype(out_dtype)],
                                ktype="float")
Пример #15
0
    def execute(cls,
                params: Parameters,
                input_tensors: Sequence[np.ndarray],
                qrec: QRec,
                details: str = None) -> Sequence[np.ndarray]:
        """Dispatch ``params`` to the handler registered for its class.

        The handler table for ``params.__class__`` is looked up in
        ``HANDLERS``. Within it the handler keyed by ``qrec.ktype`` is
        preferred and the one keyed by ``'any'`` is the fallback. For
        ``Transposable`` parameters the per-tensor permutations in
        ``transpose_in`` / ``transpose_out`` are applied around the handler
        call; falsy entries leave the tensor untouched.

        Raises:
            ValueError: if the parameter class has no handlers, or none match
                the kernel type of the quantization record.
        """
        if params.__class__ not in HANDLERS:
            raise ValueError(
                f"no handlers found for {params.__class__.__name__}")
        handlers = HANDLERS[params.__class__]
        if qrec is None:
            qrec = AllFloatQRec()

        handler = handlers.get(qrec.ktype)
        if handler is None:
            handler = handlers.get('any')
            if handler is None:
                raise ValueError(
                    f"no handlers found for {params.__class__.__name__} quantization {qrec.ktype}"
                )

        can_transpose = isinstance(params, Transposable)

        if can_transpose and params.transpose_in:
            permuted_inputs = []
            for idx, tensor in enumerate(input_tensors):
                perm = params.transpose_in[idx]
                permuted_inputs.append(
                    np.transpose(tensor, perm) if perm else tensor)
            input_tensors = permuted_inputs

        output_tensors = handler.execute(params,
                                         input_tensors,
                                         qrec,
                                         details=details,
                                         qname=qrec.ktype)

        if can_transpose and params.transpose_out:
            permuted_outputs = []
            for idx, tensor in enumerate(output_tensors):
                perm = params.transpose_out[idx]
                permuted_outputs.append(
                    np.transpose(tensor, perm) if perm else tensor)
            output_tensors = permuted_outputs
        return output_tensors
Пример #16
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Resize the first input to ``params.out_dims[0]`` by bilinear blending.

        The input is moved to (h, w, c) order. For every output position the
        four neighbours found by flooring and ceiling the source coordinate
        are blended with weights built from the fractional part of that
        coordinate. The source step per output element is ``(in - 1) / out``
        on each axis. The result is moved back from (h, w, c) order.

        The output buffer is allocated in the output dtype (the dtype of the
        first output quantization for float kernel types, ``np.float32``
        otherwise) rather than numpy's default float64, matching the dtype
        used for the interpolation weights.
        """
        if qrec is None:
            qrec = AllFloatQRec()
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        in_dim, out_dim = params.in_dims[0], params.out_dims[0]
        in_tensor = in_tensor.transpose(
            in_dim.transpose_to_order(("h", "w", "c")))
        h_out, w_out, c_out = out_dim.h, out_dim.w, out_dim.c
        wstep = (in_dim.w - 1) / w_out
        hstep = (in_dim.h - 1) / h_out
        out_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
            'float') else np.float32
        out_tensor = np.empty((h_out, w_out, c_out), dtype=out_dtype)
        one = out_dtype(1)

        # the horizontal neighbours and weights only depend on the column, so
        # compute them once instead of once per output row
        columns = []
        for j in range(w_out):
            x_pos = wstep * j
            x_l = math.floor(x_pos)
            columns.append((x_l, math.ceil(x_pos), out_dtype(x_pos - x_l)))

        for i in range(h_out):
            y_pos = hstep * i
            y_l, y_h = math.floor(y_pos), math.ceil(y_pos)
            hc = out_dtype(y_pos - y_l)
            for j, (x_l, x_h, wc) in enumerate(columns):
                # P1..P4: top-left, top-right, bottom-left, bottom-right
                P1 = in_tensor[y_l, x_l, :]
                P2 = in_tensor[y_l, x_h, :]
                P3 = in_tensor[y_h, x_l, :]
                P4 = in_tensor[y_h, x_h, :]
                out_tensor[i, j, :] = P1 * (one - wc) * (one - hc) \
                    + P2 * wc * (one - hc) \
                    + P3 * (one - wc) * hc \
                    + P4 * wc * hc

        out_tensor = out_tensor.transpose(
            out_dim.transpose_from_order(("h", "w", "c")))
        return qrec.get_outputs(params, [out_tensor], ktype="float")
Пример #17
0
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        '''3D convolution by sub-matrix summing.

        The prepared inputs are read as input tensor, weights and biases, in
        that order. For every output channel and every filter tap the strided
        (and dilated) slab of the padded input that the tap touches is
        multiplied by the tap's weights, summed over the input channels of the
        current group and accumulated into the result.

        When a ``details`` dict is passed in ``kwargs`` it receives the
        running minimum and maximum of the accumulator ('min_acc', 'max_acc')
        and of the result before ``apply_multiplicative_bias`` is called
        ('min_pre_mul_bias', 'max_pre_mul_bias').
        '''
        details = kwargs.get('details')
        if qrec is None:
            qrec = AllFloatQRec()
        in_dims, out_dims = params.in_dims[0], params.out_dims[0]
        prepared_in_tensors = qrec.prepare_inputs(
            params, in_tensors, ktype="float")
        in_tensor = prepared_in_tensors[0]
        weights = prepared_in_tensors[1]
        biases = prepared_in_tensors[2]

        # start the recorded ranges at the identity values for min/max
        if details is not None:
            details['min_acc'] = float("Infinity")
            details['max_acc'] = float("-Infinity")
            details['min_pre_mul_bias'] = float("Infinity")
            details['max_pre_mul_bias'] = float("-Infinity")

        # work in (h, w, c) order so the two spatial axes come first
        in_tensor = in_tensor.transpose(
            in_dims.transpose_to_order(['h', 'w', 'c']))
        if params.padding.h + params.padding.w > 0:
            # zero pad only the two leading (spatial) axes
            in_tensor = np.pad(in_tensor,
                               ([params.padding.t,
                                 params.padding.b],
                                [params.padding.l,
                                 params.padding.r])
                               + ([0, 0], ) * (np.ndim(in_tensor)-2),
                               mode='constant',
                               constant_values=0.0)
            pad_w = params.padding.w
            pad_h = params.padding.h
        else:
            pad_w = pad_h = 0

        # weights[out_c, h, w] then yields the vector over input channels
        weights = weights.transpose(params.filter.transpose_to_order(
            ['out_c', 'h', 'w', 'in_c']))

        filt_w = params.filter.w
        filt_h = params.filter.h

        in_w = in_dims.w
        in_h = in_dims.h
        out_c = params.filter.out_c

        # grouped convolution bookkeeping: in_c_off is the first input channel
        # of the current group, out_c_cnt counts output channels within it
        in_c_per_group = in_dims.c // params.groups
        out_c_per_group = out_c // params.groups
        in_c_off = 0
        out_c_cnt = 0

        # extent covered by the filter once dilation is applied
        dillated_filter_w = (params.dilation.w - 1) * (filt_w - 1) + filt_w
        dillated_filter_h = (params.dilation.h - 1) * (filt_h - 1) + filt_h

        out_w = ((in_w - dillated_filter_w + pad_w)) // params.stride.w + 1
        out_h = ((in_h - dillated_filter_h + pad_h)) // params.stride.h + 1

        # accumulate in the output dtype for float kernels, float32 otherwise
        calc_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
            'float') else np.float32

        if params.has_bias:
            # biases = qrec.prepare_biases(params, params.get_uncompressed_biases(),
            #                              params.get_uncompressed_weights(), ktype="float")
            # seed every output channel plane with its bias
            result = np.broadcast_to(biases.reshape(
                out_c, 1, 1), (out_c, out_h, out_w)).copy().astype(calc_dtype)
        else:
            result = np.zeros((out_c, out_h, out_w),
                              dtype=calc_dtype)

        # exclusive end of the range of filter origins on each spatial axis
        const_h = pad_h + in_h - dillated_filter_h + 1
        const_w = pad_w + in_w - dillated_filter_w + 1
        for out_c_i in range(out_dims.c):
            for cur_h in range(filt_h):
                for cur_w in range(filt_w):

                    # selects all elements that the filter element needs to multiply
                    slabhw = np.multiply(in_tensor[cur_h * params.dilation.h:
                                                   const_h + cur_h * params.dilation.h:
                                                   params.stride.h,
                                                   cur_w * params.dilation.w:
                                                   const_w + cur_w * params.dilation.w:
                                                   params.stride.w,
                                                   in_c_off:
                                                   in_c_off + in_c_per_group:
                                                   1],
                                         weights[out_c_i, cur_h, cur_w],
                                         dtype=calc_dtype)

                    # add depthwise
                    slabhw = slabhw.sum(axis=-1)
                    # add to the previous filter elements
                    result[out_c_i] += slabhw

                    # track the accumulator range after every filter tap
                    if details is not None:
                        details['min_acc'] = min(
                            np.min(result[out_c_i]), details['min_acc'])
                        details['max_acc'] = max(
                            np.max(result[out_c_i]), details['max_acc'])

            # move to the next group's input channels once this group's output
            # channels have all been produced
            out_c_cnt += 1
            if out_c_cnt >= out_c_per_group:
                out_c_cnt = 0
                in_c_off += in_c_per_group

        if details is not None:
            details['min_pre_mul_bias'] = min(
                np.min(result), details['min_pre_mul_bias'])
            details['max_pre_mul_bias'] = max(
                np.max(result), details['max_pre_mul_bias'])

        result = apply_multiplicative_bias(qrec,
                                           params, result, axis=0, ktype="float")

        # result was built in (c, h, w) order; restore the graph's order
        result = result.transpose(
            out_dims.transpose_from_order(['c', 'h', 'w']))

        return qrec.get_outputs(params, [result], ktype="float")
    def execute(cls, params,
                in_tensors,
                qrec: QRec,
                **kwargs):
        """Decode SSD box offsets against the anchors and apply per-class NMS.

        The prepared inputs are read as offsets, scores and anchors, in that
        order. The anchors are used as they are, in centre format. Every
        (anchor, class) score above ``params.nms_score_threshold`` is decoded
        into a ``[ymin, xmin, ymax, xmax]`` box. The candidates are ordered by
        descending score, and a candidate is dropped when its intersection
        with an earlier candidate of the same class is at least
        ``params.nms_iou_threshold`` times their union. Up to
        ``params.max_detections`` surviving candidates are written to the
        outputs in score order; unused output rows stay zero.

        Returns:
            The result of ``qrec.get_outputs`` for boxes, classes and scores.
        """
        if qrec is None:
            qrec = AllFloatQRec()
        in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float")
        offsets = in_tensors[0]
        scores = in_tensors[1]
        anchors = in_tensors[2]

        score_threshold = params.nms_score_threshold
        max_candidates = params.max_bb_before_nms
        decoded_bboxes = []
        for i in range(scores.shape[0]):
            # NOTE: the cap is tested with '>' before appending, so up to
            # max_bb_before_nms + 1 candidates are collected. This is the
            # existing behaviour and is deliberately left unchanged.
            if len(decoded_bboxes) > max_candidates:
                break
            for j in range(scores.shape[1]):
                if len(decoded_bboxes) > max_candidates:
                    break
                if scores[i, j] <= score_threshold:
                    continue
                offset = offsets[i]
                anchor = anchors[i]
                xcenter = (offset[CNTX_IDX]/params.x_scale) * anchor[W_IDX] + anchor[CNTX_IDX]
                ycenter = (offset[CNTY_IDX]/params.y_scale) * anchor[H_IDX] + anchor[CNTY_IDX]

                half_h = 0.5 * np.exp(offset[H_IDX]/params.h_scale) * anchor[H_IDX]
                half_w = 0.5 * np.exp(offset[W_IDX]/params.w_scale) * anchor[W_IDX]

                decoded_bboxes.append({
                    "bbox": [ycenter-half_h, xcenter-half_w, ycenter+half_h, xcenter+half_w],
                    "score": scores[i, j],
                    "class": j,
                    "alive": True
                })

        # Order by descending score. list.sort is stable, also with
        # reverse=True, so candidates with equal scores keep their discovery
        # order exactly as the adjacent-swap sort it replaces did.
        decoded_bboxes.sort(key=lambda box: box['score'], reverse=True)

        # NMS: every candidate, including ones already dropped, suppresses the
        # lower scored candidates of its class that overlap it enough
        for idx, reference in enumerate(decoded_bboxes):
            for idx_int in range(idx + 1, len(decoded_bboxes)):
                candidate = decoded_bboxes[idx_int]
                if (not candidate['alive']) or (reference['class'] != candidate['class']):
                    continue
                intersection = rect_intersect_area(reference['bbox'], candidate['bbox'])
                union = rect_union_area(reference['bbox'], candidate['bbox'])
                if intersection >= (params.nms_iou_threshold * union):
                    candidate['alive'] = False

        out_boxes = np.zeros((params.max_detections, 4), dtype=qrec.out_qs[0].dtype)
        out_classes = np.zeros(params.max_detections, dtype=qrec.out_qs[1].dtype)
        out_scores = np.zeros(params.max_detections, dtype=qrec.out_qs[2].dtype)
        out_idx = 0
        for bbox in decoded_bboxes:
            if out_idx >= params.max_detections:
                break
            if bbox['alive']:
                out_boxes[out_idx] = bbox['bbox']
                out_classes[out_idx] = bbox['class']
                out_scores[out_idx] = bbox['score']
                out_idx += 1

        return qrec.get_outputs(params, [out_boxes, out_classes, out_scores], ktype="float")
Пример #19
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Apply ``np.tanh`` to the first prepared input.

        A missing quantization record is replaced by ``AllFloatQRec()``.
        """
        qrec = AllFloatQRec() if qrec is None else qrec
        prepared = qrec.prepare_inputs(params, in_tensors, ktype="float")
        activated = np.tanh(prepared[0])
        return qrec.get_outputs(params, [activated], ktype="float")
Пример #20
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Fully connected layer: input times weights plus optional biases.

        The prepared inputs are read as input tensor, weights and biases, in
        that order. With ``params.batch_size > 1`` the input is reshaped to
        (batch, features) and multiplied with the weights in one ``np.dot``;
        the result is transposed when ``params.batch_minor`` is set.
        Otherwise the flattened input is multiplied with one weight slice per
        output channel. The accumulator uses the dtype of the first output
        quantization for float kernel types and ``np.float32`` otherwise, and
        is passed through ``apply_multiplicative_bias`` before being returned.

        When a ``details`` dict is passed in ``kwargs`` it receives the
        accumulator range as 'min_acc' and 'max_acc'. Statistics are only
        written when ``details`` is present, in both the batched and the
        per-channel path.
        """
        details = kwargs.get('details')
        if qrec is None:
            qrec = AllFloatQRec()

        in_dims, out_dims = tuple(dims[0]
                                  for dims in cls.calc_transposed_dims(params))

        prepared_in_tensors = qrec.prepare_inputs(params,
                                                  in_tensors,
                                                  ktype="float")

        in_tensor = prepared_in_tensors[0]
        weights = prepared_in_tensors[1]
        biases = prepared_in_tensors[2]

        # start the recorded range at the identity values for min/max
        if details is not None:
            details['min_acc'] = float("Infinity")
            details['max_acc'] = float("-Infinity")

        calc_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
            'float') else np.float32

        if params.has_bias:
            acc_tensor = np.ones(out_dims.shape, dtype=calc_dtype) * biases
        else:
            acc_tensor = np.zeros(out_dims.shape, dtype=calc_dtype)
        if params.batch_size > 1:
            in_tensor = in_tensor.reshape(
                (params.batch_size, in_dims.size() // params.batch_size))
            # weights will already be transposed at import
            acc_tensor += np.dot(in_tensor, weights)
            # details is optional: without this guard the batched path failed
            # with a TypeError whenever no details dict was supplied
            if details is not None:
                details['min_acc'] = np.min(acc_tensor)
                details['max_acc'] = np.max(acc_tensor)
            acc_tensor = apply_multiplicative_bias(qrec,
                                                   params,
                                                   acc_tensor,
                                                   1,
                                                   ktype="float")
            if params.batch_minor:
                acc_tensor = acc_tensor.transpose(1, 0)
        else:
            in_tensor = in_tensor.reshape((in_dims.size()))
            filt = params.filter.get_filter_dims()

            for out_c in range(out_dims.c):
                # Expand and normalize the accumulator

                w_slice = weights[filt.srange(out_c=out_c)].reshape(
                    (in_dims.size()))

                res = np.dot(in_tensor, w_slice)

                if details is not None:
                    details['min_acc'] = min(np.sum(res[res < 0]),
                                             details['min_acc'])
                    # the upper end of the range must be tracked with max();
                    # min() here could lower a maximum recorded for an earlier
                    # output channel
                    details['max_acc'] = max(np.sum(res[res > 0]),
                                             details['max_acc'])

                acc_tensor[out_c] += res

                if details is not None:
                    details['min_acc'] = min(np.min(acc_tensor[out_c]),
                                             details['min_acc'])
                    details['max_acc'] = max(np.max(acc_tensor[out_c]),
                                             details['max_acc'])

            acc_tensor = apply_multiplicative_bias(qrec,
                                                   params,
                                                   acc_tensor,
                                                   0,
                                                   ktype="float")

        return qrec.get_outputs(params, [acc_tensor], ktype="float")
Пример #21
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Run ``cls.step_kernel`` for every cell of a recurrent layer.

        The first prepared input is the input sequence. The remaining raw
        inputs are handed to the step kernel in a dict keyed by
        ``params.INPUT_NAMES``. The results of the last
        ``params.n_output_cells`` steps are collected into the output tensor.
        For ``LSTMParameters`` the step kernel also returns a cell state, and
        the last one is returned as a second output.

        ``details`` is optional in ``kwargs``. When it is a dict it receives
        the value range of the step results ('range_state') and, for LSTM
        parameters, of ``args['c_state']`` ('range_cell').

        Returns:
            ``[out_tensor]``, or ``[out_tensor, new_c_state]`` when a cell
            state was produced.
        """
        if qrec is None:
            qrec = AllFloatQRec()
        # details is optional, as in the other kernels; indexing kwargs
        # directly raised KeyError for callers that do not pass it
        details = kwargs.get('details')
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        args = {
            params.INPUT_NAMES[idx]: in_tensors[idx]
            for idx in range(1, len(in_tensors))
        }
        if params.always_reset_state:
            # work on copies of the state tensors so the originals are not
            # modified through args
            for state_key in params.STATE_PARAMETERS:
                args[state_key] = args[state_key].copy()
        assert in_tensor.shape[
            0] == params.n_input_cells, "input shape incorrect - n_input_cells"
        assert in_tensor.shape[
            1] == params.n_inputs, "input shape incorrect - n_inputs"
        if params.revert:
            in_tensor = np.flip(in_tensor, axis=0)
        out_tensor = np.zeros([params.n_output_cells, params.n_states])
        out_idx = 0
        is_lstm = isinstance(params, LSTMParameters)
        if details is not None:
            details['range_state'] = {
                'min': float('inf'),
                'max': float('-inf')
            }
            if is_lstm:
                details['range_cell'] = {
                    'min': float('inf'),
                    'max': float('-inf')
                }

        new_c_state = None
        for idx in range(params.n_cells):
            if is_lstm:
                res, new_c_state = cls.step_kernel(params,
                                                   args,
                                                   idx,
                                                   in_tensor,
                                                   details=details)
            else:
                res = cls.step_kernel(params,
                                      args,
                                      idx,
                                      in_tensor,
                                      details=details)
            # only the trailing n_output_cells steps are emitted
            if idx >= (params.n_cells - params.n_output_cells):
                out_tensor[out_idx] = res
                out_idx += 1

            if details is not None:
                details['range_state']['min'] = min(
                    details['range_state']['min'], res.min())
                details['range_state']['max'] = max(
                    details['range_state']['max'], res.max())
                if is_lstm:
                    details['range_cell']['min'] = min(
                        details['range_cell']['min'], args['c_state'].min())
                    details['range_cell']['max'] = max(
                        details['range_cell']['max'], args['c_state'].max())

        if params.revert:
            out_tensor = np.flip(out_tensor, axis=0)
        if params.output_directions:
            out_tensor = np.expand_dims(out_tensor, 0)
        if new_c_state is not None:
            return [out_tensor, new_c_state]
        return [out_tensor]
Пример #22
0
    def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
        """Run ``cls.step_kernel`` for every cell and collect gate statistics.

        The first prepared input is the input sequence. The remaining raw
        inputs are handed to the step kernel in a dict keyed by
        ``params.INPUT_NAMES``. The results of the last
        ``params.n_output_cells`` steps are collected into an output tensor
        whose dtype is that of the first output quantization for float kernel
        types and ``np.float32`` otherwise. For ``LSTMParameters`` the step
        kernel also returns a cell state, and the last one is returned as a
        second output.

        ``details`` is optional in ``kwargs``. When it is a dict, the range of
        the step results ('range_state') and, for LSTM parameters, of
        ``args['c_state']`` ('range_cell') are recorded. For LSTM and GRU
        parameters the '__rnn_quant' ``DiagCollector`` set is also activated
        for the run and its gate ranges are stored into ``details``.

        Returns:
            ``[out_tensor]``, or ``[out_tensor, new_c_state]`` when a cell
            state was produced.
        """
        if qrec is None:
            qrec = AllFloatQRec()
        # details is optional, as in the other kernels; indexing kwargs
        # directly raised KeyError for callers that do not pass it
        details = kwargs.get('details')
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float")[0]
        args = {
            params.INPUT_NAMES[idx]: in_tensors[idx]
            for idx in range(1, len(in_tensors))
        }
        if params.always_reset_state:
            # work on copies of the state tensors so the originals are not
            # modified through args
            for state_key in params.STATE_PARAMETERS:
                args[state_key] = args[state_key].copy()
        assert in_tensor.shape[
            0] == params.n_input_cells, "input shape incorrect - n_input_cells"
        assert in_tensor.shape[
            1] == params.n_inputs, "input shape incorrect - n_inputs"
        if params.revert:
            in_tensor = np.flip(in_tensor, axis=0)
        out_dtype = qrec.out_qs[0].dtype if qrec.ktype.startswith(
            'float') else np.float32
        out_tensor = np.zeros([params.n_output_cells, params.n_states],
                              dtype=out_dtype)
        out_idx = 0

        # names of the collected ranges stored into details per cell type
        is_lstm = isinstance(params, LSTMParameters)
        if is_lstm:
            gate_names = (
                'i_gate_i',
                'c_gate_i',
                'f_gate_i',
                'o_gate_i',
                'i_gate_r',
                'c_gate_r',
                'f_gate_r',
                'o_gate_r',
                'i_gate',
                'c_gate',
                'f_gate',
                'o_gate',
            )
        elif isinstance(params, GRUParameters):
            gate_names = (
                'z_gate_inp',
                'r_gate_inp',
                'h_gate_inp',
                'z_gate_state',
                'r_gate_state',
                'h_gate_state',
                'z_gate',
                'r_gate',
                'h_gate',
            )
        else:
            gate_names = None

        if details is not None:
            init_stats(details, 'range_state')
            if gate_names is not None:
                DiagCollector.active_set('__rnn_quant')
            if is_lstm:
                init_stats(
                    details,
                    'range_cell',
                )

        new_c_state = None
        for idx in range(params.n_cells):
            if is_lstm:
                res, new_c_state = cls.step_kernel(params,
                                                   args,
                                                   idx,
                                                   in_tensor,
                                                   details=details)
            else:
                res = cls.step_kernel(params,
                                      args,
                                      idx,
                                      in_tensor,
                                      details=details)
            # only the trailing n_output_cells steps are emitted
            if idx >= (params.n_cells - params.n_output_cells):
                out_tensor[out_idx] = res
                out_idx += 1

            if details is not None:
                record_stat(details, 'range_state', res)
                if is_lstm:
                    record_stat(details, 'range_cell', args['c_state'])

        if details is not None and gate_names is not None:
            # move the collected gate ranges into details and reset the
            # collector
            DiagCollector.store_ranges(details, '__rnn_quant', *gate_names)
            DiagCollector.deactivate()
            DiagCollector.clear(set_name='__rnn_quant')

        if params.revert:
            out_tensor = np.flip(out_tensor, axis=0)
        if params.output_directions:
            out_tensor = np.expand_dims(out_tensor, 0)
        if new_c_state is not None:
            return [out_tensor, new_c_state]
        return [out_tensor]