def decoder(params, qrec, offsets, anchors, scores, anchors_type='centers'):
    """Decode quantized SSD box offsets against their anchors.

    Boxes for which no score exceeds the quantized NMS score threshold are
    dropped. The remaining offsets are combined with their anchors using the
    fixed-point scalers held on ``qrec`` and returned in min/max (corner)
    layout, which is the form the NMS stage needs.

    Args:
        params: Node parameters. ``nms_score_threshold`` is read here and the
            object is handed to ``qrec.set_scales``.
        qrec: Quantization record providing ``in_qs``, ``out_qs`` and the
            ``scale_*_q`` scalers.
        offsets: Quantized box offsets, indexed ``[box, coordinate]``.
        anchors: Anchor boxes, indexed ``[box, coordinate]``.
        scores: Quantized scores, indexed ``[box, class]``.
        anchors_type: ``'centers'`` when ``anchors`` are already in
            centre/size form; any other value converts them with
            ``convert_cors2cnts``.

    Returns:
        Tuple ``(decoded_anchors, valid_scores)``: the decoded corner boxes
        and the score rows of the boxes that were kept.
    """
    anchors_cnts = anchors if anchors_type == 'centers' else convert_cors2cnts(anchors)

    out_boxes_q = qrec.out_qs[0]
    score_threshold = qrec.in_qs[1].quantize(params.nms_score_threshold)

    # Keep only the boxes that have at least one score above the threshold.
    keep_mask = np.any(scores > score_threshold, axis=1)
    valid_indices = np.arange(offsets.shape[0])[keep_mask]
    valid_scores = scores[valid_indices]
    valid_offsets = offsets[valid_indices]
    valid_anchors = anchors_cnts[valid_indices]

    qrec.set_scales(params)

    def centre(scale_q, anchor_scale_q, cnt_idx, size_idx):
        # Centre coordinate --> Q14
        # cnt = (So*O * Sa*Asize)/scale + Sa*Acnt
        #     = So*Sa/scale * (O*Asize + scale/So * Acnt)
        #     = at_norm(scale_q*(O*Asize + at_norm(scale_anc*Acnt, ancNorm)), norm)
        offset_term = np.multiply(
            valid_offsets[:, cnt_idx], valid_anchors[:, size_idx], dtype=np.int32)
        anchor_term = anchor_scale_q.apply_scales(valid_anchors[:, cnt_idx])
        return scale_q.apply_scales(offset_term + anchor_term)

    def half_extent(scale_q, size_idx):
        # Half height / half width --> Q14
        # half = exp(So*O / scale) * Sa*A
        #      = at_norm(scale_ao*(A*exp17.15(scale*O << (15 - scaleNorm))), aoNorm)
        shift = 15 - scale_q.qnorms
        exponent = np.multiply(
            valid_offsets[:, size_idx], int(scale_q.qbiases), dtype=np.int32) << shift
        grown = np.multiply(
            exp_fp_17_15(exponent), valid_anchors[:, size_idx], dtype=np.int32)
        return qrec.scale_ao_q.apply_scales(grown) >> 1

    xcenter = centre(qrec.scale_x_q, qrec.scale_x_anc_q, CNTX_IDX, W_IDX)
    ycenter = centre(qrec.scale_y_q, qrec.scale_y_anc_q, CNTY_IDX, H_IDX)
    half_h = half_extent(qrec.scale_h_q, H_IDX)
    half_w = half_extent(qrec.scale_w_q, W_IDX)

    # Min-max (corners) format: required for NMS.
    decoded_anchors = np.zeros_like(valid_anchors, dtype=out_boxes_q.dtype)
    decoded_anchors[:, YMIN_IDX] = ycenter - half_h
    decoded_anchors[:, YMAX_IDX] = ycenter + half_h
    decoded_anchors[:, XMIN_IDX] = xcenter - half_w
    decoded_anchors[:, XMAX_IDX] = xcenter + half_w
    return decoded_anchors, valid_scores
def pow_17_15(x, y):
    """Fixed-point power ``x ** y`` evaluated as ``exp(y * ln(x))``.

    The logarithm comes from ``logn_17_15``, the product is renormalised by
    15 bits with ``gap_roundnorm`` and the result is produced by
    ``exp_fp_17_15``.

    Args:
        x: Base value(s).
        y: Exponent. A Python ``int`` is expanded to the shape of ``x``; a
            single-element array is expanded likewise when ``x`` has more
            than one element. Must be non-negative and below ``1 << 15``.

    Returns:
        Whatever ``exp_fp_17_15`` returns for the renormalised product.

    Raises:
        AssertionError: If any exponent is negative or has bits set at or
            above bit 15.
    """
    if isinstance(y, int):
        exponent = np.full_like(x, y)
    elif y.size == 1 and x.size > 1:
        exponent = np.full_like(x, y[0])
    else:
        exponent = y

    assert np.all(exponent >= 0), "only postive exponents currently supported"
    assert np.all(exponent >> 15 == 0), "only fractional exponents currently supported"

    scaled_log = gap_roundnorm(exponent * logn_17_15(x), 15)
    return exp_fp_17_15(scaled_log)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Integer softmax over the flattened first input tensor.

    The maximum is subtracted from every element, the difference is shifted
    left by a norm derived from the input scale and passed through
    ``exp_fp_17_15``. Each exponential is then multiplied by
    ``(((1 << 15) - 1) << 15) // sum``, rounded down by 15 bits, clipped to
    the int16 range and reshaped to ``params.out_dims[0].shape``.

    Args:
        params: Node parameters; ``out_dims[0].shape`` gives the output shape.
        in_tensors: Input tensors; only the first is used.
        qrec: Quantization record; ``in_qs[0].scale`` sets the shift and
            ``get_outputs`` wraps the result.
        **kwargs: Unused.

    Returns:
        The value of ``qrec.get_outputs`` for the int16 result.
    """
    flat_in = in_tensors[0].flatten()
    peak = np.max(flat_in)

    # Shift that lines the input up with the 15 fractional bits the exp
    # helper is fed with.
    scale_log2 = np.ceil(np.log2(qrec.in_qs[0].scale))
    norm = 15 + scale_log2.astype(np.int32)

    centred = flat_in.astype(np.int32) - peak
    exp = exp_fp_17_15(centred << (norm))
    sum_exp = np.sum(exp)

    numerator = np.array([(1 << 15) - 1], dtype=np.uint32) << 15
    inv_sum = numerator // sum_exp

    # Multiply, add half an LSB for rounding, then drop 15 bits.
    rounded = (exp * inv_sum + (1 << 14)) >> 15
    res = np.abs(rounded)

    limits = np.iinfo(np.int16)
    res = np.clip(res, limits.min, limits.max).astype(np.int16)
    res = res.reshape(params.out_dims[0].shape)
    return qrec.get_outputs(params, [res], ktype="symmetric")
def pow_17_15(x, y):
    """Fixed-point power ``x ** y`` evaluated as ``exp(y * ln(x))``.

    Both arguments are promoted with ``np.atleast_1d``. The logarithm comes
    from ``logn_17_15``, the product is renormalised by 15 bits with
    ``gap_roundnorm`` and the result is produced by ``exp_fp_17_15``.
    Elements where ``x == 0`` bypass that result: they yield ``1 << 15`` when
    the exponent is also zero and ``0`` otherwise.

    Args:
        x: Base value(s); scalar or array.
        y: Exponent(s); scalar or array. A single exponent is expanded to the
            shape of ``x`` when ``x`` has more than one element. Must be
            non-negative and below ``1 << 15``.

    Returns:
        Array of results with at least one dimension.

    Raises:
        AssertionError: If any exponent is negative or has bits set at or
            above bit 15.
    """
    x = np.atleast_1d(x)
    y = np.atleast_1d(y)
    # np.atleast_1d always returns an ndarray, so a Python int exponent
    # arrives here as a one-element array; no separate int branch is needed.
    if y.size == 1 and x.size > 1:
        y = np.full_like(x, y[0])
    assert np.all(y >= 0), "only postive exponents currently supported"
    assert np.all(
        y >> 15 == 0), "only fractional exponents currently supported"
    # np.where evaluates both alternatives, so the log/exp path is still
    # computed for zero bases and then masked out.
    return np.where(
        x == 0,
        np.where(y == 0, 1 << 15, 0),
        exp_fp_17_15(gap_roundnorm(y * logn_17_15(x), 15)))
def pow_17_15(x, y):
    """Fixed-point power ``x ** y`` evaluated as ``exp(y * ln(x))``.

    Both arguments are promoted with ``np.atleast_1d``. The logarithm from
    ``logn_17_15`` is clipped to a range derived from ``gap_clb(y)`` before
    being multiplied by the exponent, renormalised by 15 bits with
    ``gap_roundnorm`` and passed to ``exp_fp_17_15``. Elements where
    ``x == 0`` bypass that result: they yield ``1 << 15`` when the exponent
    is also zero and ``0`` otherwise.

    Args:
        x: Base value(s); scalar or array.
        y: Exponent(s); scalar or array. A single exponent is expanded to the
            shape of ``x`` when ``x`` has more than one element. Must be
            non-negative and below ``1 << 15``.

    Returns:
        ``np.int32`` array of results with at least one dimension.

    Raises:
        AssertionError: If any exponent is negative or has bits set at or
            above bit 15.
    """
    x = np.atleast_1d(x)
    y = np.atleast_1d(y)
    # The limits are taken from the exponent as passed in, before it is
    # expanded to the shape of x.
    # NOTE(review): presumably this keeps y * log(x) inside 32 bits - confirm
    # against gap_clb.
    clip_bits = gap_clb(y)
    limit_high = (1 << clip_bits) - 1
    limit_low = -(1 << clip_bits)
    # np.atleast_1d always returns an ndarray, so a Python int exponent
    # arrives here as a one-element array; no separate int branch is needed.
    if y.size == 1 and x.size > 1:
        y = np.full_like(x, y[0])
    assert np.all(y >= 0), "only postive exponents currently supported"
    assert np.all(y >> 15 == 0), "only fractional exponents currently supported"
    clipped_log = np.clip(logn_17_15(x), limit_low, limit_high)
    # np.where evaluates both alternatives, so the log/exp path is still
    # computed for zero bases and then masked out.
    return np.where(
        x == 0,
        np.where(y == 0, np.int32(1 << 15), np.int32(0)),
        exp_fp_17_15(gap_roundnorm(y * clipped_log, 15))).astype(np.int32)
def softmax_sq8(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Integer softmax over the flattened first input tensor.

    Every element has the maximum subtracted, is shifted left by a norm
    derived from the input scale and is passed through ``exp_fp_17_15``. The
    exponentials are multiplied by ``(((1 << 15) - 1) << 15) // sum``,
    rounded down by 15 bits, clipped to the int16 range and reshaped to
    ``params.out_dims[0].shape``.

    Args:
        params: Node parameters; ``out_dims[0].shape`` gives the output shape.
        in_tensors: Input tensors; only the first is used.
        qrec: Quantization record; ``in_qs[0].scale`` sets the shift and
            ``get_outputs`` wraps the result.
        details: Unused; discarded immediately.

    Returns:
        The value of ``qrec.get_outputs`` for the int16 result.
    """
    del details

    values = in_tensors[0].flatten()
    max_val = np.max(values)
    norm = 15 + np.ceil(np.log2(qrec.in_qs[0].scale)).astype(np.int32)

    exponentials = exp_fp_17_15((values.astype(np.int32) - max_val) << (norm))
    total = np.sum(exponentials)
    reciprocal = (np.array([(1 << 15) - 1], dtype=np.uint32) << 15) // total

    # (1 << 14) is half an LSB of the 15 bits dropped by the shift.
    half_lsb = 1 << 14
    probabilities = np.abs((exponentials * reciprocal + half_lsb) >> 15)

    int16_range = np.iinfo(np.int16)
    clipped = np.clip(probabilities, int16_range.min, int16_range.max)
    res = clipped.astype(np.int16).reshape(params.out_dims[0].shape)
    return qrec.get_outputs(params, [res], ktype="symmetric")
def _impl(self, *args, **kwargs):
    """Apply ``exp_fp_17_15`` to the first positional argument.

    The argument is converted with ``np.array`` before the call. Any further
    positional arguments and all keyword arguments are ignored.
    """
    operand = np.array(args[0])
    return exp_fp_17_15(operand)
def execute(cls, params, in_tensors, qrec: QRec, **kwargs):
    """Quantized SSD post-processing: decode boxes, sort by score, run NMS.

    Steps:
      1. Every (box, class) pair whose score exceeds the quantized
         ``params.nms_score_threshold`` is decoded against its anchor into a
         ``[ymin, xmin, ymax, xmax]`` box, until the candidate cap is hit.
      2. Candidates are sorted by descending score (stable for ties).
      3. Within a class, a candidate is marked dead when its intersection
         with a higher-ranked box reaches the quantized IoU threshold times
         their union.
      4. The first ``params.max_detections`` surviving candidates are written
         to zero-initialised output arrays.

    Args:
        params: Node parameters (``nms_score_threshold``,
            ``nms_iou_threshold``, ``max_bb_before_nms``, ``max_detections``).
        in_tensors: Offsets, scores and anchors, in that order, as accepted
            by ``qrec.prepare_inputs``.
        qrec: Quantization record; supplies input/output quantization and the
            ``scale_*_q`` entries in ``qrec.cache`` after ``set_ssd_scales``.
        **kwargs: Unused.

    Returns:
        The value of ``qrec.get_outputs`` for
        ``[out_boxes, out_classes, out_scores]``.
    """
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="symmetric")
    offsets = in_tensors[0]
    scores = in_tensors[1]
    anchors = in_tensors[2]

    # decoded_bboxes: Q14
    # valid_scores: Q7
    anchors_type = "centers"
    if anchors_type == 'centers':
        anchors_cnts = anchors
    else:
        anchors_cnts = convert_cors2cnts(anchors)
    set_ssd_scales(qrec, params)
    scores_q = qrec.in_qs[1]
    score_threshold = scores_q.quantize(params.nms_score_threshold)

    # Loop-invariant scalers and shifts, looked up once.
    scale_x_q = qrec.cache['scale_x_q']
    scale_y_q = qrec.cache['scale_y_q']
    scale_x_anc_q = qrec.cache['scale_x_anc_q']
    scale_y_anc_q = qrec.cache['scale_y_anc_q']
    scale_h_q = qrec.cache['scale_h_q']
    scale_w_q = qrec.cache['scale_w_q']
    scale_ao_q = qrec.cache['scale_ao_q']
    norm_h = 15 - scale_h_q.qnorms
    norm_w = 15 - scale_w_q.qnorms
    bias_h = int(scale_h_q.qbiases)
    bias_w = int(scale_w_q.qbiases)

    decoded_bboxes = []
    cap_reached = False
    for i in range(scores.shape[0]):
        for j in range(scores.shape[1]):
            # NOTE(review): '>' lets the list grow to max_bb_before_nms + 1
            # entries; kept unchanged - confirm this is intended.
            if len(decoded_bboxes) > params.max_bb_before_nms:
                cap_reached = True
                break
            if scores[i, j] <= score_threshold:
                continue
            offset = offsets[i]
            anchor = anchors_cnts[i]
            # xcnt, ycnt --> Q14
            # xcnt = (So*O * Sa*Aw)/params.x_scale + Sa*Ax
            #      = So*Sa/params.x_scale (O*Aw + x_scale/So * Ax)
            #      = at_norm(scale_x*(O*Aw + at_norm(scale_x_anc*Ax, scale_x_ancNorm)), scale_xNorm)
            xcenter = scale_x_q.apply_scales(
                np.multiply(offset[CNTX_IDX], anchor[W_IDX], dtype=np.int32)
                + scale_x_anc_q.apply_scales(anchor[CNTX_IDX]))
            ycenter = scale_y_q.apply_scales(
                np.multiply(offset[CNTY_IDX], anchor[H_IDX], dtype=np.int32)
                + scale_y_anc_q.apply_scales(anchor[CNTY_IDX]))

            # half_h, half_w --> Q14
            # half_h = exp(So*Off / params.h_scale) * Sa*A
            #        = Sa/So * exp(So/params.h_scale * O) * A
            #        = at_norm(scale_ao*(A*exp17.15(scale_h*O << 15-scale_hNorm)), scale_aoNorm)
            exp_h = exp_fp_17_15(
                np.multiply(offset[H_IDX], bias_h, dtype=np.int32) << norm_h)
            exp_w = exp_fp_17_15(
                np.multiply(offset[W_IDX], bias_w, dtype=np.int32) << norm_w)
            half_h = scale_ao_q.apply_scales(
                np.multiply(exp_h, anchor[H_IDX], dtype=np.int32)) >> 1
            half_w = scale_ao_q.apply_scales(
                np.multiply(exp_w, anchor[W_IDX], dtype=np.int32)) >> 1

            decoded_bboxes.append({
                "bbox": [
                    ycenter - half_h, xcenter - half_w,
                    ycenter + half_h, xcenter + half_w
                ],
                "score": scores[i, j],
                "class": j,
                "alive": True
            })
        if cap_reached:
            # Once the cap is hit no later pair can be appended, so leave
            # the outer loop as well.
            break

    # Highest score first. list.sort is stable, also with reverse=True, so
    # equal scores keep their decode order.
    decoded_bboxes.sort(key=lambda box: box['score'], reverse=True)

    # NMS
    # NOTE(review): a box that was itself suppressed still suppresses later
    # boxes (its 'alive' flag is not checked); kept unchanged - confirm this
    # is intended.
    iou_threshold = scores_q.quantize(params.nms_iou_threshold)
    for idx, reference in enumerate(decoded_bboxes):
        for candidate in decoded_bboxes[idx + 1:]:
            if (not candidate['alive']) or (
                    reference['class'] != candidate['class']):
                continue
            intersection = rect_intersect_area(
                reference['bbox'], candidate['bbox'])
            union = rect_union_area(reference['bbox'], candidate['bbox'])
            if intersection >= at_norm(iou_threshold * union, 7):
                candidate['alive'] = False

    out_boxes = np.zeros((params.max_detections, 4),
                         dtype=qrec.out_qs[0].dtype)
    out_classes = np.zeros(params.max_detections,
                           dtype=qrec.out_qs[1].dtype)
    out_scores = np.zeros(params.max_detections,
                          dtype=qrec.out_qs[2].dtype)

    # Slots beyond the number of survivors stay zero.
    survivors = [box for box in decoded_bboxes if box['alive']]
    for out_idx, bbox in enumerate(survivors[:params.max_detections]):
        out_boxes[out_idx] = bbox['bbox']
        out_classes[out_idx] = bbox['class']
        out_scores[out_idx] = bbox['score']

    return qrec.get_outputs(params, [out_boxes, out_classes, out_scores],
                            ktype="symmetric")