def nms(input_scores,
        input_boxes,
        threshold=0.7,
        numDetections=300,
        score_threshold=None,
        debugContext=''):
    """Run the ``ai.graphcore`` ``nms`` custom op on a set of scored boxes.

    Before the op is invoked the scores are cast to FLOAT, offset by 1e-6,
    multiplied by the valid-area mask of the boxes and, if requested,
    multiplied by a mask of the scores strictly greater than
    ``score_threshold``. Both op inputs are detached.

    Args:
        input_scores: score tensor; a rank-1 tensor gets a leading axis added.
        input_boxes: box tensor; a rank-2 tensor gets a leading axis added.
            NOTE(review): ``get_valid_area_mask`` is applied before that axis
            is added and indexes three axes -- confirm rank-2 boxes are
            actually supported.
        threshold: forwarded to the custom op as its ``threshold`` attribute
            (presumably the IoU threshold -- confirm against the op).
        numDetections: forwarded to the custom op as ``numDetections``.
        score_threshold: optional float; scores not greater than it are
            multiplied by zero before the op runs.
        debugContext: passed to the ``bF.identity`` calls wrapping the inputs.

    Returns:
        Tuple ``(output_boxes, output_keep, num_valids)``: the second op
        output multiplied by the keep flags, the third op output as is, and
        the sum over axis 1 of the flags marking ``output_keep > -1``.

    Raises:
        AssertionError: if the leading dimension of the boxes is not 1, or if
            ``score_threshold`` is given but is not a ``float``.
    """
    load_lib()
    input_scores = input_scores.cast('FLOAT')
    input_boxes = input_boxes.cast('FLOAT')

    # get_valid_area_mask hands the mask back transposed; flip it again.
    area_mask = bF.transpose(get_valid_area_mask(input_boxes), [1, 0])  # 1,n
    # Original note: if score==0, proposals will be ignored -- hence the
    # small offset applied before masking.
    input_scores = input_scores + 1e-6
    masked_scores = input_scores * area_mask
    scores = bF.identity(masked_scores, debugContext=debugContext).detach()
    boxes = bF.identity(input_boxes, debugContext=debugContext).detach()

    # Give both op inputs a leading axis when it is missing.
    if scores.shape.ndims == 1:
        scores = scores.unsqueeze(0)
    if boxes.shape.ndims == 2:
        boxes = boxes.unsqueeze(0)
    assert boxes.pureShape[0] == 1, 'only implemented batch=1'

    if score_threshold is not None:
        assert isinstance(score_threshold, float)
        threshold_tensor = bF.to_tensor(score_threshold, dtype=scores.dtype)
        above_threshold = bF.greater(scores, threshold_tensor)
        above_threshold = bF.cast(above_threshold, target_type=scores.dtype)
        scores = scores * above_threshold

    with bF.name_scope("nms"):
        builder = bF.get_builder()
        op_inputs = [scores.getIpuIndex(), boxes.getIpuIndex()]
        op_attributes = {
            "threshold": threshold,
            "numDetections": numDetections,
        }
        out = builder.customOp(opName="nms",
                               opVersion=1,
                               domain="ai.graphcore",
                               inputs=op_inputs,
                               attributes=op_attributes,
                               numOutputs=3,
                               name="nmsCustomOp")
        # The first op output is not used.
        kept_boxes = bF.TTensor(out[1])
        keep_indices = bF.TTensor(out[2])

        score_dtype = input_scores.dtype
        # Entries of the keep output that are greater than -1 are flagged.
        minus_one = bF.constant(np.asarray(-1, dtype=np.int32))
        keep_flags = bF.cast(bF.greater(keep_indices, minus_one),
                             target_type='INT32')
        num_valids = bF.reduceSum(keep_flags, axes=[1])

        # Multiply the boxes by the flags so unflagged rows become zero.
        keep_flags = bF.cast(keep_flags, target_type=score_dtype)
        keep_flags = bF.unsqueeze(keep_flags, [-1])
        kept_boxes = bF.mul([kept_boxes, keep_flags])
    return kept_boxes, keep_indices, num_valids
def cast(x, dtype, name=''):
    """Convert a Python scalar or a ``bF.TTensor`` to ``dtype``.

    Args:
        x: a plain ``int``/``float`` (exact type match, so subclasses such as
            ``bool`` are rejected) or a ``bF.TTensor``.
        dtype: target type name; ``'float32'`` is rewritten to ``'float'``.
        name: name forwarded to the created constant or cast.

    Returns:
        The result of ``constant(...)`` for scalars, or of ``bF.cast(...)``
        for tensors.

    Raises:
        NotImplementedError: for any other input type.
    """
    target = 'float' if dtype == 'float32' else dtype

    if type(x) in (int, float):
        return constant(x, dtype=target, name=name)
    if isinstance(x, bF.TTensor):
        return bF.cast(x, target, name)
    raise NotImplementedError('type not implemented')
def get_valid_area_mask(boxes):
    """Build a mask flagging the boxes whose area is strictly positive.

    The area is the product of (index 2 - index 0) and (index 3 - index 1)
    along the last axis -- presumably (x1, y1, x2, y2) corners; confirm
    against callers.

    Args:
        boxes: box tensor indexed with three axes (original note: 1,n,4).

    Returns:
        The mask cast to ``boxes.dtype`` and transposed with ``[1, 0]``
        (original note: n,1).
    """
    widths = boxes[:, :, 2] - boxes[:, :, 0]
    heights = boxes[:, :, 3] - boxes[:, :, 1]
    box_areas = widths * heights

    # Zero constant in the numpy dtype that corresponds to the area tensor.
    np_dtype = bF.mappin_gc2npy[box_areas.dtype]
    zero = bF.constant(np.asarray(0.0, dtype=np_dtype))

    has_area = bF.greater(box_areas, zero)
    mask = bF.cast(has_area, target_type=boxes.dtype)
    return bF.transpose(mask, [1, 0])
def random_shuffle(x, seed=None, debugPrefix=""):
    """Return the elements of ``x`` in random order, with the shape of ``x``.

    A tensor of uniform random keys shaped like ``x`` is drawn and flattened,
    a top-k over all of its entries supplies the permutation indices, and the
    flattened input is gathered with them before being reshaped.

    Args:
        x: tensor to shuffle; it is cast to FLOAT first, so the result is
            built from the FLOAT-cast values.
        seed: must be ``None``; seeding is done globally on the session.
        debugPrefix: name scope under which the ops are created.

    Returns:
        The shuffled tensor, reshaped to ``x.pureShape``.

    Raises:
        RuntimeError: if ``seed`` is not ``None``.
    """
    if seed is not None:
        raise RuntimeError(
            'random seed is globally set by session.setRandomSeed')

    with bF.name_scope(debugPrefix):
        x = bF.cast(x, 'FLOAT')

        # One random key per element; k covers every key so the top-k
        # indices span the whole flattened tensor.
        random_keys = bF.flatten(bF.randomuniformlike(x, high=6.0, low=-6.0))
        num_elements = random_keys.pureShape[0]
        k = bF.constant(np.asarray([num_elements]).astype(np.int64))
        _, permutation = bF.topk(random_keys, k, dim=0)

        flat_x = bF.flatten(x)
        permutation = bF.cast(permutation, 'INT32')
        gathered = bF.gather(flat_x, permutation, dim=0)

        # Restore the input's shape.
        shape_tensor = bF.constant(np.asarray(x.pureShape).astype(np.int64))
        return bF.reshape(gathered, shape_tensor)
def select_by_idx(arr, unsigned_keeps, dim):
    """Pick entries of ``arr`` by multiplying with a one-hot selector.

    Args:
        arr: tensor to select from.
        unsigned_keeps: indices to select (presumably non-negative, going by
            the name -- confirm against callers).
        dim: axis of ``arr`` whose size is used as the one-hot depth.
            NOTE(review): ``dim`` only sets the depth; the product is always
            ``selector @ arr``, so confirm that other axes are intended to
            work.

    Returns:
        The matmul of the one-hot selector (cast to ``arr.dtype``) and
        ``arr``.
    """
    selector = bF.cast(bF.one_hot(unsigned_keeps, arr.pureShape[dim]),
                       arr.dtype)
    return bF.matmul(selector, arr)