示例#1
0
def transpose(a, perm=None, conjugate=False, name='transpose'):
    """Permute the axes of ``a`` through ``bF.transpose``.

    Args:
        a: tensor-like object exposing ``pureShape``; its length is taken
            as the rank.
        perm: axis order to apply. When omitted, ``[1, 0]`` is used, which
            is only accepted for rank-2 inputs.
        conjugate: must be ``False``; conjugation is not implemented.
        name: forwarded to ``bF.transpose`` as ``debugContext``.

    Returns:
        Whatever ``bF.transpose`` returns for ``a`` and ``perm``.

    Raises:
        AssertionError: if ``perm`` is omitted for a non rank-2 input, if
            ``conjugate`` is not ``False``, or if any entry of ``perm`` is
            not smaller than the rank.
    """
    rank = len(a.pureShape)
    if perm is None:
        # The implicit permutation only makes sense for matrices.
        assert rank == 2
        perm = [1, 0]
    assert conjugate is False, 'not implement if conjugate is True'
    # Only the upper bound is checked; negative axes are passed through.
    assert all(axis < rank for axis in perm)
    return bF.transpose(a, perm, debugContext=name)
示例#2
0
def nms(input_scores,
        input_boxes,
        threshold=0.7,
        numDetections=300,
        score_threshold=None,
        debugContext=''):
    """Run the ``nms`` custom op on scores/boxes and mask its box output.

    Both inputs are cast to ``'FLOAT'``. The scores are shifted by 1e-6,
    multiplied by the valid-area mask of the boxes and, when
    ``score_threshold`` is given, zeroed wherever they do not exceed it.
    The prepared scores and boxes are detached and passed to the custom op.

    Args:
        input_scores: score tensor.
        input_boxes: box tensor; after preparation its leading dimension
            must be 1.
        threshold: passed to the custom op as its ``threshold`` attribute.
        numDetections: passed to the custom op as its ``numDetections``
            attribute.
        score_threshold: optional float; scores not greater than it are
            multiplied by zero before the custom op runs.
        debugContext: forwarded to the ``bF.identity`` calls.

    Returns:
        A tuple ``(output_boxes, output_keep, num_valids)`` where
        ``output_keep`` is the third output of the custom op,
        ``output_boxes`` is its second output multiplied by a flag that is
        set where ``output_keep > -1``, and ``num_valids`` is that flag
        summed over axis 1.

    Raises:
        AssertionError: if the leading dimension of the prepared boxes is
            not 1, or ``score_threshold`` is given but is not a float.
    """
    load_lib()
    scores = input_scores.cast('FLOAT')
    boxes = input_boxes.cast('FLOAT')

    # NOTE(review): get_valid_area_mask indexes three axes, so rank-2 boxes
    # would presumably fail here, before the unsqueeze below - confirm.
    area_mask = get_valid_area_mask(boxes)
    area_mask = bF.transpose(area_mask, [1, 0])  # 1,n per the original note
    scores = scores + 1e-6  # if score==0, proposals will be ignored
    masked_scores = scores * area_mask
    nms_scores = bF.identity(masked_scores,
                             debugContext=debugContext).detach()
    nms_boxes = bF.identity(boxes, debugContext=debugContext).detach()

    # Add a leading axis when it is missing.
    if nms_scores.shape.ndims == 1:
        nms_scores = nms_scores.unsqueeze(0)
    if nms_boxes.shape.ndims == 2:
        nms_boxes = nms_boxes.unsqueeze(0)
    assert nms_boxes.pureShape[0] == 1, 'only implemented batch=1'

    if score_threshold is not None:
        assert isinstance(score_threshold, float)
        cutoff = bF.to_tensor(score_threshold, dtype=nms_scores.dtype)
        above_cutoff = bF.greater(nms_scores, cutoff)
        above_cutoff = bF.cast(above_cutoff, target_type=nms_scores.dtype)
        nms_scores = nms_scores * above_cutoff

    with bF.name_scope("nms"):
        builder = bF.get_builder()
        op_inputs = [nms_scores.getIpuIndex(), nms_boxes.getIpuIndex()]
        op_attributes = {
            "threshold": threshold,
            "numDetections": numDetections,
        }
        raw_outputs = builder.customOp(opName="nms",
                                       opVersion=1,
                                       domain="ai.graphcore",
                                       inputs=op_inputs,
                                       attributes=op_attributes,
                                       numOutputs=3,
                                       name="nmsCustomOp")
        # The first of the three outputs is not used.
        output_boxes = bF.TTensor(raw_outputs[1])
        output_keep = bF.TTensor(raw_outputs[2])
        result_dtype = scores.dtype

        # Presumably -1 marks an unused slot in output_keep - TODO confirm.
        minus_one = bF.constant(np.asarray(-1, dtype=np.int32))
        keep_flag = bF.cast(bF.greater(output_keep, minus_one),
                            target_type='INT32')
        num_valids = bF.reduceSum(keep_flag, axes=[1])

        # Multiply by the flag so boxes whose flag is 0 become zero.
        keep_flag = bF.cast(keep_flag, target_type=result_dtype)
        keep_flag = bF.unsqueeze(keep_flag, [-1])
        output_boxes = bF.mul([output_boxes, keep_flag])
    return output_boxes, output_keep, num_valids
示例#3
0
def get_valid_area_mask(boxes):
    """Build a mask that is set for boxes with strictly positive area.

    The area is ``(boxes[..., 2] - boxes[..., 0]) *
    (boxes[..., 3] - boxes[..., 1])``; presumably the last axis holds
    (x1, y1, x2, y2) - TODO confirm.

    Args:
        boxes: rank-3 box tensor; the original author notes shape
            (1, n, 4).

    Returns:
        The ``area > 0`` flag cast to ``boxes.dtype`` and transposed with
        ``[1, 0]``; the original author notes shape (n, 1).
    """
    widths = boxes[:, :, 2] - boxes[:, :, 0]
    heights = boxes[:, :, 3] - boxes[:, :, 1]
    areas = widths * heights

    # The zero constant uses the numpy dtype mapped from the area dtype.
    zero_dtype = bF.mappin_gc2npy[areas.dtype]
    zero = bF.constant(np.asarray(0.0, dtype=zero_dtype))
    has_area = bF.greater(areas, zero)

    mask = bF.cast(has_area, target_type=boxes.dtype)
    return bF.transpose(mask, [1, 0])