Example #1
File: exdet.py  Project: Jaraxxusking/333
  def merge_outputs(self, detections):
    detections = np.concatenate(
        [detection for detection in detections], axis=0).astype(np.float32)
    classes = detections[..., -1]
    keep_inds = (detections[:, 4] > 0)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    results = {}
    for j in range(self.num_classes):
      keep_inds = (classes == j)
      results[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
      soft_nms(results[j + 1], Nt=0.5, method=2)
      results[j + 1] = results[j + 1][:, 0:5]

    scores = np.hstack([
      results[j][:, -1] 
      for j in range(1, self.num_classes + 1)
    ])
    if len(scores) > self.max_per_image:
      kth = len(scores) - self.max_per_image
      thresh = np.partition(scores, kth)[kth]
      for j in range(1, self.num_classes + 1):
        keep_inds = (results[j][:, -1] >= thresh)
        results[j] = results[j][keep_inds]
    return results
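
Note: the np.partition idiom above recurs in almost every example below, so a minimal standalone sketch may help (toy values assumed; ties can keep slightly more than max_per_image boxes):

import numpy as np

scores = np.array([0.9, 0.1, 0.5, 0.7, 0.3], dtype=np.float32)
max_per_image = 3
kth = len(scores) - max_per_image        # cutoff position
thresh = np.partition(scores, kth)[kth]  # kth-smallest score = 3rd-highest here
keep = scores >= thresh                  # keeps the top max_per_image scores
print(thresh, scores[keep])              # 0.5 [0.9 0.5 0.7]
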
Example #2
 def post_process(self, dets, meta, scale=1):
     ret = {}
     for j in range(1, self.num_classes + 1):
         ret[j] = dets[dets[..., -1] == j - 1][..., :-1].cpu().numpy().reshape(
             -1, 5)  # pytorch version incompatible
         soft_nms(ret[j], Nt=0.5, method=2)
         ret[j][:, :4] /= scale
     return ret
Example #3
    def save_result1(self, outputs, batch,
                     results):  # `outputs` holds all feature maps produced by the model (3 feature maps)
        # compute dets_out for each output feature map, merge the 3 dets_out,
        # sort by score, then filter with NMS or another method

        dets_list = []
        dets_score_list = []
        for idx in range(len(outputs)):
            output = outputs[idx]
            reg = output['reg'] if self.opt.reg_offset else None
            dets = ctdet_decode(output['hm'],
                                output['wh'],
                                reg=reg,
                                cat_spec_wh=self.opt.cat_spec_wh,
                                K=self.opt.K)
            dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
            # print(type(dets), dets.shape)
            dets_list.append(dets)

        dets_300 = np.concatenate(
            (dets_list[0], dets_list[1], dets_list[2]),
            axis=1)  # join the 3 dets arrays [1,100,6] into dets_300 [1,300,6]
        dets_300_sort = dets_300.copy()
        array_for_sort = dets_300.reshape(
            -1, dets.shape[2])[:, :5]  # truncate so the score is the last column, for the sort below
        sort_idx = np.lexsort(
            -array_for_sort.T)  # indices that order dets_300 by score, descending
        # print(array_for_sort.shape, sort_idx)
        for key, item in enumerate(sort_idx):
            dets_300_sort[0][key] = dets_300[0][
                item]  # write the rows into dets_300_sort in sorted order (see the note after this function)
        # print(dets_300_sort.shape, dets_300_sort[0][0][4],dets_300_sort[0][101][4])
        # dets_300_sort = dets_300_sort[:,:100,:] # keep only the top 100 detections??
        dets = dets_300_sort  # renamed only so the two statements below keep their variable names

        dets_outs = ctdet_post_process(dets.copy(),
                                       batch['meta']['c'].cpu().numpy(),
                                       batch['meta']['s'].cpu().numpy(),
                                       output['hm'].shape[2],
                                       output['hm'].shape[3],
                                       output['hm'].shape[1])
        for j in range(1, self.opt.num_classes +
                       1):  # reshape the arrays, since NMS requires 2-D input
            dets_outs[0][j] = np.array(dets_outs[0][j],
                                       dtype=np.float32).reshape(-1, 5)
        # print(type(dets_outs),len(dets_outs[0]))
        results_nms = {}
        for j in range(1, self.opt.num_classes + 1):
            results_nms[j] = np.concatenate(
                [dets_out[j] for dets_out in dets_outs],
                axis=0).astype(np.float32)
            # print(j, results_nms[j])
            soft_nms(results_nms[j], Nt=0.5, method=2)
        # print(111, type(dets_outs[0]), dets_outs[0].keys())
        # print(222, type(results_nms), results_nms.keys())   # results_nms is dets_outs[0] after NMS
        # results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_outs[0]
        results[batch['meta']['img_id'].cpu().numpy()[0]] = results_nms
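
Note: the enumerate loop in save_result1 copies rows one at a time; NumPy fancy indexing can reorder all 300 rows in one step. A hedged sketch with assumed toy shapes (np.argsort on the negated score column yields the same descending order, ties aside):

import numpy as np

dets_300 = np.random.rand(1, 300, 6).astype(np.float32)
sort_idx = np.argsort(-dets_300[0, :, 4])  # descending by score (column 4)
dets_300_sort = dets_300[:, sort_idx]      # equivalent to the per-row copy loop
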
Example #4
 def merge_outputs(self, detections):
     results = {}
     for j in range(1, self.num_classes + 1):
         results[j] = np.concatenate([detection[j] for detection in detections], axis=0).astype(np.float32)
         if len(self.scales) > 1 or self.opt.nms:
             soft_nms(results[j], Nt=0.5, method=2)
     scores = np.hstack([results[j][:, 4] for j in range(1, self.num_classes + 1)])
     if len(scores) > self.max_per_image:
         kth = len(scores) - self.max_per_image
         thresh = np.partition(scores, kth)[kth]
         for j in range(1, self.num_classes + 1):
             keep_inds = (results[j][:, 4] >= thresh)
             results[j] = results[j][keep_inds]
     return results
Example #5
def nms_multi(results):
    bbox_change = 0
    for img_id in results[0].keys():
        for categories_id in range(1, 5):
            res_stack = [
                results[i][img_id][categories_id] for i in range(len(results))
            ]
            results[0][img_id][categories_id] = np.vstack(res_stack)
            bbox_num_before = cal_bbox(results[0])
            soft_nms(results[0][img_id][categories_id], Nt=0.5, method=2)
            bbox_num_after = cal_bbox(results[0])
            bbox_change += bbox_num_before - bbox_num_after
    print('Bounding Box change: %s' % bbox_change)
    return results[0]
Example #6
 def merge_outputs(self, detections):
     # print(111, len(detections), detections)
     results = {}
     for j in range(1, self.num_classes + 1):
         results[j] = np.concatenate(
             [detection[j] for detection in detections],
             axis=0).astype(np.float32)
         if len(self.scales) > 1 or self.opt.nms:
             # print(j, results[j].shape, results[j])  # results[j] = [[tlx,tly,brx,bry,score], ...], where j is the class label
             soft_nms(results[j], Nt=0.5, method=2)
     scores = np.hstack(
         [results[j][:, 4] for j in range(1, self.num_classes + 1)])
     if len(scores) > self.max_per_image:
         kth = len(scores) - self.max_per_image
         thresh = np.partition(scores, kth)[kth]
         for j in range(1, self.num_classes + 1):
             keep_inds = (results[j][:, 4] >= thresh)
             results[j] = results[j][keep_inds]
     return results
Example #7
 def merge_outputs(self, detections):
     # detections is a list; each element holds the detections from one scale
     results = {}
     for j in range(1, self.num_classes + 1):
         # gather the detections of class j from every scale
         results[j] = np.concatenate(
             [detection[j] for detection in detections],
             axis=0).astype(np.float32)
         if len(self.scales) > 1 or self.opt.nms:
             soft_nms(results[j], Nt=0.5, method=2)
     scores = np.hstack(
         [results[j][:, 4] for j in range(1, self.num_classes + 1)])
     if len(scores) > self.max_per_image:
         kth = len(scores) - self.max_per_image
         # np.partition puts the kth-smallest element of the array at index kth;
         # everything before it is <= it and everything after is >= it,
         # but neither part is itself sorted
         thresh = np.partition(scores, kth)[kth]
         for j in range(1, self.num_classes + 1):
             keep_inds = (results[j][:, 4] >= thresh)
             results[j] = results[j][keep_inds]
     return results
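
Note: the soft_nms called throughout these examples appears to be the Cython routine from the Soft-NMS paper (Bodla et al., 2017); it works in place on an (N, 5) float32 array of [x1, y1, x2, y2, score], and method=2 selects the Gaussian variant. A pure-NumPy sketch of that variant, for illustration only (function name and defaults are assumptions, not the projects' implementation):

import numpy as np

def soft_nms_gaussian(boxes, sigma=0.5, score_thresh=0.001):
    """Gaussian Soft-NMS: decay, rather than discard, overlapping scores."""
    boxes = boxes.copy()
    for i in range(boxes.shape[0]):
        # swap the highest-scoring remaining box into position i
        max_pos = i + np.argmax(boxes[i:, 4])
        boxes[[i, max_pos]] = boxes[[max_pos, i]]
        # IoU of box i against every later box
        x1 = np.maximum(boxes[i, 0], boxes[i + 1:, 0])
        y1 = np.maximum(boxes[i, 1], boxes[i + 1:, 1])
        x2 = np.minimum(boxes[i, 2], boxes[i + 1:, 2])
        y2 = np.minimum(boxes[i, 3], boxes[i + 1:, 3])
        inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        areas = ((boxes[i + 1:, 2] - boxes[i + 1:, 0]) *
                 (boxes[i + 1:, 3] - boxes[i + 1:, 1]))
        iou = inter / (area_i + areas - inter + 1e-9)
        boxes[i + 1:, 4] *= np.exp(-(iou ** 2) / sigma)  # Gaussian decay
    return boxes[boxes[:, 4] > score_thresh]
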
Example #8
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K = db.configs["top_k"]
    aggr_weight = db.configs["aggr_weight"]
    scores_thresh = db.configs["scores_thresh"]
    center_thresh = db.configs["center_thresh"]
    suppres_ghost = db.configs["suppres_ghost"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]

    cluster_radius = db.configs["cluster_radius"]

    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}

    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]

        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        height, width = image.shape[0:2]

        detections = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = decode_func(nnet,
                               images,
                               K,
                               aggr_weight=aggr_weight,
                               scores_thresh=scores_thresh,
                               center_thresh=center_thresh,
                               kernel=nms_kernel,
                               debug=debug)
            dets = dets.reshape(2, -1, 14)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets[1, :, [5, 7, 9, 11]] = out_width - dets[1, :, [5, 7, 9, 11]]
            dets[1, :, [7, 8, 11, 12]] = dets[1, :, [11, 12, 7, 8]].copy()
            dets = dets.reshape(1, -1, 14)

            _rescale_dets(dets, ratios, borders, sizes)
            _rescale_ex_pts(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            dets[:, :, 5:13] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]

        keep_inds = (detections[:, 4] > 0)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):

            keep_inds = (classes == j)

            top_bboxes[image_id][j + 1] = detections[keep_inds].astype(
                np.float32)

            soft_nms(top_bboxes[image_id][j + 1],
                     Nt=nms_threshold,
                     method=nms_algorithm)

        scores = np.hstack(
            [top_bboxes[image_id][j][:, 4] for j in range(1, categories + 1)])

        if len(scores) > max_per_image:

            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, 4] >= thresh)

                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        for j in range(1, categories + 1):
            # collect the indices to drop, then delete them once after the
            # loop; deleting inside the loop mutates the array being iterated
            # and re-applies stale indices on every pass
            keep = []
            i = 0
            for bbox in top_bboxes[image_id][j]:
                sc = bbox[4]
                ex = bbox[5:13].astype(np.int32).reshape(4, 2)
                feature_val = feature(ex)
                if feature_val > cluster_radius:
                    keep.append(i)  # despite the name, these rows are deleted below
                i = i + 1
            top_bboxes[image_id][j] = np.delete(top_bboxes[image_id][j],
                                                keep,
                                                axis=0)

        if suppres_ghost:

            for j in range(1, categories + 1):
                n = len(top_bboxes[image_id][j])
                for k in range(n):
                    inside_score = 0
                    if top_bboxes[image_id][j][k, 4] > 0.2:
                        for t in range(n):
                            if _box_inside(top_bboxes[image_id][j][t],
                                           top_bboxes[image_id][j][k]):
                                inside_score += top_bboxes[image_id][j][t, 4]
                        if inside_score > top_bboxes[image_id][j][k, 4] * 3:
                            top_bboxes[image_id][j][k, 4] /= 2

        if debug:

            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)

            bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, 4] > 0.3)
                cat_name = db.class_name(j)
                cat_size = cv2.getTextSize(cat_name + '0',
                                           cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                color = np.random.random((3, )) * 0.6 + 0.4
                color = color * 255
                color = color.astype(np.int32).tolist()
                for bbox in top_bboxes[image_id][j][keep_inds]:

                    sc = bbox[4]
                    bbox = bbox[0:4].astype(np.int32)
                    txt = '{}{:.0f}'.format(cat_name, sc * 10)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        cv2.rectangle(
                            image, (bbox[0], bbox[1] + 2),
                            (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                            color, -1)
                        cv2.putText(image,
                                    txt, (bbox[0], bbox[1] + cat_size[1] + 2),
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    0.5, (0, 0, 0),
                                    thickness=1,
                                    lineType=cv2.LINE_AA)
                    else:
                        cv2.rectangle(image,
                                      (bbox[0], bbox[1] - cat_size[1] - 2),
                                      (bbox[0] + cat_size[0], bbox[1] - 2),
                                      color, -1)
                        cv2.putText(image,
                                    txt, (bbox[0], bbox[1] - 2),
                                    cv2.FONT_HERSHEY_SIMPLEX,
                                    0.5, (0, 0, 0),
                                    thickness=1,
                                    lineType=cv2.LINE_AA)
                    cv2.rectangle(image, (bbox[0], bbox[1]),
                                  (bbox[2], bbox[3]), color, 2)
            debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            cv2.imwrite(debug_file, image)
            cv2.imshow('out', image)
            cv2.waitKey()

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
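
Note: the line dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] above maps the horizontally flipped copy's boxes back onto the unflipped frame: x-coordinates are mirrored about out_width, and x1/x2 are swapped so x1 <= x2 still holds. A tiny sketch with assumed values:

import numpy as np

out_width = 100.0
dets = np.zeros((2, 1, 14), dtype=np.float32)
dets[1, 0, 0:4] = [10, 20, 30, 40]                   # box on the flipped image
dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]  # mirror x and swap x1/x2
print(dets[1, 0, 0:4])                               # [70. 20. 90. 40.]
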
Example #9
def worker(input_q, output_q):
    # Load the pretrained ExtremeNet model into memory.

    cfg_file = os.path.join(
        "/data1/hhq/project/extremenet-inference/config/ExtremeNet.json")
    print("cfg_file: {}".format(cfg_file))

    with open(cfg_file, "r") as f:
        configs = json.load(f)

    configs["system"]["snapshot_name"] = "ExtremeNet"
    system_configs.update_config(configs["system"])
    print("system config...")

    K = configs["db"]["top_k"]
    aggr_weight = configs["db"]["aggr_weight"]
    scores_thresh = configs["db"]["scores_thresh"]
    center_thresh = configs["db"]["center_thresh"]
    suppres_ghost = True
    nms_kernel = 3

    scales = configs["db"]["test_scales"]
    weight_exp = 8
    categories = configs["db"]["categories"]
    nms_threshold = configs["db"]["nms_threshold"]
    max_per_image = configs["db"]["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }["exp_soft_nms"]
    nnet = NetworkFactory(None)
    nnet.load_pretrained_params(
        "/data1/hhq/project/extremenet-inference/model/ExtremeNet_250000.pkl")
    nnet.cuda()
    nnet.eval_mode()

    fps = FPS().start()
    mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32)
    std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32)
    top_bboxes = {}

    while True:
        fps.update()
        frame = input_q.get()
        # image = cv2.imread("/data/project/extremenet-inference/inputs/16004479832_a748d55f21_k.jpg")
        height, width = frame[1].shape[0:2]
        detections = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(frame[1], (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, mean, std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = kp_decode(nnet,
                             images,
                             K,
                             aggr_weight=aggr_weight,
                             scores_thresh=scores_thresh,
                             center_thresh=center_thresh,
                             kernel=nms_kernel,
                             debug=True)
            dets = dets.reshape(2, -1, 14)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets[1, :, [5, 7, 9, 11]] = out_width - dets[1, :, [5, 7, 9, 11]]
            dets[1, :, [7, 8, 11, 12]] = dets[1, :, [11, 12, 7, 8]].copy()
            dets = dets.reshape(1, -1, 14)

            _rescale_dets(dets, ratios, borders, sizes)
            _rescale_ex_pts(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            dets[:, :, 5:13] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]

        # reject detections with negative scores
        keep_inds = (detections[:, 4] > 0)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        image_id = 0  # image ids
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = \
                detections[keep_inds].astype(np.float32)
            soft_nms(top_bboxes[image_id][j + 1],
                     Nt=nms_threshold,
                     method=nms_algorithm)

        scores = np.hstack(
            [top_bboxes[image_id][j][:, 4] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, 4] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        if suppres_ghost:
            for j in range(1, categories + 1):
                n = len(top_bboxes[image_id][j])
                for k in range(n):
                    inside_score = 0
                    if top_bboxes[image_id][j][k, 4] > 0.2:
                        for t in range(n):
                            if _box_inside(top_bboxes[image_id][j][t],
                                           top_bboxes[image_id][j][k]):
                                inside_score += top_bboxes[image_id][j][t, 4]
                        if inside_score > top_bboxes[image_id][j][k, 4] * 3:
                            top_bboxes[image_id][j][k, 4] /= 2

        # plot bound box and oct mask
        color_list = colormap(rgb=True)
        mask_color_id = 0
        # image = cv2.imread("/data/project/extremenet-inference/inputs/16004479832_a748d55f21_k.jpg")
        image = frame[1]
        input_image = image.copy()
        mask_image = image.copy()
        bboxes = {}
        for j in range(1, categories + 1):
            keep_inds = (top_bboxes[image_id][j][:, 4] > 0.5)
            cat_name = class_name[j]
            for bbox in top_bboxes[image_id][j][keep_inds]:
                sc = bbox[4]
                ex = bbox[5:13].astype(np.int32).reshape(4, 2)
                bbox = bbox[0:4].astype(np.int32)
                txt = '{}{:.2f}'.format(cat_name, sc)
                color_mask = color_list[mask_color_id % len(color_list), :3]
                mask_color_id += 1
                image = vis_bbox(
                    image,
                    (bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]))
                image = vis_class(image, (bbox[0], bbox[1] - 2), txt)
                image = vis_octagon(image, ex, color_mask)
                image = vis_ex(image, ex, color_mask)

        output_q.put((frame[0], image))
    fps.stop()
Example #10
File: lv.py  Project: zsq-cv/CenterNet
def kp_detection_image(image, db: LV, nnet: NetworkFactory,
                       debug=False, decode_func=kp_decode, db_ind=None,
                       debug_dir=None):
    """对单张图做detection

    :param image: 使用cv2.imread读入的图
    :param db:
    :param nnet:
    :param debug:
    :param decode_func:
    :param db_ind:
    :param debug_dir:
    :return: {[1-5] -> (该类中检测到的数目, 5)}, 分别为tl_xs, tl_ys, br_xs, br_ys, scores
    """
    if debug and (db_ind is None or debug_dir is None):
        raise ValueError(
            "db_ind and debug_dir should be specified when debug is turned on")

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    height, width = image.shape[0:2]

    detections = []
    center_points = []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        # unclear why this bitwise OR is done (see the note after this example)
        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        # (inp_height + 1) and (inp_width + 1) are guaranteed to be divisible by 4
        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        # first resize according to the scale
        resized_image = cv2.resize(image, (new_width, new_height))
        # then crop around the scaled image's center to [inp_height, inp_width];
        # since inp_height and inp_width are always larger than new_height and
        # new_width, this effectively enlarges the canvas around the center,
        # padding the borders with black
        resized_image, border, offset = crop_image(
            resized_image, new_center, [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)

        # resized_image is (H, W, C); transpose to (C, H, W) for pytorch
        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        # this size is the extent of the actual content; resized_image itself is [inp_height, inp_width]
        sizes[0] = [int(height * scale), int(width * scale)]
        # this is out divided by inp
        ratios[0] = [height_ratio, width_ratio]

        # this stacks the original image together with its horizontally flipped copy
        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        # dets: (batch, 2 * num_dets, 8)
        # center: (batch, 2 * K, 4)
        dets, center = decode_func(nnet, images, K,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)
        dets = dets.reshape(2, -1, 8)
        center = center.reshape(2, -1, 4)
        # these two steps map the flipped image's detections back onto the original image
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]] = out_width - center[1, :, [0]]
        dets = dets.reshape(1, -1, 8)  # (1, 2 * num_dets, 8)
        center = center.reshape(1, -1, 4)  # (1, 2 * K, 4)

        # reject boxes that are invalid in the original image
        _rescale_dets(dets, ratios, borders, sizes)

        center[..., [0]] /= ratios[:, 1][:, None, None]
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None],
                out=center[..., [0]])
        np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None],
                out=center[..., [1]])

        # restore coordinates in the original image
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale

        # center points are only taken from the scale == 1 pass
        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    # merge the detections from all scales
    detections = np.concatenate(detections, axis=1)         # (1, 2 * num_dets * len(scales), 8)
    center_points = np.concatenate(center_points, axis=1)   # (1, 2 * K, 4)

    classes = detections[..., -1]
    classes = classes[0]            # (2 * num_dets * len(scales),)
    detections = detections[0]      # (2 * num_dets * len(scales), 8)
    center_points = center_points[0]    # (2 * K, 4)

    # collect all valid candidate boxes
    valid_ind = detections[:, 4] > -1
    valid_detections = detections[valid_ind]    # (num valid, 8)

    box_width = valid_detections[:, 2] - valid_detections[:, 0]     # (num valid,)
    box_height = valid_detections[:, 3] - valid_detections[:, 1]    # (num valid,)

    # small candidate boxes vs. large candidate boxes
    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)

    s_detections = valid_detections[s_ind]  # (num small, 8)
    l_detections = valid_detections[l_ind]  # (num large, 8)

    # small boxes: check whether a same-class center point lies in the central
    # region; one such center point suffices, and the highest-scoring one is used
    s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
    s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
    s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    s_left_x = s_left_x[np.newaxis, :]
    s_right_x = s_right_x[np.newaxis, :]
    s_top_y = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]

    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score],
        axis=0)
    s_detections[:, 4][ind_s_new_score] = \
        (s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3

    # large boxes: check whether a center point lies in the central region
    l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    l_left_x = l_left_x[np.newaxis, :]
    l_right_x = l_right_x[np.newaxis, :]
    l_top_y = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]

    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][
                                                    np.newaxis, :]) == 0
    ind_l_new_score = np.max(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
         (ind_by + 0) & (ind_cls + 0)),
        axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_l_new_score],
        axis=0)
    l_detections[:, 4][ind_l_new_score] = \
        (l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3

    # merge the large-box and small-box detections and sort by score
    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(-detections[:, 4])]
    classes = detections[..., -1]

    # reject detections with negative scores
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    ret = {}
    for j in range(categories):
        keep_inds = (classes == j)
        ret[j + 1] = detections[keep_inds][:, 0:7].astype(
            np.float32)
        if merge_bbox:
            soft_nms_merge(ret[j + 1], Nt=nms_threshold,
                           method=nms_algorithm, weight_exp=weight_exp)
        else:
            soft_nms(ret[j + 1], Nt=nms_threshold,
                     method=nms_algorithm)
        ret[j + 1] = ret[j + 1][:, 0:5]

    scores = np.hstack([
        ret[j][:, -1]
        for j in range(1, categories + 1)
    ])
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (ret[j][:, -1] >= thresh)
            ret[j] = ret[j][keep_inds]

    if debug:
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        im = image[:, :, (2, 1, 0)]
        fig, ax = plt.subplots(figsize=(12, 12))
        fig = ax.imshow(im, aspect='equal')
        plt.axis('off')
        fig.axes.get_xaxis().set_visible(False)
        fig.axes.get_yaxis().set_visible(False)
        # bboxes = {}
        for j in range(1, categories + 1):
            keep_inds = (ret[j][:, -1] >= 0.4)      # adjust the drawing score threshold here
            cat_name = db.class_name(j)
            for bbox in ret[j][keep_inds]:
                score = bbox[4]
                bbox = bbox[0:4].astype(np.int32)
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                # if (xmax - xmin) * (ymax - ymin) > 5184:
                ax.add_patch(
                    plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                  fill=False, edgecolor=colours[j - 1],
                                  linewidth=4.0))
                ax.text(xmin + 1, ymin - 3, '{} {:.3f}'.format(cat_name, score),
                        bbox=dict(facecolor=colours[j - 1], ec='black',
                                  lw=2, alpha=0.5),
                        fontsize=15, color='white', weight='bold')

        # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
        debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
        # plt.savefig(debug_file1)
        plt.savefig(debug_file2, bbox_inches='tight', pad_inches=0)
        plt.close()
        # cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

        # also save the ground-truth image for comparison
        db.display(db_ind, os.path.join(debug_dir, "{}_gt.jpg".format(db_ind)),
                   show=False)

    return ret
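
Note on the bitwise OR questioned above: x | 127 sets the low 7 bits, rounding x up to the next value of the form k * 128 - 1, so inp + 1 is a multiple of 128 and divides evenly by the output stride of 4 (and presumably by every downsampling stage of the hourglass backbone; that part is an assumption). A quick check:

for x in (500, 511, 512, 640):
    print(x, '->', x | 127, '; (x | 127) + 1 =', (x | 127) + 1)
# 500 -> 511 ; 512    511 -> 511 ; 512    512 -> 639 ; 640    640 -> 767 ; 768
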
Example #11
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]

        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        height, width = image.shape[0:2]

        detections = []
        center_points = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets, center = decode_func(nnet,
                                       images,
                                       K,
                                       ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]],
                    0,
                    sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]],
                    0,
                    sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                center_points.append(center)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]

        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)

        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])
        s_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_s_new_score],
            axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2 +
            center_points[index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_l_new_score],
            axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2 +
            center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]

        #for i in range(detections.shape[0]):
        #   box_width = detections[i,2]-detections[i,0]
        #   box_height = detections[i,3]-detections[i,1]
        #   if box_width*box_height<=22500 and detections[i,4]!=-1:
        #     left_x = (2*detections[i,0]+1*detections[i,2])/3
        #     right_x = (1*detections[i,0]+2*detections[i,2])/3
        #     top_y = (2*detections[i,1]+1*detections[i,3])/3
        #     bottom_y = (1*detections[i,1]+2*detections[i,3])/3
        #     temp_score = copy.copy(detections[i,4])
        #     detections[i,4] = -1
        #     for j in range(center_points.shape[0]):
        #        if (classes[i] == center_points[j,2])and \
        #           (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
        #           ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
        #           detections[i,4] = (temp_score*2 + center_points[j,3])/3
        #           break
        #   elif box_width*box_height > 22500 and detections[i,4]!=-1:
        #     left_x = (3*detections[i,0]+2*detections[i,2])/5
        #     right_x = (2*detections[i,0]+3*detections[i,2])/5
        #     top_y = (3*detections[i,1]+2*detections[i,3])/5
        #     bottom_y = (2*detections[i,1]+3*detections[i,3])/5
        #     temp_score = copy.copy(detections[i,4])
        #     detections[i,4] = -1
        #     for j in range(center_points.shape[0]):
        #        if (classes[i] == center_points[j,2])and \
        #           (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
        #           ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
        #           detections[i,4] = (temp_score*2 + center_points[j,3])/3
        #           break
        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(
                np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1],
                               Nt=nms_threshold,
                               method=nms_algorithm,
                               weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1],
                         Nt=nms_threshold,
                         method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack(
            [top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        if debug:
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]
            fig, ax = plt.subplots(figsize=(12, 12))
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)
            #bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4)
                cat_name = db.class_name(j)
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    xmin = bbox[0]
                    ymin = bbox[1]
                    xmax = bbox[2]
                    ymax = bbox[3]
                    #if (xmax - xmin) * (ymax - ymin) > 5184:
                    ax.add_patch(
                        plt.Rectangle((xmin, ymin),
                                      xmax - xmin,
                                      ymax - ymin,
                                      fill=False,
                                      edgecolor=colours[j - 1],
                                      linewidth=4.0))
                    ax.text(xmin + 1,
                            ymin - 3,
                            '{:s}'.format(cat_name),
                            bbox=dict(facecolor=colours[j - 1],
                                      ec='black',
                                      lw=2,
                                      alpha=0.5),
                            fontsize=15,
                            color='white',
                            weight='bold')

            debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            plt.savefig(debug_file1)
            plt.savefig(debug_file2)
            plt.close()
            #cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
Example #12
File: coco.py  Project: yawudede/CPNDet
def post_process(db, debug, num_images, weight_exp, merge_bbox, categories, 
            nms_threshold, max_per_image, nms_algorithm, det_queue, top_bboxes_queue): 
    top_bboxes = {}
    for ind in range(0, num_images):
        det_bboxes = det_queue.get(block=True)
        detections = det_bboxes[0]
        classes = det_bboxes[1]
        image_id = det_bboxes[2]
        
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][j][:, -1] 
            for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth    = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]
#         if debug:
#             image_file = db.image_file(ind)
#             image      = cv2.imread(image_file)
#             im         = image[:, :, (2, 1, 0)]
#             fig, ax    = plt.subplots(figsize=(12, 12)) 
#             fig        = ax.imshow(im, aspect='equal')
#             plt.axis('off')
#             fig.axes.get_xaxis().set_visible(False)
#             fig.axes.get_yaxis().set_visible(False)
#             #bboxes = {}
#             for j in range(1, categories + 1):
#                 keep_inds = (top_bboxes[image_id][j][:, -1] >= 0)
#                 cat_name  = db.class_name(j)
#                 for bbox in top_bboxes[image_id][j][keep_inds]:
#                   bbox  = bbox[0:4].astype(np.int32)
#                   xmin     = bbox[0]
#                   ymin     = bbox[1]
#                   xmax     = bbox[2]
#                   ymax     = bbox[3]
#                   #if (xmax - xmin) * (ymax - ymin) > 5184:
#                   ax.add_patch(plt.Rectangle((xmin, ymin),xmax - xmin, ymax - ymin, fill=False, edgecolor= colours[j-1], 
#                                linewidth=4.0))
#                   ax.text(xmin+1, ymin-3, '{:s}'.format(cat_name), bbox=dict(facecolor= colours[j-1], ec='black', lw=2,alpha=0.5),
#                           fontsize=15, color='white', weight='bold')

#             #debug_file1 = os.path.join("validations/{}.pdf".format(db_ind))
#             #debug_file2 = os.path.join("validations/{}.jpg".format(db_ind))
#             #plt.savefig(debug_file1)
#             #plt.savefig(debug_file2)
#             plt.close()
#             #cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    top_bboxes_queue.put(top_bboxes)
Example #13
def inference(db, nnet, image, decode_func=kp_decode):
    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    height, width = image.shape[0:2]
    detections, center_points = [], []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        resized_image = cv2.resize(image, (new_width, new_height))
        resized_image, border, offset = crop_image(resized_image, new_center,
                                                   [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, db.mean, db.std)

        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0] = [int(height * scale), int(width * scale)]
        ratios[0] = [height_ratio, width_ratio]

        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        dets, center = decode_func(nnet,
                                   images,
                                   K,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)
        dets = dets.reshape(2, -1,
                            8)  # bboxes, scores, tl_scores, br_scores, clses
        center = center.reshape(2, -1, 4)  # ct_xs, ct_ys, ct_clses, ct_scores
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]  # flip
        center[1, :, [0]] = out_width - center[1, :, [0]]  # horizontal flip
        dets = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)

        _rescale_dets(dets, ratios, borders, sizes)
        center[..., [0]] /= ratios[:, 1][:, None,
                                         None]  # remap to origin image
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]],
                0,
                sizes[:, 1][:, None, None],
                out=center[..., [0]])
        np.clip(center[..., [1]],
                0,
                sizes[:, 0][:, None, None],
                out=center[..., [1]])
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale  # remap to origin image

        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    detections = np.concatenate(detections, axis=1)
    center_points = np.concatenate(center_points, axis=1)

    classes = detections[..., -1]
    classes = classes[0]
    detections = detections[0]
    center_points = center_points[0]

    valid_ind = detections[:, 4] > -1
    valid_detections = detections[valid_ind]

    box_width = valid_detections[:, 2] - valid_detections[:, 0]
    box_height = valid_detections[:, 3] - valid_detections[:, 1]

    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)

    s_detections = valid_detections[s_ind]
    l_detections = valid_detections[l_ind]
    # trisection
    s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3  # x + (y-x)/3
    s_right_x = (s_detections[:, 0] +
                 2 * s_detections[:, 2]) / 3  # x +2(y-x)/3
    s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    s_left_x = s_left_x[np.newaxis, :]
    s_right_x = s_right_x[np.newaxis, :]
    s_top_y = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]
    # located in center region
    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    # same classes
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)),
                             axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_s_new_score],
        axis=0)  # select the box having center located in the center region
    s_detections[:, 4][ind_s_new_score] = (
        s_temp_score[ind_s_new_score] * 2 +
        center_points[index_s_new_score, 3]) / 3

    l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    l_left_x = l_left_x[np.newaxis, :]
    l_right_x = l_right_x[np.newaxis, :]
    l_top_y = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]

    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               l_detections[:, -1][np.newaxis, :]) == 0
    ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)),
                             axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_l_new_score],
        axis=0)
    l_detections[:, 4][ind_l_new_score] = (
        l_temp_score[ind_l_new_score] * 2 +
        center_points[index_l_new_score, 3]) / 3

    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(
        -detections[:, 4])]  # resort according to new scores
    classes = detections[..., -1]

    # reject detections left at the -1 sentinel score
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    # soft_nms
    top_bboxes = {}
    for j in range(categories):
        keep_inds = (classes == j)
        top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(top_bboxes[j + 1],
                           Nt=nms_threshold,
                           method=nms_algorithm,
                           weight_exp=weight_exp)
        else:
            soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm)
        top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5]

    scores = np.hstack(
        [top_bboxes[j][:, -1] for j in range(1, categories + 1)])
    # select boxes
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (top_bboxes[j][:, -1] >= thresh)
            top_bboxes[j] = top_bboxes[j][keep_inds]

    return top_bboxes
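
The trisection block above is CenterNet's central-region test: a detection keeps its
score only if some predicted center point of the same class falls inside the middle
third of the box, in which case box score and center score are fused as
(2 * box_score + center_score) / 3. Below is a minimal NumPy sketch of that broadcasted
test; `boxes` and `centers` are made-up stand-ins, not arrays from the example above:

import numpy as np

# boxes: (N, 6) -> x1, y1, x2, y2, score, class
boxes = np.array([[10., 10., 40., 40., 0.9, 0.],
                  [50., 50., 80., 80., 0.8, 1.]])
# centers: (M, 4) -> cx, cy, class, center_score
centers = np.array([[25., 25., 0., 0.7],
                    [55., 55., 0., 0.6]])

# middle third of each box along both axes (the trisection above)
left = (2 * boxes[:, 0] + boxes[:, 2]) / 3
right = (boxes[:, 0] + 2 * boxes[:, 2]) / 3
top = (2 * boxes[:, 1] + boxes[:, 3]) / 3
bottom = (boxes[:, 1] + 2 * boxes[:, 3]) / 3

# (M, N) match matrix: center m lies inside box n's central region, same class
inside = ((centers[:, 0:1] > left) & (centers[:, 0:1] < right) &
          (centers[:, 1:2] > top) & (centers[:, 1:2] < bottom) &
          (centers[:, 2:3] == boxes[None, :, 5]))

matched = inside.any(axis=0)      # boxes supported by at least one center
first = inside.argmax(axis=0)     # first matching center for each box
new_scores = np.where(matched,
                      (2 * boxes[:, 4] + centers[first, 3]) / 3,
                      -1.0)       # unmatched boxes get the -1 sentinel
print(new_scores)                 # [0.8333..., -1.0]
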
Example #14
def kp_detection(db, nnet, result_dir, debug=True, decode_func=kp_decode):

    db_inds = db.db_inds[:10] if debug else db.db_inds
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    top_bboxes = {}

    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # Paths
        result_path = result_dir + "/{}".format(image_id[:-4])
        result_json = os.path.join(result_path, "results.json")
        result_debug = os.path.join(result_path, "{}.jpg".format(db_ind))

        if pexists(result_json):
            continue

        # Create dirs
        Path(result_path).mkdir(parents=True, exist_ok=True)

        height, width = image.shape[0:2]

        detections = []
        center_points = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets, center = decode_func(nnet,
                                       images,
                                       K,
                                       ae_threshold=ae_threshold,
                                       kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]],
                    0,
                    sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]],
                    0,
                    sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                center_points.append(center)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]

        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)

        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])
        s_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_s_new_score],
            axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2 +
            center_points[index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_l_new_score],
            axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2 +
            center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]

        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(
                np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1],
                               Nt=nms_threshold,
                               method=nms_algorithm,
                               weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1],
                         Nt=nms_threshold,
                         method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack(
            [top_bboxes[image_id][j][:, -1] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        detections = db.parse_detections(top_bboxes[image_id])

        # if no valid detections
        if len(detections) == 0:
            # shutil.rmtree(Path(result_dir + "/{}".format(image_id[:-4])))
            continue
        else:
            # Save JSON
            with open(result_json, "w") as f:
                json.dump(detections, f)

        # Save also images with labels
        if debug:
            # Get image
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]

            # Create matplotlib fig
            fig, ax = plt.subplots(figsize=(12, 12))
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            for x in detections:
                bbox = x["bbox"]

                # Get points from width and height
                bbox[2] += bbox[0]
                bbox[3] += bbox[1]

                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]

                # NOTE: j is the stale loop variable left over from the per-class
                # NMS loop above, so every box is drawn in the last category's colour
                ax.add_patch(
                    plt.Rectangle((xmin, ymin),
                                  xmax - xmin,
                                  ymax - ymin,
                                  fill=False,
                                  edgecolor=colours[j - 1],
                                  linewidth=4.0))
                ax.text(xmin + 1,
                        ymin - 3,
                        '{:s}'.format(x["category_id"]),
                        bbox=dict(facecolor=colours[j - 1],
                                  ec='black',
                                  lw=2,
                                  alpha=0.5),
                        fontsize=15,
                        color='white',
                        weight='bold')

            plt.savefig(result_debug)
            plt.close()

    return 0
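
Each of these test loops batches the image with its horizontal mirror and later
un-flips the mirrored detections; that is what the line
`dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]` does: it reflects the
x-coordinates and swaps x1/x2 in one fancy-indexed assignment. A small self-contained
check with an invented box:

import numpy as np

out_width = 100
# dets: (2, K, 8); row 0 = original image, row 1 = flipped image,
# columns 0..3 are x1, y1, x2, y2 in output coordinates
dets = np.zeros((2, 3, 8), dtype=np.float32)
dets[1, 0, 0:4] = [70., 10., 90., 30.]  # a box found on the mirrored image

# reflect the x-coordinates and swap x1/x2 so x1 <= x2 still holds
dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
print(dets[1, 0, 0:4])  # -> [10. 10. 30. 30.]

# both passes are then flattened into one candidate list
dets = dets.reshape(1, -1, 8)
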
Example #15
def test(db, split, testiter, debug=False, suffix=None):
    result_dir = system_configs.result_dir
    result_dir = os.path.join(result_dir, str(testiter), split)
    class_name = []
    for i in range(1, len(db._coco.cats)):
        # if db._coco.cats[i] is None:
        #     continue
        # else:
        ind = db._cat_ids[i]
        class_name.append(db._coco.cats[ind]['name'])
    if suffix is not None:
        result_dir = os.path.join(result_dir, suffix)

    make_dirs([result_dir])

    test_iter = system_configs.max_iter if testiter is None else testiter
    print("loading parameters at iteration: {}".format(test_iter))

    print("building neural network...")
    nnet = NetworkFactory(db)
    print("loading parameters...")
    nnet.load_params(test_iter)

    # test_file = "test.{}".format(db.data)
    # testing = importlib.import_module(test_file).testing

    nnet.cuda()
    nnet.eval_mode()

    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    img_name = os.listdir(db._image_dir)
    for i in range(0, len(img_name)):
        top_bboxes = {}
        # for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = i + 1

        # image_id = db.image_ids(db_ind)
        image_id = img_name[i]
        image_file = db._image_dir + '/' + img_name[i]
        image = cv2.imread(image_file)

        height, width = image.shape[0:2]

        detections = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = kp_decode(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            dets = dets.reshape(1, -1, 8)

            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]

        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm,
                               weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][j][:, -1]
            for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        # result_json = os.path.join(result_dir, "results.json")
        detections = db.convert_to_list(top_bboxes)
        print('demo for {}'.format(image_id))
        img = cv2.imread(image_file)
        if detections is not None:
            for k in range(len(detections)):  # k: avoid shadowing the outer image index i
                name = db._coco.cats[detections[k][1]]['name']
                confi = detections[k][-1]
                if confi < 0.3:
                    continue
                box = [detections[k][j + 2] for j in range(4)]  # x1, y1, x2, y2
                cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 255, 255), 1)
                # cv2.putText(img, name[0] + '  ' + '{:.3f}'.format(confi), (int(box[0]), int(box[1] - 10)),
                #             cv2.FONT_ITALIC, 1, (0, 0, 255), 1)
        cv2.imshow('Detecting image...', img)
        # timer.total_time = 0
        if cv2.waitKey(3000) & 0xFF == ord('q'):
            break
        print(detections)
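
The `max_per_image` cap used throughout works by finding the k-th smallest score with
`np.partition` and thresholding on it, which avoids a full sort. A toy run with
invented scores:

import numpy as np

scores = np.array([0.9, 0.1, 0.8, 0.3, 0.7, 0.2])
max_per_image = 3

if len(scores) > max_per_image:
    kth = len(scores) - max_per_image        # position of the cutoff score
    thresh = np.partition(scores, kth)[kth]  # kth-smallest value, O(n) on average
    scores = scores[scores >= thresh]

print(thresh, scores)  # 0.7 [0.9 0.8 0.7]

Ties at the threshold can keep slightly more than `max_per_image` boxes; the original
code accepts that.
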
Example #16
def test_MatrixNetCorners(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    if db.split != "trainval":
        db_inds = db.db_inds[:200] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:100]
    num_images = db_inds.size

    K             = db.configs["top_k"]
    matching_threshold  = db.configs["matching_threshold"]
    nms_kernel    = db.configs["nms_kernel"]
    flag_flip_images = db.configs["test_flip_images"]
    max_dim = db.configs["test_image_max_dim"]
    
    scales        = db.configs["test_scales"]
    weight_exp    = db.configs["weight_exp"]
    merge_bbox    = db.configs["merge_bbox"]
    categories    = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    layers_range = db.configs["layers_range"]
    input_size = db.configs["input_size"]
    output_kernel_size = db.configs["output_kernel_size"]
    
    _dict = {}
    output_sizes = []
    for i, l in enumerate(layers_range):
        for j, e in enumerate(l):
            if e != -1:
                output_sizes.append([input_size[0] // (8 * 2 ** j), input_size[1] // (8 * 2 ** i)])
                _dict[(i + 1) * 10 + (j + 1)] = e
    layers_range = [_dict[i] for i in sorted(_dict)]

    # rescale each layer range from input resolution to its output feature-map resolution
    layers_range = [[lr[0] * os[0] / input_size[0], lr[1] * os[0] / input_size[0],
                     lr[2] * os[1] / input_size[1], lr[3] * os[1] / input_size[1]]
                    for (lr, os) in zip(layers_range, output_sizes)]

    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1, 
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]
    

    top_bboxes = {}
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        db_ind = db_inds[ind]

        image_id   = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image      = cv2.imread(image_file)

        height, width = image.shape[0:2]
        detections = []
        for scale in scales:
            org_scale = scale

            scale = scale * min((max_dim)/float(height), (max_dim)/float(width))
            new_height = int(height * scale)
            new_width  = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = ((new_height // 128) + 1) * 128
            inp_width  = ((new_width  // 128) + 1) * 128
        
            images  = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios  = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes   = np.zeros((1, 2), dtype=np.float32)

            out_height, out_width = ((inp_height) // 8, (inp_width) // 8)
            height_ratio = out_height / inp_height
            width_ratio  = out_width  / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])

            resized_image = resized_image / 255.
            

            images[0]  = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0]   = [int(height * scale), int(width * scale)]
            ratios[0]  = [height_ratio, width_ratio]
            if flag_flip_images:
                images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            dets = decode_func(nnet, images, K, matching_threshold=matching_threshold, kernel=nms_kernel,
                               layers_range=layers_range, output_kernel_size=output_kernel_size,
                               output_sizes=output_sizes, input_size=input_size)
        
            if flag_flip_images:
                dets   = dets.reshape(2, -1, 8)
                dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
                dets   = dets.reshape(1, -1, 8)

            _rescale_dets(dets, ratios, borders, sizes)
            dets[:, :, 0:4] /= scale
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)

        classes    = detections[..., -1]
        classes    = classes[0]
        detections = detections[0]

        # keep only detections with positive scores
        keep_inds  = (detections[:, 4] > 0)
        detections = detections[keep_inds]
        classes    = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][j][:, -1] 
            for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth    = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]
        if debug:
            image_file = db.image_file(db_ind)
            image      = cv2.imread(image_file)
            bboxes = {}
            for j in range(categories, 0, -1):
                keep_inds = (top_bboxes[image_id][j][:, -1] > 0.2)
                cat_name  = db.class_name(j)
                cat_size  = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                color     = np.random.random((3, )) * 0.6 + 0.4
                color     = color * 255
                color     = color.astype(np.int32).tolist()
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox  = bbox[0:4].astype(np.int32)
                    if bbox[1] - cat_size[1] - 2 < 0:
                        cv2.rectangle(image,
                            (bbox[0], bbox[1] + 2),
                            (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
                            color, -1
                        )
                        cv2.putText(image, cat_name, 
                            (bbox[0], bbox[1] + cat_size[1] + 2), 
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1
                        )
                    else:
                        cv2.rectangle(image,
                            (bbox[0], bbox[1] - cat_size[1] - 2),
                            (bbox[0] + cat_size[0], bbox[1] - 2),
                            color, -1
                        )
                        cv2.putText(image, cat_name, 
                            (bbox[0], bbox[1] - 2), 
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1
                        )
                    cv2.rectangle(image,
                        (bbox[0], bbox[1]),
                        (bbox[2], bbox[3]),
                        color, 2
                    )
            debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            print(debug_file)
            cv2.imwrite(debug_file, image)

    result_json = os.path.join(result_dir, "results.json")

    detections  = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)
    
    cls_ids   = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)
    return 0
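
Note that this MatrixNets variant pads with `((x // 128) + 1) * 128` and uses an
output stride of 8 (`inp // 8`), while the CenterNet-style examples pad with
`x | 127` and use stride 4 (`(inp + 1) // 4`). The two paddings differ: the first
strictly rounds up to a multiple of 128, the second up to one less than a multiple
of 128. A quick check:

def pad_matrixnet(x):
    # strictly round up to the next multiple of 128 (even if x already is one)
    return ((x // 128) + 1) * 128

def pad_or127(x):
    # set the low 7 bits: the smallest value >= x of the form 128k - 1
    return x | 127

for x in (200, 255, 256):
    print(x, pad_matrixnet(x), pad_or127(x))
# 200 -> 256, 255
# 255 -> 256, 255
# 256 -> 384, 383
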
Example #17
def apply_detection(image,
                    nnet,
                    scales,
                    decode_func,
                    top_k,
                    avg,
                    std,
                    categories,
                    merge_bbox,
                    max_per_image=100,
                    ae_threshold=0.5,
                    nms_kernel=3,
                    nms_algorithm=2,
                    nms_threshold=0.45,
                    weight_exp=1):

    height, width = image.shape[0:2]

    detections = []
    center_points = []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        # x | 127: the smallest value >= x with the low 7 bits all set,
        # i.e. x rounded up to the next multiple of 128, minus 1
        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        resized_image = cv2.resize(image, (new_width, new_height))
        resized_image, border, offset = crop_image(resized_image, new_center,
                                                   [inp_height, inp_width])

        resized_image = resized_image / 255.
        normalize_(resized_image, avg, std)

        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0] = [int(height * scale), int(width * scale)]
        ratios[0] = [height_ratio, width_ratio]

        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
        images = torch.from_numpy(images)
        # do detection
        dets, center = decode_func(nnet,
                                   images,
                                   top_k,
                                   ae_threshold=ae_threshold,
                                   kernel=nms_kernel)
        # post processing
        dets = dets.reshape(2, -1, 8)
        center = center.reshape(2, -1, 4)
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
        center[1, :, [0]] = out_width - center[1, :, [0]]
        dets = dets.reshape(1, -1, 8)
        center = center.reshape(1, -1, 4)

        _rescale_dets(dets, ratios, borders, sizes)
        center[..., [0]] /= ratios[:, 1][:, None, None]
        center[..., [1]] /= ratios[:, 0][:, None, None]
        center[..., [0]] -= borders[:, 2][:, None, None]
        center[..., [1]] -= borders[:, 0][:, None, None]
        np.clip(center[..., [0]],
                0,
                sizes[:, 1][:, None, None],
                out=center[..., [0]])
        np.clip(center[..., [1]],
                0,
                sizes[:, 0][:, None, None],
                out=center[..., [1]])
        dets[:, :, 0:4] /= scale
        center[:, :, 0:2] /= scale

        if scale == 1:
            center_points.append(center)
        detections.append(dets)

    detections = np.concatenate(detections, axis=1)
    center_points = np.concatenate(center_points, axis=1)

    classes = detections[..., -1]
    classes = classes[0]
    detections = detections[0]
    center_points = center_points[0]

    valid_ind = detections[:, 4] > -1
    valid_detections = detections[valid_ind]

    box_width = valid_detections[:, 2] - valid_detections[:, 0]
    box_height = valid_detections[:, 3] - valid_detections[:, 1]

    s_ind = (box_width * box_height <= 22500)
    l_ind = (box_width * box_height > 22500)

    s_detections = valid_detections[s_ind]
    l_detections = valid_detections[l_ind]

    s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
    s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
    s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
    s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

    s_temp_score = copy.copy(s_detections[:, 4])
    s_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    s_left_x = s_left_x[np.newaxis, :]
    s_right_x = s_right_x[np.newaxis, :]
    s_top_y = s_top_y[np.newaxis, :]
    s_bottom_y = s_bottom_y[np.newaxis, :]

    ind_lx = (center_x - s_left_x) > 0
    ind_rx = (center_x - s_right_x) < 0
    ind_ty = (center_y - s_top_y) > 0
    ind_by = (center_y - s_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               s_detections[:, -1][np.newaxis, :]) == 0
    ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)),
                             axis=0) == 1
    index_s_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_s_new_score],
        axis=0)
    s_detections[:, 4][ind_s_new_score] = (
        s_temp_score[ind_s_new_score] * 2 +
        center_points[index_s_new_score, 3]) / 3

    l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
    l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
    l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
    l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

    l_temp_score = copy.copy(l_detections[:, 4])
    l_detections[:, 4] = -1

    center_x = center_points[:, 0][:, np.newaxis]
    center_y = center_points[:, 1][:, np.newaxis]
    l_left_x = l_left_x[np.newaxis, :]
    l_right_x = l_right_x[np.newaxis, :]
    l_top_y = l_top_y[np.newaxis, :]
    l_bottom_y = l_bottom_y[np.newaxis, :]

    ind_lx = (center_x - l_left_x) > 0
    ind_rx = (center_x - l_right_x) < 0
    ind_ty = (center_y - l_top_y) > 0
    ind_by = (center_y - l_bottom_y) < 0
    ind_cls = (center_points[:, 2][:, np.newaxis] -
               l_detections[:, -1][np.newaxis, :]) == 0
    ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                              (ind_by + 0) & (ind_cls + 0)),
                             axis=0) == 1
    index_l_new_score = np.argmax(
        ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
         (ind_cls + 0))[:, ind_l_new_score],
        axis=0)
    l_detections[:, 4][ind_l_new_score] = (
        l_temp_score[ind_l_new_score] * 2 +
        center_points[index_l_new_score, 3]) / 3

    detections = np.concatenate([l_detections, s_detections], axis=0)
    detections = detections[np.argsort(-detections[:, 4])]
    classes = detections[..., -1]

    # reference (non-vectorized) version of the center-region re-scoring above:
    #for i in range(detections.shape[0]):
    #   box_width = detections[i,2]-detections[i,0]
    #   box_height = detections[i,3]-detections[i,1]
    #   if box_width*box_height<=22500 and detections[i,4]!=-1:
    #     left_x = (2*detections[i,0]+1*detections[i,2])/3
    #     right_x = (1*detections[i,0]+2*detections[i,2])/3
    #     top_y = (2*detections[i,1]+1*detections[i,3])/3
    #     bottom_y = (1*detections[i,1]+2*detections[i,3])/3
    #     temp_score = copy.copy(detections[i,4])
    #     detections[i,4] = -1
    #     for j in range(center_points.shape[0]):
    #        if (classes[i] == center_points[j,2])and \
    #           (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
    #           ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
    #           detections[i,4] = (temp_score*2 + center_points[j,3])/3
    #           break
    #   elif box_width*box_height > 22500 and detections[i,4]!=-1:
    #     left_x = (3*detections[i,0]+2*detections[i,2])/5
    #     right_x = (2*detections[i,0]+3*detections[i,2])/5
    #     top_y = (3*detections[i,1]+2*detections[i,3])/5
    #     bottom_y = (2*detections[i,1]+3*detections[i,3])/5
    #     temp_score = copy.copy(detections[i,4])
    #     detections[i,4] = -1
    #     for j in range(center_points.shape[0]):
    #        if (classes[i] == center_points[j,2])and \
    #           (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
    #           ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
    #           detections[i,4] = (temp_score*2 + center_points[j,3])/3
    #           break
    # reject detections with negative scores
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    final_bboxes = {}
    for j in range(categories):
        keep_inds = (classes == j)
        final_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(final_bboxes[j + 1],
                           Nt=nms_threshold,
                           method=nms_algorithm,
                           weight_exp=weight_exp)
        else:
            soft_nms(final_bboxes[j + 1],
                     Nt=nms_threshold,
                     method=nms_algorithm)
        final_bboxes[j + 1] = final_bboxes[j + 1][:, 0:5]

    scores = np.hstack(
        [final_bboxes[j][:, -1] for j in range(1, categories + 1)])
    if len(scores) > max_per_image:
        kth = len(scores) - max_per_image
        thresh = np.partition(scores, kth)[kth]
        for j in range(1, categories + 1):
            keep_inds = (final_bboxes[j][:, -1] >= thresh)
            final_bboxes[j] = final_bboxes[j][keep_inds]
    return final_bboxes
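
apply_detection condenses the multi-scale recipe shared by all of these examples:
detect at each scale in scaled coordinates, divide the box coordinates by `scale` to
return to original-image pixels, and concatenate the candidates across scales before
re-scoring and NMS. A stripped-down sketch of just that aggregation step;
`detect_at_scale` is a hypothetical stand-in, not a function from these examples:

import numpy as np

def detect_at_scale(image_hw, scale):
    """Stand-in detector: returns (1, K, 8) dets in the scaled image's coordinates."""
    h, w = int(image_hw[0] * scale), int(image_hw[1] * scale)
    dets = np.zeros((1, 2, 8), dtype=np.float32)
    dets[0, :, 0:4] = [[0.1 * w, 0.1 * h, 0.5 * w, 0.5 * h],
                       [0.2 * w, 0.2 * h, 0.9 * w, 0.9 * h]]
    dets[0, :, 4] = [0.9, 0.8]  # scores
    return dets

image_hw = (480, 640)
detections = []
for scale in (0.5, 1.0, 1.5):
    dets = detect_at_scale(image_hw, scale)
    dets[:, :, 0:4] /= scale          # map boxes back to original-image pixels
    detections.append(dets)

detections = np.concatenate(detections, axis=1)  # (1, sum_K, 8)
print(detections.shape)  # (1, 6, 8)
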
Example #18
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)

    partial_num = 3000
    db_inds = db.db_inds[:partial_num] if debug else db.db_inds

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]

    seq_length = db.configs["max_query_len"]
    bert_model = db.configs["bert_model"]
    textdim = 768 if bert_model == 'bert-base-uncased' else 1024

    top_bboxes = {}
    best_bboxes = {}
    for ind in tqdm(range(db_inds.size), ncols=80, desc="locating kps"):

        db_ind = db_inds[ind]
        image_file = db.images[db_ind][0]

        image, bert_feature, gt_detections, phrase = db.detections_with_phrase(
            db_ind)

        height, width = image.shape[0:2]

        detections = []
        center_points = []
        tl_hms = []
        br_hms = []
        ct_hms = []

        for scale in scales:
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])

            inp_height = new_height | 127
            inp_width = new_width | 127

            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            bert_features = np.zeros((1, textdim), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)

            bert_features[0] = bert_feature

            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width

            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image,
                                                       new_center,
                                                       [inp_height, inp_width])

            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)

            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]

            # Flip to perform detection twice
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            bert_features = np.concatenate((bert_features, bert_features),
                                           axis=0)

            images = torch.from_numpy(images)
            bert_features = torch.from_numpy(bert_features)
            dets, center, heatmaps = decode_func(nnet, [images, bert_features],
                                                 K,
                                                 ae_threshold=ae_threshold,
                                                 kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)

            tl_hm, br_hm, ct_hm = heatmaps

            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]],
                    0,
                    sizes[:, 1][:, None, None],
                    out=center[..., [0]])
            np.clip(center[..., [1]],
                    0,
                    sizes[:, 0][:, None, None],
                    out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale

            if scale == 1:
                center_points.append(center)
                tl_hms.append(tl_hm)
                br_hms.append(br_hm)
                ct_hms.append(ct_hm)
            detections.append(dets)

        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)
        tl_hms = np.concatenate(tl_hms, axis=1)
        br_hms = np.concatenate(br_hms, axis=1)
        ct_hms = np.concatenate(ct_hms, axis=1)

        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]
        tl_hms = tl_hms[0]
        br_hms = br_hms[0]
        ct_hms = ct_hms[0]

        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]

        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]

        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)

        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]

        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

        s_temp_score = copy.copy(s_detections[:, 4])
        s_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_s_new_score],
            axis=0)
        s_detections[:, 4][ind_s_new_score] = (
            s_temp_score[ind_s_new_score] * 2 +
            center_points[index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] -
                   l_detections[:, -1][np.newaxis, :]) == 0
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) &
                                  (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
             (ind_cls + 0))[:, ind_l_new_score],
            axis=0)
        l_detections[:, 4][ind_l_new_score] = (
            l_temp_score[ind_l_new_score] * 2 +
            center_points[index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]
        classes = detections[..., -1]

        # reject detections with negative scores
        keep_inds = (detections[:, 4] > -1)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[db_ind] = detections[:, 0:7].astype(np.float32)
        if merge_bbox:
            soft_nms_merge(top_bboxes[db_ind],
                           Nt=nms_threshold,
                           method=nms_algorithm,
                           weight_exp=weight_exp)
        else:
            soft_nms(top_bboxes[db_ind],
                     Nt=nms_threshold,
                     method=nms_algorithm)
        top_bboxes[db_ind] = top_bboxes[db_ind][:, 0:5]

        scores = top_bboxes[db_ind][:, -1]
        if len(scores) > 0:  # scores is a NumPy slice, so it is never None
            best_bboxes[db_ind] = top_bboxes[db_ind][np.argmax(scores)]
        else:
            best_bboxes[db_ind] = None

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            keep_inds = (top_bboxes[db_ind][:, -1] >= thresh)
            top_bboxes[db_ind] = top_bboxes[db_ind][keep_inds]

        if debug:
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]
            fig, ax = plt.subplots(figsize=(28, 12))

            ax = plt.subplot(152)
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            if best_bboxes[db_ind] is not None:
                bbox = best_bboxes[db_ind].astype(np.int32)
                xmin = bbox[0]
                ymin = bbox[1]
                xmax = bbox[2]
                ymax = bbox[3]
                ax.add_patch(
                    plt.Rectangle((xmin, ymin),
                                  xmax - xmin,
                                  ymax - ymin,
                                  fill=False,
                                  edgecolor='red',
                                  linewidth=5.0))
                ax.text(xmin + 1,
                        ymin - 3,
                        'prediction',
                        bbox=dict(facecolor='red', ec='black', lw=2,
                                  alpha=0.5),
                        fontsize=15,
                        color='white',
                        weight='bold')

            ax = plt.subplot(151)
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            bbox = gt_detections[0].astype(np.int32)
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            ax.add_patch(
                plt.Rectangle((xmin, ymin),
                              xmax - xmin,
                              ymax - ymin,
                              fill=False,
                              edgecolor='red',
                              linewidth=5.0))
            ax.text(xmin + 1,
                    ymin - 3,
                    phrase,
                    bbox=dict(facecolor='red', ec='black', lw=2, alpha=0.5),
                    fontsize=15,
                    color='white',
                    weight='bold')

            ax = plt.subplot(153)
            ax.imshow(tl_hms[0], cmap='jet')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            ax = plt.subplot(154)
            ax.imshow(br_hms[0], cmap='jet')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            ax = plt.subplot(155)
            ax.imshow(ct_hms[0], cmap='jet')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)

            # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
            # plt.savefig(debug_file1)
            plt.savefig(debug_file2)
            plt.close()

    result_json = os.path.join(result_dir, "results.json")
    detections = db.convert_to_json(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)

    db.evaluate(best_bboxes)
    return 0
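
All the variants above split boxes at an area of 22500 px (150 x 150): small boxes are
tested against their central third, large boxes against the tighter central fifth, so
big boxes need a better-centred center point to keep their score. A compact sketch of
just the region geometry under that rule (the helper name is illustrative):

def central_region(x1, y1, x2, y2):
    """Central region used for the center-point test: middle third for small
    boxes (area <= 150 * 150), middle fifth for large ones."""
    small = (x2 - x1) * (y2 - y1) <= 22500  # 22500 = 150 * 150
    if small:
        lo, hi = 2 / 3, 1 / 3   # weights on (x1, x2) -> central third
    else:
        lo, hi = 3 / 5, 2 / 5   # -> central fifth
    left, right = lo * x1 + hi * x2, hi * x1 + lo * x2
    top, bottom = lo * y1 + hi * y2, hi * y1 + lo * y2
    return left, top, right, bottom

print(central_region(0, 0, 90, 90))    # small: (30.0, 30.0, 60.0, 60.0)
print(central_region(0, 0, 500, 500))  # large: (200.0, 200.0, 300.0, 300.0)
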
Example #19
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    debug_dir = os.path.join(result_dir, "debug")
    NT = 20  # NT: number of test images
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)             # create the directory
    if db.split != "trainval":
        db_inds = db.db_inds[:NT] if debug else db.db_inds         # outside debug mode, detect every image in the dataset
    else:
        db_inds = db.db_inds[:NT] if debug else db.db_inds[:5000]  # in debug mode, use only NT images
    num_images = db_inds.size   # number of images to process
    K = db.configs["top_k"]     # detections kept per image
    ae_threshold = db.configs["ae_threshold"]  # associative-embedding matching threshold
    nms_kernel = db.configs["nms_kernel"]
    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]
    top_bboxes = {}   # records the top-k detection boxes
    for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
        # fetch the image
        db_ind = db_inds[ind]
        image_id = db.image_ids(db_ind)
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)
        height, width = image.shape[0:2]
        # collect detections and center points
        detections = []
        center_points = []
        for scale in scales:
            # preprocessing at the current scale
            new_height = int(height * scale)
            new_width = int(width * scale)
            new_center = np.array([new_height // 2, new_width // 2])
            inp_height = new_height | 127  # pad up to the next multiple of 128, minus 1
            inp_width = new_width | 127    # likewise for the width
            images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
            ratios = np.zeros((1, 2), dtype=np.float32)
            borders = np.zeros((1, 4), dtype=np.float32)
            sizes = np.zeros((1, 2), dtype=np.float32)
            out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
            height_ratio = out_height / inp_height
            width_ratio = out_width / inp_width
            resized_image = cv2.resize(image, (new_width, new_height))
            resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])
            resized_image = resized_image / 255.
            normalize_(resized_image, db.mean, db.std)
            images[0] = resized_image.transpose((2, 0, 1))
            borders[0] = border
            sizes[0] = [int(height * scale), int(width * scale)]
            ratios[0] = [height_ratio, width_ratio]
            images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
            images = torch.from_numpy(images)
            # run the detector
            dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
            dets = dets.reshape(2, -1, 8)
            center = center.reshape(2, -1, 4)
            dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
            center[1, :, [0]] = out_width - center[1, :, [0]]
            dets = dets.reshape(1, -1, 8)
            center = center.reshape(1, -1, 4)
            _rescale_dets(dets, ratios, borders, sizes)
            center[..., [0]] /= ratios[:, 1][:, None, None]
            center[..., [1]] /= ratios[:, 0][:, None, None]
            center[..., [0]] -= borders[:, 2][:, None, None]
            center[..., [1]] -= borders[:, 0][:, None, None]
            np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]])
            np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]])
            dets[:, :, 0:4] /= scale
            center[:, :, 0:2] /= scale
            if scale == 1:
                center_points.append(center)   # only keep center points from the scale-1 pass
            detections.append(dets)            # per-scale detection results
        # consolidate this image's detections
        detections = np.concatenate(detections, axis=1)
        center_points = np.concatenate(center_points, axis=1)
        classes = detections[..., -1]          # class of each detection
        classes = classes[0]
        detections = detections[0]
        center_points = center_points[0]
        valid_ind = detections[:, 4] > -1
        valid_detections = detections[valid_ind]
        box_width = valid_detections[:, 2] - valid_detections[:, 0]
        box_height = valid_detections[:, 3] - valid_detections[:, 1]
        s_ind = (box_width * box_height <= 22500)
        l_ind = (box_width * box_height > 22500)
        s_detections = valid_detections[s_ind]
        l_detections = valid_detections[l_ind]
        s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
        s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
        s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
        s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3
        s_temp_score = copy.copy(s_detections[:, 4])     # score of each bbox
        s_detections[:, 4] = -1
        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        s_left_x = s_left_x[np.newaxis, :]
        s_right_x = s_right_x[np.newaxis, :]
        s_top_y = s_top_y[np.newaxis, :]
        s_bottom_y = s_bottom_y[np.newaxis, :]

        ind_lx = (center_x - s_left_x) > 0
        ind_rx = (center_x - s_right_x) < 0
        ind_ty = (center_y - s_top_y) > 0
        ind_by = (center_y - s_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
        ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_s_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], axis=0)
        s_detections[:, 4][ind_s_new_score] = (s_temp_score[ind_s_new_score] * 2 + center_points[
            index_s_new_score, 3]) / 3

        l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
        l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
        l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
        l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

        l_temp_score = copy.copy(l_detections[:, 4])
        l_detections[:, 4] = -1

        center_x = center_points[:, 0][:, np.newaxis]
        center_y = center_points[:, 1][:, np.newaxis]
        l_left_x = l_left_x[np.newaxis, :]
        l_right_x = l_right_x[np.newaxis, :]
        l_top_y = l_top_y[np.newaxis, :]
        l_bottom_y = l_bottom_y[np.newaxis, :]

        ind_lx = (center_x - l_left_x) > 0
        ind_rx = (center_x - l_right_x) < 0
        ind_ty = (center_y - l_top_y) > 0
        ind_by = (center_y - l_bottom_y) < 0
        ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0
        # same center-point test for large boxes, against the stricter central fifth
        ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)),
                                 axis=0) == 1
        index_l_new_score = np.argmax(
            ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0)
        l_detections[:, 4][ind_l_new_score] = (l_temp_score[ind_l_new_score] * 2 + center_points[
            index_l_new_score, 3]) / 3

        detections = np.concatenate([l_detections, s_detections], axis=0)
        detections = detections[np.argsort(-detections[:, 4])]   # sort by refined score, descending
        classes = detections[..., -1]
        keep_inds = (detections[:, 4] > -1)   # drop boxes not confirmed by any center point
        detections = detections[keep_inds]
        classes = classes[keep_inds]
        # per-class soft-NMS
        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
            if merge_bbox:
                soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm,
                               weight_exp=weight_exp)
            else:
                soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
            top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

        scores = np.hstack([
            top_bboxes[image_id][j][:, -1]
            for j in range(1, categories + 1)
        ])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]
        # debug mode: draw the boxes and dump their coordinates
        if debug:
            image_file = db.image_file(db_ind)
            _, filename0 = os.path.split(image_file)      # split off the file name
            img_name0, _ = os.path.splitext(filename0)    # file name without its extension
            FileTXT = open(debug_dir + "/" + img_name0 + ".txt", mode="a")   # text file recording the detected box coordinates
            image = cv2.imread(image_file)
            im = image[:, :, (2, 1, 0)]   # BGR -> RGB for matplotlib
            fig, ax = plt.subplots(figsize=(12, 12))
            fig = ax.imshow(im, aspect='equal')
            plt.axis('off')
            fig.axes.get_xaxis().set_visible(False)
            fig.axes.get_yaxis().set_visible(False)
            # bboxes = {}
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4)
                cat_name = db.class_name(j)
                for bbox in top_bboxes[image_id][j][keep_inds]:
                    bbox = bbox[0:4].astype(np.int32)
                    xmin = bbox[0]
                    ymin = bbox[1]
                    xmax = bbox[2]
                    ymax = bbox[3]
                    FileTXT.write(str(1) + ' ' + str(int(xmin)) + ' ' + str(int(ymin))
                                  + ' ' + str(int(xmax)) + ' ' + str(int(ymax)) + ' ' + str(1))
                    FileTXT.write('\n')   # one record per box: position and size
                    # draw the box and its class label
                    ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=colours[j - 1],
                                      linewidth=4.0))
                    ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name),
                            bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5),
                            fontsize=15, color='white', weight='bold')
            FileTXT.close()

            # debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))   # optional PDF output
            debug_file2 = os.path.join(debug_dir, "{}.jpg".format(img_name0))  # JPEG output
            # plt.savefig(debug_file1)
            plt.savefig(debug_file2)  # save the annotated image
            plt.close()
    result_json = os.path.join(result_dir, "results.json")   # store the detections as JSON
    detections = db.convert_to_coco(top_bboxes)
    with open(result_json, "w") as f:
        json.dump(detections, f)
    cls_ids = list(range(1, categories + 1))
    image_ids = [db.image_ids(ind) for ind in db_inds]
    db.evaluate(result_json, cls_ids, image_ids)   # run the evaluation
    return 0
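Most of these examples cap the detections kept per image the same way: stack every class's scores with np.hstack, use np.partition to find the score that ranks max_per_image from the top, and keep only boxes scoring at least that much. A minimal sketch of the trick on made-up data (the dict layout and the names scores_per_class / max_per_image are illustrative, not from any of the repos above):

import numpy as np

# hypothetical per-class score arrays; in the snippets these are
# top_bboxes[image_id][j][:, -1] for each class j
scores_per_class = {1: np.array([0.9, 0.2, 0.6]), 2: np.array([0.8, 0.1])}
max_per_image = 3

scores = np.hstack(list(scores_per_class.values()))
if len(scores) > max_per_image:
    # np.partition places the kth-smallest score at index kth, so every
    # score at or above it belongs to the global top max_per_image
    kth = len(scores) - max_per_image
    thresh = np.partition(scores, kth)[kth]
    for j in scores_per_class:
        scores_per_class[j] = scores_per_class[j][scores_per_class[j] >= thresh]

print(scores_per_class)   # three boxes survive in total

Because the comparison is >=, ties at the threshold can let slightly more than max_per_image boxes through, which matches the behavior of the original loops.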
Example #20
        classes = detections[..., -1]
        classes = classes[0]
        detections = detections[0]

        # reject detections with non-positive scores
        keep_inds = (detections[:, 4] > 0)
        detections = detections[keep_inds]
        classes = classes[keep_inds]

        top_bboxes[image_id] = {}
        for j in range(categories):
            keep_inds = (classes == j)
            top_bboxes[image_id][j + 1] = \
                detections[keep_inds].astype(np.float32)
            soft_nms(top_bboxes[image_id][j + 1],
                     Nt=nms_threshold,
                     method=nms_algorithm)

        scores = np.hstack(
            [top_bboxes[image_id][j][:, 4] for j in range(1, categories + 1)])
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, categories + 1):
                keep_inds = (top_bboxes[image_id][j][:, 4] >= thresh)
                top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

        if suppres_ghost:
            for j in range(1, categories + 1):
                n = len(top_bboxes[image_id][j])
                for k in range(n):
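Example #20 breaks off inside its ghost-suppression loop. The soft_nms and soft_nms_merge these examples call are compiled Cython kernels taking the Nt and method arguments shown above; as a hedged, pure-NumPy sketch of the score-decay rule they implement (an illustration of linear and exponential soft-NMS, not the actual kernel):

import numpy as np

def soft_nms_sketch(boxes, Nt=0.5, sigma=0.5, method=2, score_thresh=0.001):
    # boxes: (N, 5) array of [x1, y1, x2, y2, score]; scores decay in place
    N = boxes.shape[0]
    for i in range(N):
        # move the highest-scoring remaining box to position i
        max_pos = i + np.argmax(boxes[i:, 4])
        boxes[[i, max_pos]] = boxes[[max_pos, i]]
        # IoU of box i against every box after it
        x1 = np.maximum(boxes[i, 0], boxes[i + 1:, 0])
        y1 = np.maximum(boxes[i, 1], boxes[i + 1:, 1])
        x2 = np.minimum(boxes[i, 2], boxes[i + 1:, 2])
        y2 = np.minimum(boxes[i, 3], boxes[i + 1:, 3])
        inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_rest = ((boxes[i + 1:, 2] - boxes[i + 1:, 0]) *
                     (boxes[i + 1:, 3] - boxes[i + 1:, 1]))
        iou = inter / np.maximum(area_i + area_rest - inter, 1e-9)
        if method == 1:          # linear: decay only above the overlap threshold
            weight = np.where(iou > Nt, 1 - iou, 1.0)
        elif method == 2:        # exponential (Gaussian): always decay smoothly
            weight = np.exp(-(iou * iou) / sigma)
        else:                    # hard NMS: zero out overlapping boxes
            weight = np.where(iou > Nt, 0.0, 1.0)
        boxes[i + 1:, 4] *= weight
    return boxes[boxes[:, 4] > score_thresh]

With method=2 (the exp_soft_nms most of these configs select), no box is hard-dropped; overlapping boxes merely lose score, and the cutoff applied afterwards decides what survives.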
Example #21
    def kp_detection(self, image, db, result_dir, debug=False):
        K = db.configs["top_k"]
        ae_threshold = db.configs["ae_threshold"]
        nms_kernel = db.configs["nms_kernel"]
        weight_exp = db.configs["weight_exp"]
        merge_bbox = db.configs["merge_bbox"]
        categories = db.configs["categories"]
        nms_threshold = db.configs["nms_threshold"]
        max_per_image = db.configs["max_per_image"]
        nms_algorithm = {
            "nms": 0,
            "linear_soft_nms": 1,
            "exp_soft_nms": 2
        }[db.configs["nms_algorithm"]]
        top_bboxes = {}
        if True:  # placeholder for the original per-image loop
            #db_ind = db_inds[ind]
            image_id = 0
            height, width = image.shape[0:2]

            detections = []
            center_points = []

            if True:  # placeholder for the original multi-scale loop; only scale == 1 is kept
                scale = 1
                new_height = int(height * scale)
                new_width = int(width * scale)
                new_center = np.array([new_height // 2, new_width // 2])

                inp_height = new_height | 127   # pad up to 128k - 1 so (inp + 1) is a multiple of 128
                inp_width = new_width | 127

                images = np.zeros((1, 3, inp_height, inp_width),
                                  dtype=np.float32)
                ratios = np.zeros((1, 2), dtype=np.float32)
                borders = np.zeros((1, 4), dtype=np.float32)
                sizes = np.zeros((1, 2), dtype=np.float32)

                out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
                height_ratio = out_height / inp_height
                width_ratio = out_width / inp_width

                resized_image = cv2.resize(image, (new_width, new_height))
                resized_image, border, offset = crop_image(
                    resized_image, new_center, [inp_height, inp_width])

                resized_image = resized_image / 255.
                normalize_(resized_image, db.mean, db.std)

                images[0] = resized_image.transpose((2, 0, 1))
                borders[0] = border
                sizes[0] = [int(height * scale), int(width * scale)]
                ratios[0] = [height_ratio, width_ratio]

                images = np.concatenate((images, images[:, :, :, ::-1]),
                                        axis=0)   # batch the image with its horizontal flip (flip TTA)
                images = torch.from_numpy(images)
                dets, center = self.kp_decode(images,
                                              K,
                                              ae_threshold=ae_threshold,
                                              kernel=nms_kernel)
                dets = dets.reshape(2, -1, 8)
                center = center.reshape(2, -1, 4)
                # map the flipped copy's outputs back to the original frame
                dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
                center[1, :, [0]] = out_width - center[1, :, [0]]
                dets = dets.reshape(1, -1, 8)
                center = center.reshape(1, -1, 4)

                self._rescale_dets(dets, ratios, borders, sizes)
                center[..., [0]] /= ratios[:, 1][:, None, None]
                center[..., [1]] /= ratios[:, 0][:, None, None]
                center[..., [0]] -= borders[:, 2][:, None, None]
                center[..., [1]] -= borders[:, 0][:, None, None]
                np.clip(center[..., [0]],
                        0,
                        sizes[:, 1][:, None, None],
                        out=center[..., [0]])
                np.clip(center[..., [1]],
                        0,
                        sizes[:, 0][:, None, None],
                        out=center[..., [1]])
                dets[:, :, 0:4] /= scale
                center[:, :, 0:2] /= scale

                if scale == 1:
                    center_points.append(center)
                detections.append(dets)

            detections = np.concatenate(detections, axis=1)
            center_points = np.concatenate(center_points, axis=1)

            classes = detections[..., -1]
            classes = classes[0]
            detections = detections[0]
            center_points = center_points[0]

            valid_ind = detections[:, 4] > -1   # drop padded detection slots
            valid_detections = detections[valid_ind]

            box_width = valid_detections[:, 2] - valid_detections[:, 0]
            box_height = valid_detections[:, 3] - valid_detections[:, 1]

            s_ind = (box_width * box_height <= 22500)
            l_ind = (box_width * box_height > 22500)

            s_detections = valid_detections[s_ind]
            l_detections = valid_detections[l_ind]

            s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3
            s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3
            s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
            s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3

            s_temp_score = copy.copy(s_detections[:, 4])
            s_detections[:, 4] = -1

            center_x = center_points[:, 0][:, np.newaxis]
            center_y = center_points[:, 1][:, np.newaxis]
            s_left_x = s_left_x[np.newaxis, :]
            s_right_x = s_right_x[np.newaxis, :]
            s_top_y = s_top_y[np.newaxis, :]
            s_bottom_y = s_bottom_y[np.newaxis, :]

            ind_lx = (center_x - s_left_x) > 0
            ind_rx = (center_x - s_right_x) < 0
            ind_ty = (center_y - s_top_y) > 0
            ind_by = (center_y - s_bottom_y) < 0
            ind_cls = (center_points[:, 2][:, np.newaxis] -
                       s_detections[:, -1][np.newaxis, :]) == 0
            ind_s_new_score = np.max(
                ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
                 (ind_cls + 0)),
                axis=0) == 1
            index_s_new_score = np.argmax(
                ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
                 (ind_cls + 0))[:, ind_s_new_score],
                axis=0)
            s_detections[:, 4][ind_s_new_score] = (
                s_temp_score[ind_s_new_score] * 2 +
                center_points[index_s_new_score, 3]) / 3

            l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
            l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
            l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
            l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5

            l_temp_score = copy.copy(l_detections[:, 4])
            l_detections[:, 4] = -1

            center_x = center_points[:, 0][:, np.newaxis]
            center_y = center_points[:, 1][:, np.newaxis]
            l_left_x = l_left_x[np.newaxis, :]
            l_right_x = l_right_x[np.newaxis, :]
            l_top_y = l_top_y[np.newaxis, :]
            l_bottom_y = l_bottom_y[np.newaxis, :]

            ind_lx = (center_x - l_left_x) > 0
            ind_rx = (center_x - l_right_x) < 0
            ind_ty = (center_y - l_top_y) > 0
            ind_by = (center_y - l_bottom_y) < 0
            ind_cls = (center_points[:, 2][:, np.newaxis] -
                       l_detections[:, -1][np.newaxis, :]) == 0
            ind_l_new_score = np.max(
                ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
                 (ind_cls + 0)),
                axis=0) == 1
            index_l_new_score = np.argmax(
                ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) &
                 (ind_cls + 0))[:, ind_l_new_score],
                axis=0)
            l_detections[:, 4][ind_l_new_score] = (
                l_temp_score[ind_l_new_score] * 2 +
                center_points[index_l_new_score, 3]) / 3

            detections = np.concatenate([l_detections, s_detections], axis=0)
            detections = detections[np.argsort(-detections[:, 4])]
            classes = detections[..., -1]

            keep_inds = (detections[:, 4] > -1)
            detections = detections[keep_inds]
            classes = classes[keep_inds]

            top_bboxes[image_id] = {}
            for j in range(categories):
                keep_inds = (classes == j)
                top_bboxes[image_id][j + 1] = \
                    detections[keep_inds][:, 0:7].astype(np.float32)
                if merge_bbox:
                    soft_nms_merge(top_bboxes[image_id][j + 1],
                                   Nt=nms_threshold,
                                   method=nms_algorithm,
                                   weight_exp=weight_exp)
                else:
                    soft_nms(top_bboxes[image_id][j + 1],
                             Nt=nms_threshold,
                             method=nms_algorithm)
                top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

            scores = np.hstack([
                top_bboxes[image_id][j][:, -1]
                for j in range(1, categories + 1)
            ])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, categories + 1):
                    keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                    top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

            return top_bboxes[image_id]

        return 0
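This method and the similar block before Example #20 implement CenterNet's central-region check: a candidate box is kept only when a detected center keypoint of the same class falls inside its central third (small boxes, area <= 22500) or central fifth (large boxes), and the surviving score blends the box and center scores 2:1. A compact sketch of the geometric test for a single box (the function name and the example values are illustrative):

def in_central_region(box, center_point, n):
    # box: [x1, y1, x2, y2]; center_point: [cx, cy]
    # n = 3 for small boxes, n = 5 for large ones, matching the
    # divide-by-3 and divide-by-5 coefficients in the code above
    x1, y1, x2, y2 = box
    a, b = (n + 1) // 2, (n - 1) // 2   # (2, 1) for n=3; (3, 2) for n=5
    cl, cr = (a * x1 + b * x2) / n, (b * x1 + a * x2) / n
    ct, cb = (a * y1 + b * y2) / n, (b * y1 + a * y2) / n
    cx, cy = center_point
    return cl < cx < cr and ct < cy < cb

# a 200 x 200 box has area > 22500, so it is "large": test the central fifth
print(in_central_region([0, 0, 200, 200], [100, 100], n=5))   # True
print(in_central_region([0, 0, 200, 200], [130, 100], n=5))   # False

When the test passes, the code above rescores the box as (2 * box_score + center_score) / 3; when it fails, the box keeps the sentinel score of -1 and is filtered out before NMS.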
Example #22
def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
    result_json = os.path.join(result_dir, "results.json")
    debug_dir = os.path.join(result_dir, "debug")
    if not os.path.exists(debug_dir):
        os.makedirs(debug_dir)
    if db.split != "trainval":
        db_inds = db.db_inds[:100] if debug else db.db_inds
    else:
        db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
    num_images = db_inds.size

    K = db.configs["top_k"]
    ae_threshold = db.configs["ae_threshold"]
    nms_kernel = db.configs["nms_kernel"]

    scales = db.configs["test_scales"]
    weight_exp = db.configs["weight_exp"]
    merge_bbox = db.configs["merge_bbox"]
    categories = db.configs["categories"]
    nms_threshold = db.configs["nms_threshold"]
    max_per_image = db.configs["max_per_image"]
    nms_algorithm = {
        "nms": 0,
        "linear_soft_nms": 1,
        "exp_soft_nms": 2
    }[db.configs["nms_algorithm"]]
    if True:   # placeholder scope; the guard has no effect

        top_bboxes = {}
        for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
            db_ind = db_inds[ind]
            image_id = db.image_ids(db_ind)
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            # two extra channels holding normalized x / y pixel coordinates
            image_xy = np.zeros((image.shape[0], image.shape[1], 2),
                                dtype=np.float32)
            image_xy[..., 0] = np.arange(image.shape[1],
                                         dtype=np.float32) / image.shape[1]
            image_xy[..., 1] = (np.arange(image.shape[0],
                                          dtype=np.float32) / image.shape[0])[:, None]
            height, width = image.shape[0:2]

            detections = []

            for scale in scales:
                new_height = int(height * scale)
                new_width = int(width * scale)
                new_center = np.array([new_height // 2, new_width // 2])

                inp_height = new_height | 127
                inp_width = new_width | 127
                images = np.zeros((1, 5, inp_height, inp_width),
                                  dtype=np.float32)   # 3 color + 2 coordinate channels
                ratios = np.zeros((1, 2), dtype=np.float32)
                borders = np.zeros((1, 4), dtype=np.float32)
                sizes = np.zeros((1, 2), dtype=np.float32)

                out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
                height_ratio = out_height / inp_height
                width_ratio = out_width / inp_width
                resized_image = cv2.resize(image, (new_width, new_height))
                resized_image_xy = cv2.resize(image_xy,
                                              (new_width, new_height))
                resized_image, border, offset = crop_image(
                    resized_image, new_center, [inp_height, inp_width])
                resized_image_xy, border, offset = crop_image(
                    resized_image_xy, new_center, [inp_height, inp_width])
                resized_image = resized_image / 255.
                normalize_(resized_image, db.mean, db.std)

                images[0, 0:3] = resized_image.transpose((2, 0, 1))
                images[0, 3:5] = resized_image_xy.transpose((2, 0, 1))
                borders[0] = border
                sizes[0] = [int(height * scale), int(width * scale)]
                ratios[0] = [height_ratio, width_ratio]

                images = torch.from_numpy(images)
                dets, dets_tl, dets_br, flag = decode_func(
                    nnet,
                    images,
                    K,
                    ae_threshold=ae_threshold,
                    kernel=nms_kernel)
                if not flag:
                    print("error when try to test %s" % image_file)
                    continue
                dets = dets.reshape(1, -1, 8)

                _rescale_dets(dets, ratios, borders, sizes)
                dets[:, :, 0:4] /= scale
                detections.append(dets)
            if len(detections) == 0:
                continue
            detections = np.concatenate(detections, axis=1)

            classes = detections[..., -1]
            classes = classes[0]
            detections = detections[0]

            # reject placeholder detections (score of -1)
            keep_inds = (detections[:, 4] > -1)
            detections = detections[keep_inds]
            classes = classes[keep_inds]

            top_bboxes[image_id] = {}
            for j in range(categories):
                keep_inds = (classes == j)
                top_bboxes[image_id][j + 1] = \
                    detections[keep_inds][:, 0:7].astype(np.float32)
                if merge_bbox:
                    nms.soft_nms_merge(top_bboxes[image_id][j + 1],
                                       Nt=nms_threshold,
                                       method=nms_algorithm,
                                       weight_exp=weight_exp)
                else:
                    nms.soft_nms(top_bboxes[image_id][j + 1],
                                 Nt=nms_threshold,
                                 method=nms_algorithm)
                top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5]

            scores = np.hstack([
                top_bboxes[image_id][j][:, -1]
                for j in range(1, categories + 1)
            ])
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, categories + 1):
                    keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
                    top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]

            if debug:
                image_file = db.image_file(db_ind)
                image = cv2.imread(image_file)

                bboxes = {}
                for j in range(1, categories + 1):
                    keep_inds = (top_bboxes[image_id][j][:, -1] > 0.5)
                    cat_name = db.class_name(j)
                    cat_size = cv2.getTextSize(cat_name,
                                               cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                               2)[0]
                    color = np.random.random((3, )) * 0.6 + 0.4
                    color = color * 255
                    color = color.astype(np.int32).tolist()
                    for bbox in top_bboxes[image_id][j][keep_inds]:
                        bbox = bbox[0:4].astype(np.int32)
                        if bbox[1] - cat_size[1] - 2 < 0:
                            cv2.rectangle(image, (bbox[0], bbox[1] + 2),
                                          (bbox[0] + cat_size[0],
                                           bbox[1] + cat_size[1] + 2), color,
                                          -1)
                            cv2.putText(image,
                                        cat_name,
                                        (bbox[0], bbox[1] + cat_size[1] + 2),
                                        cv2.FONT_HERSHEY_SIMPLEX,
                                        0.5, (0, 0, 0),
                                        thickness=1)
                        else:
                            cv2.rectangle(image,
                                          (bbox[0], bbox[1] - cat_size[1] - 2),
                                          (bbox[0] + cat_size[0], bbox[1] - 2),
                                          color, -1)
                            cv2.putText(image,
                                        cat_name, (bbox[0], bbox[1] - 2),
                                        cv2.FONT_HERSHEY_SIMPLEX,
                                        0.5, (0, 0, 0),
                                        thickness=1)
                        cv2.rectangle(image, (bbox[0], bbox[1]),
                                      (bbox[2], bbox[3]), color, 2)
                debug_file = os.path.join(debug_dir, "{}.jpg".format(db_ind))
                cv2.imwrite(debug_file, image)   # save the annotated debug image

        detections = db.convert_to_coco(top_bboxes)
        with open(result_json, "w") as f:
            json.dump(detections, f)

    image_ids = [db.image_ids(ind) for ind in db_inds]
    with open(result_json, "r") as f:
        result_json = json.load(f)
    for cls_type in range(1, categories + 1):
        db.evaluate(result_json, [cls_type], image_ids)
    return 0
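Every preprocessing block above pads the network input with new | 127 rather than an explicit round-up. Setting the low seven bits yields the smallest value >= new of the form 128k - 1, so inp + 1 is always a multiple of 128 and the stride-4 output (inp + 1) // 4 is a multiple of 32. A quick check of the arithmetic:

for new_size in (255, 256, 511, 600):
    inp = new_size | 127          # next value of the form 128k - 1
    out = (inp + 1) // 4          # stride-4 feature-map size
    print(new_size, '->', inp, '-> out:', out)
# 255 -> 255 -> out: 64
# 256 -> 383 -> out: 96
# 511 -> 511 -> out: 128
# 600 -> 639 -> out: 160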
Example #23
def kp_detection(net, img):
    """
    Run detection on a single image.
    Args:
        net: the detection network
        img: a BGR image array

    Returns: a dict mapping 1-based class id to an (N, 5) array of
        [x1, y1, x2, y2, score] boxes

    """
    K = 100

    width_scale = img.shape[1] / input_size[1]
    height_scale = img.shape[0] / input_size[0]

    # resize to the fixed network input size
    img = cv2.resize(img, input_size)
    height, width = img.shape[0:2]
    top_bboxes = {}

    detections = []

    for scale in scales:
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_center = np.array([new_height // 2, new_width // 2])

        inp_height = new_height | 127
        inp_width = new_width | 127

        images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
        ratios = np.zeros((1, 2), dtype=np.float32)
        borders = np.zeros((1, 4), dtype=np.float32)
        sizes = np.zeros((1, 2), dtype=np.float32)

        out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
        height_ratio = out_height / inp_height
        width_ratio = out_width / inp_width

        resized_image = cv2.resize(img, (new_width, new_height))
        resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])

        resized_image = resized_image / 255.
        images[0] = resized_image.transpose((2, 0, 1))
        borders[0] = border
        sizes[0] = [int(height * scale), int(width * scale)]
        ratios[0] = [height_ratio, width_ratio]

        images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)   # add a horizontally flipped copy (flip TTA)
        images = torch.from_numpy(images)
        images = images.cuda()
        dets = kp_decode(net, images, K)
        dets = dets.reshape(2, -1, 8)
        dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]   # map the flipped copy back to the original frame
        dets = dets.reshape(1, -1, 8)

        _rescale_dets(dets, ratios, borders, sizes)
        dets[:, :, 0:4] /= scale
        detections.append(dets)

    detections = np.concatenate(detections, axis=1)

    classes = detections[..., -1]
    classes = classes[0]
    detections = detections[0]

    # reject placeholder detections (score of -1)
    keep_inds = (detections[:, 4] > -1)
    detections = detections[keep_inds]
    classes = classes[keep_inds]

    for j in range(categories):
        keep_inds = (classes == j)
        top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
        soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm)
        top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5]
        top_bboxes[j + 1][:, 0:4:2] *= width_scale    # rescale x back to the original image
        top_bboxes[j + 1][:, 1:4:2] *= height_scale   # rescale y back to the original image

        top_bboxes[j + 1] = top_bboxes[j + 1][top_bboxes[j + 1][:, -1] > 0.5]   # keep confident boxes only

    return top_bboxes
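Examples #21 and #23 batch each input with its horizontal mirror and fold both result sets into one candidate list. A small sketch of the un-flipping step on fabricated data (the 8-column dets layout follows the snippets above; the box values are made up):

import numpy as np

out_width = 100
# decoded detections for the (original, flipped) pair; each row starts
# with [x1, y1, x2, y2, score, ...] as in the 8-column dets above
dets = np.zeros((2, 1, 8), dtype=np.float32)
dets[0, 0, :4] = [10, 20, 30, 40]   # box seen in the original image
dets[1, 0, :4] = [70, 20, 90, 40]   # the same object in the mirrored image

# map the mirrored box back: new x1 = W - old x2, new x2 = W - old x1
dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]]
print(dets[1, 0, :4])   # [10. 20. 30. 40.], now matching the original view

# both views then collapse into a single candidate list for NMS
dets = dets.reshape(1, -1, 8)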