Example #1
    def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, *args,
                 **kwargs):
        rank = cfg.RANK

        res_folder = os.path.join(output_dir, 'results')
        if not os.path.exists(res_folder):
            try:
                os.makedirs(res_folder)
            except Exception:
                logger.error('Failed to create {}'.format(res_folder))

        res_file = os.path.join(
            res_folder,
            'keypoints_{}_results_{}.json'.format(self.image_set, rank))

        # person x (keypoints)
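        # the numeric image id is recovered from the COCO-style file name:
        # img_path[idx][-16:-4] slices the 12-digit id in e.g. 000000397133.jpg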
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': int(img_path[idx][-16:-4])
            })
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and oks nms
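        # each pose is rescored as box_score * mean(confidence of joints whose
        # confidence exceeds in_vis_thre); OKS NMS then removes duplicates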
        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(list(img_kpts), oks_thre)
            else:
                keep = oks_nms(list(img_kpts), oks_thre)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
        if 'test' not in self.image_set:
            info_str = self._do_python_keypoint_eval(res_file, res_folder)
            name_value = OrderedDict(info_str)
            return name_value, name_value['AP']
        else:
            return {'Null': 0}, 0
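A note on oks_nms, which these examples call but never define: as a minimal
sketch (assuming the standard cocoapi-style OKS with per-keypoint sigmas;
oks_iou and its parameter names are illustrative, not part of the original
code), the similarity it ranks candidate poses by could look like this:

import numpy as np

def oks_iou(g, d, a_g, a_d, sigmas):
    # g: one kept pose, flattened (num_kps * 3,) [x, y, score] triples
    # d: (n, num_kps * 3) candidate poses; a_g, a_d: object areas
    variances = (sigmas * 2) ** 2
    xg, yg = g[0::3], g[1::3]
    ious = np.zeros(len(d))
    for i in range(len(d)):
        dx = d[i, 0::3] - xg
        dy = d[i, 1::3] - yg
        e = (dx ** 2 + dy ** 2) / variances / ((a_g + a_d[i]) / 2 + np.spacing(1)) / 2
        ious[i] = np.mean(np.exp(-e))
    return ious

NMS then greedily keeps the highest-scoring pose of each image and drops any
remaining pose whose oks_iou with it exceeds oks_thre.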
Example #2
def test_net(tester, dets, det_range, gpu_id, sigmas, vis_kps):

    dump_results = []

    start_time = time.time()

    img_start = det_range[0]
    img_id = 0
    img_id2 = 0
    pbar = tqdm(total=det_range[1] - img_start, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    while img_start < det_range[1]:
        img_end = img_start + 1
        im_info = dets[img_start]
        while (img_end < det_range[1]
               and dets[img_end]['image_id'] == im_info['image_id']):
            img_end += 1

        # all human detection results of a certain image
        cropped_data = dets[img_start:img_end]

        pbar.update(img_end - img_start)
        img_start = img_end

        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, crop_info = generate_batch(cropped_data[i], stage='test')
                imgs.append(img)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            crop_infos = np.array(crop_infos)

            # forward
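            # heatmap layout is channel-last: (batch, height, width, num_kps)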
            heatmap = tester.predict_one([imgs])[0]

            if cfg.flip_test:
                flip_imgs = imgs[:, :, ::-1, :]
                flip_heatmap = tester.predict_one([flip_imgs])[0]

                flip_heatmap = flip_heatmap[:, :, ::-1, :]
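                # swap heatmap channels of symmetric (left/right) keypoint pairs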
                for (q, w) in cfg.kps_symmetry:
                    flip_heatmap[:, :, :, q], flip_heatmap[:, :, :, w] = \
                        flip_heatmap[:, :, :, w].copy(), flip_heatmap[:, :, :, q].copy()
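                # shift one pixel to the right to undo the misalignment that
                # horizontal flipping introduces on the discrete heatmap grid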
                flip_heatmap[:, :, 1:, :] = flip_heatmap.copy()[:, :, 0:-1, :]
                heatmap += flip_heatmap
                heatmap /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):

                for j in range(cfg.num_kps):
                    hm_j = heatmap[image_id - start_id, :, :, j]
                    idx = hm_j.argmax()
                    y, x = np.unravel_index(idx, hm_j.shape)

                    px = int(math.floor(x + 0.5))
                    py = int(math.floor(y + 0.5))
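                    # quarter-pixel refinement: nudge the argmax toward the
                    # neighbouring bin with the higher response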
                    if (1 < px < cfg.output_shape[1] - 1
                            and 1 < py < cfg.output_shape[0] - 1):
                        diff = np.array([
                            hm_j[py][px + 1] - hm_j[py][px - 1],
                            hm_j[py + 1][px] - hm_j[py - 1][px]
                        ])
                        diff = np.sign(diff)
                        x += diff[0] * .25
                        y += diff[1] * .25
                    kps_result[image_id, j, :2] = (x * cfg.input_shape[1] /
                                                   cfg.output_shape[1],
                                                   y * cfg.input_shape[0] /
                                                   cfg.output_shape[0])
                    kps_result[image_id, j, 2] = hm_j.max() / 255

                vis = False
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] -
                                                        crop_info[1])
                if (vis and np.any(kps_result[image_id, :, 2] > 0.9)
                        and area > 96**2):
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps)
                    cv2.imwrite(
                        osp.join(cfg.vis_dir,
                                 str(img_id) + '_output.jpg'), _tmpimg)
                    img_id += 1

                # map back to original images
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[image_id, j, 0] \
                        / cfg.input_shape[1] * (crop_info[2] - crop_info[0]) + crop_info[0]
                    kps_result[image_id, j, 1] = kps_result[image_id, j, 1] \
                        / cfg.input_shape[0] * (crop_info[3] - crop_info[1]) + crop_info[1]

                area_save[image_id] = area

        # visualization
        if vis_kps and np.any(kps_result[:, :, 2] > 0.8):
            tmpimg = cv2.imread(
                os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # rescoring and oks nms
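        # per-person score = mean of joint confidences above cfg.score_thr,
        # multiplied by the human detector's box score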
        if cfg.dataset == 'COCO' or cfg.dataset == 'JTA':
            rescored_score = np.zeros((len(score_result)))
            for i in range(len(score_result)):
                score_mask = score_result[i] > cfg.score_thr
                if np.sum(score_mask) > 0:
                    rescored_score[i] = np.mean(
                        score_result[i][score_mask]) * cropped_data[i]['score']
            score_result = rescored_score
            keep = oks_nms(kps_result, score_result, area_save,
                           cfg.oks_nms_thr, sigmas)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep]
                area_save = area_save[keep]
        elif cfg.dataset == 'PoseTrack':
            keep = oks_nms(kps_result, np.mean(score_result, axis=1),
                           area_save, cfg.oks_nms_thr)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep, :]
                area_save = area_save[keep]

        # save result
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO' or cfg.dataset == 'JTA':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              keypoints=kps_result[i].round(3).tolist(),
                              score=float(round(score_result[i], 4)))
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            dump_results.append(result)
    pbar.close()
    del pbar

    return dump_results
Example #3
    def rescore_and_save_result(self, output_file, preds, all_boxes, img_path,
                                orig_boxes):
        assert output_file.endswith('.json') or output_file.endswith('.npy'), \
            "Only json and numpy output is supported"

        ensuredir(os.path.dirname(output_file))

        # person x (keypoints)
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': img_path[idx],
                'origbox': orig_boxes[idx]
            })

        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and oks nms
        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        nmsed_kpts_by_frame = defaultdict(list)
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(list(img_kpts), oks_thre)
            else:
                keep = oks_nms(list(img_kpts), oks_thre)

            if len(keep) == 0:
                selected_kpts = img_kpts
            else:
                selected_kpts = [img_kpts[_keep] for _keep in keep]

            oks_nmsed_kpts.append(selected_kpts)
            nmsed_kpts_by_frame[img] = selected_kpts

        self._write_keypoint_results(nmsed_kpts_by_frame, output_file)
Example #4
    def evaluate(self, cfg, preds, output_dir, all_boxes, image_path,
                 file_names, *args):
        res_folder = os.path.join(output_dir, 'results')
        if not os.path.exists(res_folder):
            os.makedirs(res_folder)
        res_file = os.path.join(res_folder,
                                'keypoints_%s_results.json' % self.image_set)

        # person x (keypoints)
        _kpts = []

        for idx, kpt in enumerate(preds):
            # example image_path[idx]:
            #   data/posetrack/images/valid/000522_mpii_test/000080.jpg
            # the image id combines the 4-digit frame suffix ('0080') with the
            # 6-digit sequence id ('000522'), prefixed by a split marker
            image_id = image_path[idx][-8:-4]
            folder_name = image_path[idx].split('/')[-2][0:6]

            if self.image_set in ('valid', 'val'):
                prefix = '1'
            else:
                prefix = '0'

            image_id = int(prefix + folder_name + image_id)
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image_id': image_id
            })
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image_id']].append(kpt)

        # rescoring and oks nms
        logger.info('=> number of images with detections: {}'.format(len(kpts)))

        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []

        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            keep = oks_nms(list(img_kpts), oks_thre)
            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

        pred_folder = os.path.join(self.root, 'posetrack_example_results',
                                   'test_pred/')
        gt_folder = os.path.join(self.root, 'posetrack_data', 'anns/')

        if 'test' not in self.image_set:
            info_str = self._do_python_keypoint_eval(res_file, res_folder)

            if "val" in self.image_set:
                self._do_posetrack_keypoint_eval(gt_folder, pred_folder)
            name_value = OrderedDict(info_str)
            return name_value, name_value['AP']
        else:
            return {'Null': 0}, 0
Example #5
def test_net(tester, input_pose, det_range, gpu_id):

    dump_results = []

    start_time = time.time()

    img_start = det_range[0]
    img_id = 0
    img_id2 = 0
    pbar = tqdm(total=det_range[1] - img_start, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    while img_start < det_range[1]:
        img_end = img_start + 1
        im_info = input_pose[img_start]
        while (img_end < det_range[1]
               and input_pose[img_end]['image_id'] == im_info['image_id']):
            img_end += 1

        # all human detection results of a certain image
        cropped_data = input_pose[img_start:img_end]
        pbar.update(img_end - img_start)

        img_start = img_end

        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            input_pose_coords = []
            input_pose_valids = []
            input_pose_scores = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, input_pose_coord, input_pose_valid, input_pose_score, crop_info = generate_batch(
                    cropped_data[i], stage='test')
                imgs.append(img)
                input_pose_coords.append(input_pose_coord)
                input_pose_valids.append(input_pose_valid)
                input_pose_scores.append(input_pose_score)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            input_pose_coords = np.array(input_pose_coords)
            input_pose_valids = np.array(input_pose_valids)
            input_pose_scores = np.array(input_pose_scores)
            crop_infos = np.array(crop_infos)

            # forward
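            # the model consumes the image plus the input pose (coordinates and
            # per-joint valid flags) and regresses refined coordinates directly,
            # rather than heatmaps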
            coord = tester.predict_one(
                [imgs, input_pose_coords, input_pose_valids])[0]

            if cfg.flip_test:
                flip_imgs = imgs[:, :, ::-1, :]
                flip_input_pose_coords = input_pose_coords.copy()
                flip_input_pose_coords[:, :, 0] = cfg.input_shape[
                    1] - 1 - flip_input_pose_coords[:, :, 0]
                flip_input_pose_valids = input_pose_valids.copy()
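                # swap symmetric (left/right) keypoints in the flipped pose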
                for (q, w) in cfg.kps_symmetry:
                    flip_input_pose_coords[:, q, :], flip_input_pose_coords[:, w, :] = \
                        flip_input_pose_coords[:, w, :].copy(), flip_input_pose_coords[:, q, :].copy()
                    flip_input_pose_valids[:, q], flip_input_pose_valids[:, w] = \
                        flip_input_pose_valids[:, w].copy(), flip_input_pose_valids[:, q].copy()

                flip_coord = tester.predict_one([
                    flip_imgs, flip_input_pose_coords, flip_input_pose_valids
                ])[0]

                flip_coord[:, :, 0] = cfg.input_shape[1] - 1 - flip_coord[:, :, 0]
                for (q, w) in cfg.kps_symmetry:
                    flip_coord[:, q, :], flip_coord[:, w, :] = \
                        flip_coord[:, w, :].copy(), flip_coord[:, q, :].copy()
                coord += flip_coord
                coord /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):

                kps_result[image_id, :, :2] = coord[image_id - start_id]
                kps_result[image_id, :,
                           2] = input_pose_scores[image_id - start_id]

                vis = False
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] -
                                                        crop_info[1])
                if (vis and np.any(kps_result[image_id, :, 2] > 0.9)
                        and area > 96**2):
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps)
                    cv2.imwrite(
                        osp.join(cfg.vis_dir,
                                 str(img_id) + '_output.jpg'), _tmpimg)
                    img_id += 1

                # map back to original images
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[image_id, j, 0] \
                        / cfg.input_shape[1] * (crop_info[2] - crop_info[0]) + crop_info[0]
                    kps_result[image_id, j, 1] = kps_result[image_id, j, 1] \
                        / cfg.input_shape[0] * (crop_info[3] - crop_info[1]) + crop_info[1]

                area_save[image_id] = area

        # visualization
        vis = False
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            tmpimg = cv2.imread(
                os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        # oks nms
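        # keypoints listed in cfg.ignore_kps are removed before NMS so they do
        # not contribute to the OKS similarity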
        if cfg.dataset in ['COCO', 'PoseTrack']:
            nms_kps = np.delete(kps_result, cfg.ignore_kps, 1)
            nms_score = np.mean(nms_kps[:, :, 2], axis=1)
            nms_kps[:, :, 2] = 1
            nms_kps = nms_kps.reshape(len(kps_result), -1)
            nms_sigmas = np.delete(cfg.kps_sigmas, cfg.ignore_kps)
            keep = oks_nms(nms_kps, nms_score, area_save, cfg.oks_nms_thr,
                           nms_sigmas)
            if len(keep) > 0:
                kps_result = kps_result[keep, :, :]
                area_save = area_save[keep]

        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # save result
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              score=float(round(np.mean(score_result[i]), 4)),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())

            dump_results.append(result)

    pbar.close()

    return dump_results