Example #1
 def predict(self, imgs, camera_parameter, template_name='Shelf', show=False, plt_id=0):
     # first, the 2D predictions are stored in info_dict
     # imgs is a list of 3 tensors = 3 images from 3 different viewpoints
     # one batch is one image
     info_dict = self._infer_single2d ( imgs )
     # info_dict is a dictionary keyed by camera; each entry is a nested dictionary of dictionaries, much like a JSON object
     # as its description says, MemDataset keeps the 2D dataset in memory
     self.dataset = MemDataset ( info_dict=info_dict, camera_parameter=camera_parameter,
                                 template_name=template_name )
     # now the fun begins
     return self._estimate3d ( 0, show=show, plt_id=plt_id )
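
For reference, a minimal sketch of the info_dict layout that _infer_single2d returns, reconstructed from the comments above and from the _infer_single2d body shown in Example #9; the shapes and values are placeholders, not data from the repo:

import numpy as np

img_id = 0
info_dict = {
    0: {  # keyed by cam_id, one entry per camera view
        'image_data': np.zeros((288, 360, 3), dtype=np.uint8),  # full RGB frame
        img_id: [  # one dict per detected person
            {
                'pose2d': np.zeros(51),              # 17 joints * (x, y, score)
                'bbox': [10, 20, 100, 200],          # (x, y, w, h), top-left corner
                'heatmap_bbox': [10, 20, 110, 220],  # [x, y, x+w, y+h]
                'heatmap_data': np.zeros((17, 64, 48)),
                'cropped_img': np.zeros((200, 100, 3), dtype=np.uint8),
            },
        ],
    },
}
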
Example #2
 def predict(self,
             imgs,
             camera_parameter,
             template_name='Shelf',
             show=False,
             plt_id=0):
     info_dict = self._infer_single2d(imgs)
     self.dataset = MemDataset(info_dict=info_dict,
                               camera_parameter=camera_parameter,
                               template_name=template_name)
     return self._estimate3d(0, show=show, plt_id=plt_id)
Example #3
def export(model, loader, is_info_dicts=False, show=False):
    pose_list = list()
    for img_id, imgs in enumerate(tqdm(loader)):
        try:
            pass
        except Exception as e:
            pass
            # poses3d = model.estimate3d ( img_id=img_id, show=False )
        # this branch runs when the preprocessing pipeline (precomputed 2D info) is used

        if is_info_dicts:
            # info_dicts basically holds the images, plus per-person heatmaps and cropped images saved during preprocessing
            info_dicts = numpify(imgs)

            model.dataset = MemDataset(info_dict=info_dicts,
                                       camera_parameter=camera_parameter,
                                       template_name='Unified')
            # _estimate3d is used here (instead of predict, as below) because the 2D
            # results are already saved, so only the 3D poses need estimating; predict runs the whole pipeline
            poses3d = model._estimate3d(0, show=show)
        else:
            this_imgs = list()
            # imgs is a list of 3 tensors, one per camera view
            # each img_batch is one of these tensors, with shape (288, 360, 3), i.e. (H, W, channels)
            for img_batch in imgs:
                # squeeze() drops the leading batch dimension of size 1, so this_imgs collects plain (H, W, 3) frames
                this_imgs.append(img_batch.squeeze().numpy())
            poses3d = model.predict(imgs=this_imgs,
                                    camera_parameter=camera_parameter,
                                    template_name='Unified',
                                    show=show,
                                    plt_id=img_id)

        pose_list.append(poses3d)
    return pose_list
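
A quick illustration of the squeeze() call above, assuming the loader yields batch_size=1 tensors; the shape (1, 288, 360, 3) is inferred from the comments, not measured:

import torch

img_batch = torch.zeros(1, 288, 360, 3)  # one view with a leading batch dim of 1
frame = img_batch.squeeze().numpy()      # squeeze drops the size-1 dimension
print(frame.shape)                       # (288, 360, 3)
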
Example #4
def export(model, loader, is_info_dicts=False, show=False):
    pose_list = list()
    for img_id, imgs in enumerate(tqdm(loader)):
        try:
            pass
        except Exception as e:
            pass
            # poses3d = model.estimate3d ( img_id=img_id, show=False )
        if is_info_dicts:
            info_dicts = numpify(imgs)

            model.dataset = MemDataset(info_dict=info_dicts,
                                       camera_parameter=camera_parameter,
                                       template_name='Unified')
            poses3d = model._estimate3d(0, show=show)
        else:
            this_imgs = list()
            for img_batch in imgs:
                this_imgs.append(img_batch.squeeze().numpy())
            poses3d = model.predict(imgs=this_imgs,
                                    camera_parameter=camera_parameter,
                                    template_name='Unified',
                                    show=show,
                                    plt_id=img_id)

        pose_list.append(poses3d)
    return pose_list
Example #5
def export(model, loader, is_info_dicts=False, show=False):
    pose_list = list()
    for img_id, imgs in enumerate(tqdm(loader)):
        try:
            pass
        except Exception as e:
            pass
            # poses3d = model.estimate3d ( img_id=img_id, show=False )
        if is_info_dicts:
            info_dicts = numpify(imgs)

            model.dataset = MemDataset(info_dict=info_dicts,
                                       camera_parameter=camera_parameter,
                                       template_name='Unified')
            poses3d = model._estimate3d(0, show=show)
        else:
            this_imgs = list()
            # undistort here
            for iimg, img_batch in enumerate(imgs):
                frame = img_batch.squeeze().numpy()
                if 'distCoef' in camera_parameter.keys():
                    mtx = camera_parameter['K'][iimg]
                    dist = camera_parameter['distCoef'][iimg]
                    frame = cv2.undistort(frame, mtx, dist, None)
                this_imgs.append(frame)
            poses3d = model.predict(imgs=this_imgs,
                                    camera_parameter=camera_parameter,
                                    template_name='Unified',
                                    show=show,
                                    plt_id=img_id)

        pose_list.append(poses3d)
    return pose_list
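
Example #5 reads per-view intrinsics and distortion coefficients from camera_parameter before calling cv2.undistort. A sketch of the dict layout it assumes, with placeholder values (Example #10 additionally reads 3x4 extrinsics under 'RT'):

import numpy as np

camera_parameter = {
    'K': [np.eye(3) for _ in range(3)],           # 3x3 intrinsic matrix per view
    'distCoef': [np.zeros(5) for _ in range(3)],  # OpenCV (k1, k2, p1, p2, k3) per view
    'RT': [np.zeros((3, 4)) for _ in range(3)],   # 3x4 [R|t] extrinsics per view
}
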
Example #6
def export(model, loader, is_info_dicts=False, show=False):
    pose3d_list, pose2d_list, hand_bbox_list, hand_pose_list = [], [], [], []
    print('num of frames: {}'.format(len(loader)))
    with torch.no_grad():
        t_start = time.time()
        for img_id, imgs in enumerate(tqdm(loader)):
            if is_info_dicts:
                info_dicts = numpify(imgs)

                model.dataset = MemDataset(info_dict=info_dicts,
                                           camera_parameter=camera_parameter,
                                           template_name='Unified')
                poses3d = model._estimate3d(0, show=show)
            else:  # √
                this_imgs = list()  # len(this_imgs) = 3
                for img_batch in imgs:
                    this_imgs.append(img_batch.squeeze().numpy())  # this_imgs[0].shape = (288, 360, 3)
                poses3d, pose2d, hand_bbox, hand_pose = model.predict(
                    imgs=this_imgs,
                    camera_parameter=camera_parameter,
                    template_name='Unified',
                    show=show,
                    plt_id=img_id)

            pose3d_list.append(poses3d)
            pose2d_list.append(pose2d)
            hand_bbox_list.append(hand_bbox)
            hand_pose_list.append(hand_pose)
        t_end = time.time()
        print('overall avg time: {}'.format((t_end - t_start) / len(loader)))
    return pose3d_list, pose2d_list, hand_bbox_list, hand_pose_list
Example #7
    def predict(self, imgs, camera_parameter, template_name='Shelf', show=False, plt_id=0):
        # 2d human pose estimation for all views
        info_dict = self._infer_single2d ( imgs )

        self.dataset = MemDataset ( info_dict=info_dict, camera_parameter=camera_parameter,
                                    template_name=template_name )
        
        # 3d human pose reconstruction
        pose3d = self._estimate3d ( 0, show=show, plt_id=plt_id )
        
        # hand bbox detection for all views
        pose2d, hand_bbox = self._detect_hand(imgs, info_dict)

        # 3d hand pose estimation
        cam_id = 1
        hand_pose = self._estimate_hand_pose(imgs, hand_bbox, camera_parameter, cam_id)
        
        return pose3d, pose2d, hand_bbox, hand_pose
Example #8
def export(info_dicts, model, match, img_names):
    pose3d_list = list()
    N = len(info_dicts)
    print('num of frames: {}'.format(N))

    with torch.no_grad():
        t_start = time.time()
        for i, info in enumerate(info_dicts):
            if i % 1000 == 0:
                print('processing ({:5d}/{:5d})'.format(i, N))
            # import other info ('image_data', 'cropped_img')
            info_dict = dict()
            for cam_id in range(len(info)):
                idx = match['idx{}'.format(cam_id + 1)][i]
                img_name = img_names[cam_id][idx]
                assert idx == int(os.path.basename(img_name).split('_')[1])
                img = cv2.imread(img_name)
                info[cam_id]['image_data'] = cv2.cvtColor(
                    img.copy(), cv2.COLOR_BGR2RGB)
                for person_id in range(len(info[cam_id][0])):
                    bb = np.array(info[cam_id][0][person_id]['bbox'],
                                  dtype=int)
                    cropped_img = img[bb[1]:bb[3], bb[0]:bb[2]]
                    info[cam_id][0][person_id]['cropped_img'] = cv2.cvtColor(
                        cropped_img.copy(), cv2.COLOR_BGR2RGB)
                    info[cam_id][0][person_id]['pose2d'] = info[cam_id][0][
                        person_id]['pose2d'][:17].reshape(-1)
                    info[cam_id][0][person_id]['heatmap_data'] = []
                    info[cam_id][0][person_id]['heatmap_bbox'] = []
                info_dict[cam_id] = info[cam_id]

            satisfied = (np.array([len(vinfo[0]) for vinfo in info]) > 0).sum()
            if satisfied > 1:
                model.dataset = MemDataset(info_dict=info_dict,
                                           camera_parameter=camera_parameter,
                                           template_name='Unified')
                poses3d = model._estimate3d(0)
            else:
                poses3d = -1

            pose3d_list.append(poses3d)
            info_dicts[i] = -1
        t_end = time.time()
        print('overall avg time: {}'.format((t_end - t_start) / N))
    return pose3d_list
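
The pose2d slicing in Example #8 matches the flat layout produced by _infer_single2d. Assuming pose2d arrives as an (N, 3) array of (x, y, score) rows, the slice keeps the 17 COCO body joints and flattens them to length 51:

import numpy as np

pose2d = np.zeros((21, 3))      # hypothetical detector output with extra joints
flat = pose2d[:17].reshape(-1)  # keep the COCO-17 body joints and flatten
print(flat.shape)               # (51,) = 17 * (x, y, score)
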
Example #9
class MultiEstimator ( object ):
    def __init__(self, cfg, debug=False):
        self.est2d = Estimator_2d ( DEBUGGING=debug )
        # this extractor if CamStyle is used
        #self.extractor = FeatureExtractor ()
        # this extractor if OSNet is used
        self.extractor = FeatureExtractor(
            model_name='osnet_x1_0',
            model_path='backend/reid/models/osnet_x1_0_market_256x128_amsgrad_ep150_stp60_lr0.0015_b64_fb10_softmax_labelsmooth_flip.pth',
            device='cuda'
        )
        self.cfg = cfg
        self.dataset = None
    # this method basically does all the work
    def predict(self, imgs, camera_parameter, template_name='Shelf', show=False, plt_id=0):
        # first, the 2D predictions are stored in info_dict
        # imgs is a list of 3 tensors = 3 images from 3 different viewpoints
        # one batch is one image
        info_dict = self._infer_single2d ( imgs )
        # info_dict is a dictionary keyed by camera; each entry is a nested dictionary of dictionaries, much like a JSON object
        # as its description says, MemDataset keeps the 2D dataset in memory
        self.dataset = MemDataset ( info_dict=info_dict, camera_parameter=camera_parameter,
                                    template_name=template_name )
        # now the fun begins
        return self._estimate3d ( 0, show=show, plt_id=plt_id )

    def _infer_single2d(self, imgs, img_id=0, dir='/home/jiangwen/tmp/Multi'):
        info_dict = dict ()
        # imgs holds 3 images, one per camera; each loop iteration processes a single image
        # (the trailing dimension of 3 is the number of color channels, not the number of views)
        for cam_id, img in enumerate ( imgs ):
            # send a single image (one of the 3 views) to the 2D estimator
            results = self.est2d.estimate_2d ( img, img_id )
            # results holds the detections: bounding boxes together with the 2D skeletons
            # this_info_dict stores the RGB-converted image under the 'image_data' key
            this_info_dict = {'image_data': cv2.cvtColor ( img.copy (), cv2.COLOR_BGR2RGB )}
            # under the img_id key it creates a list (one entry per person)
            this_info_dict[img_id] = list ()
            for person_id, result in enumerate ( results ):
                # each entry in the img_id list is another dictionary, one per person
                this_info_dict[img_id].append ( dict () )
                this_info_dict[img_id][person_id]['pose2d'] = result['keypoints']
                # NOTE: bbox is (x, y) (W, H) format where x and y is up-left point.
                this_info_dict[img_id][person_id]['bbox'] = result['bbox']
                bb = np.array ( result['bbox'], dtype=int )
                # effectively builds one person entry per detected bounding box
                cropped_img = img[bb[1]:bb[1] + bb[3], bb[0]:bb[0] + bb[2]]
                # numpy format of crop idx is changed to json
                # result contains at least 'keypoints', 'bbox', 'crops' and 'heatmaps'
                this_info_dict[img_id][person_id]['heatmap_bbox'] = result['crops'].astype ( int ).tolist ()
                this_info_dict[img_id][person_id]['heatmap_data'] = result['heatmaps']
                this_info_dict[img_id][person_id]['cropped_img'] = cv2.cvtColor ( cropped_img.copy (),
                                                                                  cv2.COLOR_BGR2RGB )
            # same pattern per camera: 2D pose inference runs separately for each view
            info_dict[cam_id] = this_info_dict
        return info_dict

    def _estimate3d(self, img_id, show=False, plt_id=0):
        # dataset[img_id] is an iterable batch with the cropped images, names, pids and cam_ids
        # collected from all 3 cameras (confirmed by the CamStyle code)
        data_batch = self.dataset[img_id]
        # appearance (ReID) affinity matrix; cfg.rerank is False here
        affinity_mat = self.extractor.get_affinity ( data_batch, rerank=self.cfg.rerank )
        if self.cfg.rerank:
            affinity_mat = torch.from_numpy ( affinity_mat )
            affinity_mat = torch.max ( affinity_mat, affinity_mat.t () )
            affinity_mat = 1 - affinity_mat
        else:
            affinity_mat = affinity_mat.cpu ()
        # dimGroup holds the cumulative per-camera detection counts for this frame
        dimGroup = self.dataset.dimGroup[img_id]

        info_list = list ()
        for cam_id in self.dataset.cam_names:
            # concatenate the per-person info from all 3 cameras into one list
            info_list += self.dataset.info_dict[cam_id][img_id]

        # the 2D poses reshaped to the joint count the 3D detector expects, keeping only (x, y)
        pose_mat = np.array ( [i['pose2d'] for i in info_list] ).reshape ( -1, model_cfg.joint_num, 3 )[..., :2]
        # self.dataset.F holds the fundamental matrices between camera pairs; the 2D detections,
        # the epipolar geometry and the per-camera person counts feed the geometric affinity
        geo_affinity_mat = geometry_affinity ( pose_mat.copy (), self.dataset.F.numpy (),
                                               self.dataset.dimGroup[img_id] )
        geo_affinity_mat = torch.tensor ( geo_affinity_mat )
        # the metric used here is indeed 'geometry mean'
        if self.cfg.metric == 'geometry mean':
            W = torch.sqrt ( affinity_mat * geo_affinity_mat )
        elif self.cfg.metric == 'circle':
            W = torch.sqrt ( (affinity_mat ** 2 + geo_affinity_mat ** 2) / 2 )
        elif self.cfg.metric == 'Geometry only':
            W = torch.tensor ( geo_affinity_mat )
        elif self.cfg.metric == 'ReID only':
            W = torch.tensor ( affinity_mat )
        else:
            logger.critical ( 'Get into default option, are you intend to do it?' )
            _alpha = 0.8
            W = _alpha * affinity_mat + (1 - _alpha) * geo_affinity_mat
        W[torch.isnan ( W )] = 0  # Sometimes (e.g. Shelf img 452) torch.sqrt returns NaN when the value is too small
        sub_imgid2cam = np.zeros ( pose_mat.shape[0], dtype=np.int32  )
        for idx, i in enumerate ( range ( len ( dimGroup ) - 1 ) ):
            sub_imgid2cam[dimGroup[i]:dimGroup[i + 1]] = idx
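        # Worked example (assumed counts): with 3 cameras detecting 2, 3 and 2 people,
        # dimGroup = [0, 2, 5, 7] and sub_imgid2cam = [0, 0, 1, 1, 1, 2, 2],
        # mapping every row of W back to its source camera.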

        # the complex part starts here
        num_person = 10
        # X0 starts as a random tensor; W.shape[0] is the total number of 2D detections across all views
        X0 = torch.rand ( W.shape[0], num_person )

        # Use spectral method to initialize assignment matrix.
        # the spectral method initializes the assignment matrix X0 from the leading eigenvectors of W
        if self.cfg.spectral:
            # an eigenvector keeps its direction when the linear transformation is applied
            eig_value, eig_vector = W.eig ( eigenvectors=True )
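            # NOTE: torch.Tensor.eig has been removed from recent PyTorch releases;
            # torch.linalg.eig is the modern replacement (it returns complex tensors).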
            _, eig_idx = torch.sort ( eig_value[:, 0], descending=True )

            if W.shape[1] >= num_person:
                X0 = eig_vector[eig_idx[:num_person]].t ()
            else:
                X0[:, :W.shape[1]] = eig_vector.t ()


        # match_mat is the matching (permutation-like) matrix marking which detections belong to the same person (see matchSVT)
        match_mat = matchSVT ( W, dimGroup, alpha=self.cfg.alpha_SVT, _lambda=self.cfg.lambda_SVT,
                               dual_stochastic_SVT=self.cfg.dual_stochastic_SVT )

        # keep only columns matched in at least two views (column sum > 1.9) and binarize at 0.9, dropping spurious detections
        bin_match = match_mat[:, torch.nonzero ( torch.sum ( match_mat, dim=0 ) > 1.9 ).squeeze ()] > 0.9
        bin_match = bin_match.reshape ( W.shape[0], -1 )
        # one empty list per matched identity
        matched_list = [[] for i in range ( bin_match.shape[1] )]
        for sub_imgid, row in enumerate ( bin_match ):
            if row.sum () != 0:
                pid = row.argmax ()
                # a non-zero row sum means a match: assign this detection id to that person id
                matched_list[pid].append ( sub_imgid )
        # convert each list to a numpy array
        matched_list = [np.array ( i ) for i in matched_list]
        # cfg.hybrid is True here
        if self.cfg.hybrid:
            multi_pose3d = self._hybrid_kernel ( matched_list, pose_mat, sub_imgid2cam, img_id )
            chosen_img = [[]] * len ( sub_imgid2cam )
        else:
            multi_pose3d, chosen_img = self._top_down_pose_kernel ( geo_affinity_mat, matched_list, pose_mat,
                                                                    sub_imgid2cam )
        if show:  # hybrid not implemented yet.
            bin_match = match_mat[:, torch.nonzero ( torch.sum ( match_mat, dim=0 ) > 0.9 ).squeeze ()] > 0.9
            bin_match = bin_match.reshape ( W.shape[0], -1 )
            matched_list = [[] for i in range ( bin_match.shape[1] )]
            for sub_imgid, row in enumerate ( bin_match ):
                if row.sum () != 0:
                    pid = row.argmax ()
                    matched_list[pid].append ( sub_imgid )
            matched_list = [np.array ( i ) for i in matched_list]
            # the two calls below render the visual results
            show_panel_mem ( self.dataset, matched_list, info_list, sub_imgid2cam, img_id, affinity_mat,
                             geo_affinity_mat, W, plt_id, multi_pose3d )
            plotPaperRows ( self.dataset, matched_list, info_list, sub_imgid2cam, img_id, affinity_mat,
                            geo_affinity_mat, W, plt_id, multi_pose3d )

        return multi_pose3d

    def _hybrid_kernel(self, matched_list, pose_mat, sub_imgid2cam, img_id):
        return pictorial.hybrid_kernel ( self, matched_list, pose_mat, sub_imgid2cam, img_id )
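        # NOTE: everything below this return is unreachable, kept from an earlier
        # bottom-up implementation of the hybrid kernel.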
        multi_pose3d = list ()

        for person in matched_list:
            # use bottom-up approach to get the 3D pose of person
            if person.shape[0] <= 1:
                continue

            # step1: use the 2D joint of person to triangulate the 3D joints candidates

            # person's 17 3D joints candidates
            candidates = np.zeros ( (17, person.shape[0] * (person.shape[0] - 1) // 2, 3) )
            # 17xC^2_nx3
            cnt = 0
            for i in range ( person.shape[0] ):
                for j in range ( i + 1, person.shape[0] ):
                    cam_id_i, cam_id_j = sub_imgid2cam[person[i]], sub_imgid2cam[person[j]]
                    projmat_i, projmat_j = self.dataset.P[cam_id_i], self.dataset.P[cam_id_j]
                    pose2d_i, pose2d_j = pose_mat[person[i]].T, pose_mat[person[j]].T
                    pose3d_homo = cv2.triangulatePoints ( projmat_i, projmat_j, pose2d_i, pose2d_j )
                    pose3d_ij = pose3d_homo[:3] / pose3d_homo[3]
                    candidates[:, cnt] += pose3d_ij.T
                    cnt += 1

            unary = self.dataset.get_unary ( person, sub_imgid2cam, candidates, img_id )

            # step2: use the max-product algorithm to inference to get the 3d joint of the person

            # change the coco order
            coco_2_skel = [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
            candidates = np.array ( candidates )[coco_2_skel]
            unary = unary[coco_2_skel]
            skel = pictorial.getskel ()
            # construct pictorial model
            edges = pictorial.getPictoStruct ( skel, self.dataset.distribution )
            xp = pictorial.inferPict3D_MaxProd ( unary, edges, candidates )
            human = np.array ( [candidates[i][j] for i, j in zip ( range ( xp.shape[0] ), xp )] )
            human_coco = np.zeros ( (17, 3) )
            human_coco[[0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]] = human
            human_coco[[1, 2, 3, 4]] = human_coco[0]  # only to make the visualization look nice; not real ears and eyes
            human_coco = human_coco.T
            if self.cfg.reprojection_refine and len ( person ) > 2:
                for joint_idx in range ( human_coco.shape[1] ):
                    reprojected_error = np.zeros ( len ( person ) )
                    for idx, pid in enumerate ( person ):
                        human_coco_homo = np.ones ( 4 )
                        human_coco_homo[:3] = human_coco[:, joint_idx]
                        projected_pose_homo = self.dataset.P[sub_imgid2cam[pid]] @ human_coco_homo
                        projected_pose = projected_pose_homo[:2] / projected_pose_homo[2]
                        reprojected_error[idx] += np.linalg.norm ( projected_pose - pose_mat[pid, joint_idx] )
                    # import IPython; IPython.embed()
                    # pose_select = reprojected_error < self.cfg.refine_threshold
                    pose_select = (reprojected_error - reprojected_error.mean ()) / reprojected_error.std () < self.cfg.refine_threshold
                    if pose_select.sum () >= 2:
                        Ps = list ()
                        Ys = list ()
                        for idx, is_selected in enumerate ( pose_select ):
                            if is_selected:
                                Ps.append ( self.dataset.P[sub_imgid2cam[person[idx]]] )
                                Ys.append ( pose_mat[person[idx], joint_idx].reshape ( -1, 1 ) )
                        Ps = torch.tensor ( Ps, dtype=torch.float32 )
                        Ys = torch.tensor ( Ys, dtype=torch.float32 )
                        Xs = multiTriIter ( Ps, Ys )
                        refined_pose = (Xs[:3] / Xs[3]).numpy ()
                        human_coco[:, joint_idx] = refined_pose.reshape ( -1 )
            if True or check_bone_length ( human_coco ):
                multi_pose3d.append ( human_coco )
        return multi_pose3d

    def _top_down_pose_kernel(self, geo_affinity_mat, matched_list, pose_mat, sub_imgid2cam):
        multi_pose3d = list ()
        chosen_img = list ()
        for person in matched_list:
            Graph = geo_affinity_mat[person][:, person].clone ().numpy ()
            Graph *= (1 - np.eye ( Graph.shape[0] ))  # make diagonal 0
            if len ( Graph ) < 2:
                continue
            elif len ( Graph ) > 2:
                if self.cfg.use_mincut:
                    cut0, cut1 = find_mincut ( Graph.copy () )
                    cut = cut0 if len ( cut0 ) > len ( cut1 ) else cut1
                    cut = cut.astype ( int )
                    sub_imageid = person[cut]
                else:
                    sub_imageid = get_min_reprojection_error ( person, self.dataset, pose_mat, sub_imgid2cam )
            else:
                sub_imageid = person

            _, rank = torch.sort ( geo_affinity_mat[sub_imageid][:, sub_imageid].sum ( dim=0 ) )
            sub_imageid = sub_imageid[rank[:2]]
            cam_id_0, cam_id_1 = sub_imgid2cam[sub_imageid[0]], sub_imgid2cam[sub_imageid[1]]
            projmat_0, projmat_1 = self.dataset.P[cam_id_0], self.dataset.P[cam_id_1]
            pose2d_0, pose2d_1 = pose_mat[sub_imageid[0]].T, pose_mat[sub_imageid[1]].T
            pose3d_homo = cv2.triangulatePoints ( projmat_0, projmat_1, pose2d_0, pose2d_1 )
            if self.cfg.use_bundle:
                pose3d_homo = bundle_adjustment ( pose3d_homo, person, self.dataset, pose_mat, sub_imgid2cam,
                                                  logging=logger )
            pose3d = pose3d_homo[:3] / (pose3d_homo[3] + 10e-6)
            # pose3d -= ((pose3d[:, 11] + pose3d[:, 12]) / 2).reshape ( 3, -1 ) # No need to normalize to hip
            if check_bone_length ( pose3d ):
                multi_pose3d.append ( pose3d )
            else:
                # logging.info ( f'A pose proposal deleted on {img_id}:{person}' )
                sub_imageid = list ()
                pass
            chosen_img.append ( sub_imageid )
        return multi_pose3d, chosen_img
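
To make the binarization step in _estimate3d concrete, here is a toy run with assumed numbers: columns of match_mat summing above 1.9 correspond to identities seen in at least two views, and thresholding at 0.9 yields the boolean assignment.

import torch

match_mat = torch.tensor([[1.0, 0.0],
                          [0.0, 0.3],
                          [1.0, 0.0],
                          [0.0, 0.2]])
cols = torch.nonzero(torch.sum(match_mat, dim=0) > 1.9).squeeze()
bin_match = (match_mat[:, cols] > 0.9).reshape(match_mat.shape[0], -1)
print(bin_match)  # detections 0 and 2 grouped as one person; the weak column is dropped
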
Example #10
class MultiEstimator ( object ):
    def __init__(self, cfg, debug=False):
        self.est2d = Estimator_2d ( DEBUGGING=debug )
        self.extractor = FeatureExtractor ()
        self.cfg = cfg
        self.dataset = None
        # hand bbox
        self.yolo = YOLO("/home/yxs/lit/mvpose/backend/yolo_hand_detection/models/cross-hands.cfg", \
            "/home/yxs/lit/mvpose/backend/yolo_hand_detection/models/cross-hands.weights", ["hand"])
        self.sample_range = 120
        self.expand_rate = 1.5
        # hand pose
        self.tester = Tester('49')
        self.tester._make_batch_generator('test', 'all')
        self.tester._make_model()
        # coord transformation
        self.transform = lambda R, P: (R.t() @ P.t()).t()
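        # maps each row p of P to p @ R, i.e. applies R^T to every joint coordinate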

    def expand_bbox(self, b, x_max, y_max):
        xl, xr = np.clip(int(b[0]-b[2]*(self.expand_rate-1)/2.0), 0, x_max), np.clip(int(b[0]+b[2]+b[2]*(self.expand_rate-1)/2.0), 0, x_max)
        yt, yb = np.clip(int(b[1]-b[3]*(self.expand_rate-1)/2.0), 0, y_max), np.clip(int(b[1]+b[3]+b[3]*(self.expand_rate-1)/2.0), 0, y_max)
        expanded_b = [xl,yt,xr-xl,yb-yt]
        return expanded_b

    def predict(self, imgs, camera_parameter, template_name='Shelf', show=False, plt_id=0):
        # 2d human pose estimation for all views
        info_dict = self._infer_single2d ( imgs )

        self.dataset = MemDataset ( info_dict=info_dict, camera_parameter=camera_parameter,
                                    template_name=template_name )
        
        # 3d human pose reconstruction
        pose3d = self._estimate3d ( 0, show=show, plt_id=plt_id )
        
        # hand bbox detection for all views
        pose2d, hand_bbox = self._detect_hand(imgs, info_dict)

        # 3d hand pose estimation
        cam_id = 1
        hand_pose = self._estimate_hand_pose(imgs, hand_bbox, camera_parameter, cam_id)
        
        return pose3d, pose2d, hand_bbox, hand_pose

    def _detect_hand(self, imgs, info_dict):
        pose2d, hand_bbox  = [], []
        for cam_id, img in enumerate(imgs):   # iterate over each view
            pose_tmp, hand_tmp = [], []
            for res in info_dict[cam_id][0]:  # iterate over each person
                pose_tmp.append(res['pose2d'])
                for joint_id in [9,10]:
                    # get rough bbox
                    cx1, cy1 = res['pose2d'][joint_id*3], res['pose2d'][joint_id*3+1]  # left/right wrist joint position
                    max_x1, max_y1 = img.shape[1] - 1, img.shape[0] - 1
                    xl1, xr1 = np.clip(int(cx1-self.sample_range), 0, max_x1), np.clip(int(cx1+self.sample_range), 0, max_x1)  # x-range of the rough bbox
                    yt1, yb1 = np.clip(int(cy1-self.sample_range), 0, max_y1), np.clip(int(cy1+self.sample_range), 0, max_y1)  # y-range of the rough bbox
                    cropped_img = img[yt1:yb1, xl1:xr1, :]
                    # get accurate bbox
                    cnt, conf, bbox = yolo_hand_detection(self.yolo, cropped_img)
                    bbox = [[b[0]+xl1, b[1]+yt1, b[2], b[3]] for b in bbox]            # convert bboxes to original-image coordinates, [x, y, w, h]
                    if cnt == 0:
                        hand_tmp.append(bbox)
                    else:
                        dist = [(cx1-(b[0]+b[2]/2))**2 + (cy1-(b[1]+b[3]/2))**2 for b in bbox]    # squared distance from each bbox center to the joint
                        hand_tmp.append(self.expand_bbox(bbox[np.argmin(dist)], max_x1, max_y1))  # pick the bbox closest to the joint
            pose2d.append(pose_tmp)
            hand_bbox.append(hand_tmp)
        return pose2d, hand_bbox

    def _estimate_hand_pose(self, imgs, hand_bbox, camera_parameter, cam_id):
        img, bbox = imgs[cam_id], hand_bbox[cam_id]
        R = torch.tensor(camera_parameter['RT'][cam_id][:,:-1].astype(np.float32))  # rotation matrix of the cam_id view
        hand_pose = []
        with torch.no_grad():
            for b in bbox:
                if len(b) > 0:
                    # crop img
                    img_patch = cv2.resize(img[b[1]:b[1]+b[3], b[0]:b[0]+b[2], :], (int(cfg.input_img_shape[1]), int(cfg.input_img_shape[0])), interpolation=cv2.INTER_LINEAR)
                    img_patch = img_patch.astype(np.float32)

                    trans = transforms.ToTensor()
                    input = trans(img_patch.astype(np.float32))/255.
                    input = input.unsqueeze(0)
                    
                    inputs = {'img':input}

                    out = self.tester.model(inputs, 'test')
                    joint_coord_out = out['joint_coord'].cpu().numpy()
                    hand_type_out = out['hand_type'].cpu().numpy()

                    idx = np.argmax(hand_type_out)
                    if hand_type_out[0][idx] < 0.8:   # ignore results with confidence below 0.8
                        hand_pose.append([])
                        continue
                    print(hand_type_out, idx)
                    cur_pose = joint_coord_out[0, 21*idx:21*(idx+1), :] - joint_coord_out[0, 21*idx, :]  # pick the higher-confidence hand and shift it so the wrist is the origin
                    hand_pose.append(self.transform(R, torch.tensor(cur_pose)).numpy())
                else:
                    hand_pose.append([])

        return hand_pose

    def _infer_single2d(self, imgs, img_id=0, dir='/home/jiangwen/tmp/Multi'):
        '''
        img_id is always 0
        results[0].keys() = dict_keys(['image_id', 'category_id', 'score', 'keypoints', 'bbox', 'heatmaps', 'crops'])
        '''
        info_dict = dict ()
        for cam_id, img in enumerate ( imgs ):
            results = self.est2d.estimate_2d ( img, img_id )                                                  # type(results): list
            # results = self.est2d.light_estimate_2d(img)
            this_info_dict = {'image_data': cv2.cvtColor ( img.copy (), cv2.COLOR_BGR2RGB )}
            this_info_dict[img_id] = list ()
            for person_id, result in enumerate ( results ):                                                   # type(result): dict
                this_info_dict[img_id].append ( dict () )
                this_info_dict[img_id][person_id]['pose2d'] = result['keypoints']                             # len(result['keypoints]) = 51 -> (17*3)
                # NOTE: bbox is (x, y) (W, H) format where x and y is up-left point.
                this_info_dict[img_id][person_id]['bbox'] = result['bbox']
                bb = np.array ( result['bbox'], dtype=int )
                cropped_img = img[bb[1]:bb[1] + bb[3], bb[0]:bb[0] + bb[2]]
                # numpy format of crop idx is changed to json
                this_info_dict[img_id][person_id]['heatmap_bbox'] = result['crops'].astype ( int ).tolist ()  # = [x, y, x+w, y+h]
                this_info_dict[img_id][person_id]['heatmap_data'] = result['heatmaps']                        # .shape = (17, h, w)
                this_info_dict[img_id][person_id]['cropped_img'] = cv2.cvtColor ( cropped_img.copy (),
                                                                                  cv2.COLOR_BGR2RGB )

            info_dict[cam_id] = this_info_dict
        return info_dict

    def _estimate3d(self, img_id, show=False, plt_id=0):
        data_batch = self.dataset[img_id]
        # ReID affinity matrix
        affinity_mat = self.extractor.get_affinity ( data_batch, rerank=self.cfg.rerank )
        if self.cfg.rerank:
            affinity_mat = torch.from_numpy ( affinity_mat )
            affinity_mat = torch.max ( affinity_mat, affinity_mat.t () )
            affinity_mat = 1 - affinity_mat
        else:
            affinity_mat = affinity_mat.cpu ()
        dimGroup = self.dataset.dimGroup[img_id]

        info_list = list ()
        for cam_id in self.dataset.cam_names:
            info_list += self.dataset.info_dict[cam_id][img_id]    # list of dicts: one dict per person

        # Geometry affinity matrix
        pose_mat = np.array ( [i['pose2d'] for i in info_list] ).reshape ( -1, model_cfg.joint_num, 3 )[..., :2]   # shape = (total detection count, num_joint, 2)
        geo_affinity_mat = geometry_affinity ( pose_mat.copy (), self.dataset.F.numpy (),
                                               self.dataset.dimGroup[img_id] )
        geo_affinity_mat = torch.tensor ( geo_affinity_mat )
        if self.cfg.metric == 'geometry mean':
            W = torch.sqrt ( affinity_mat * geo_affinity_mat )
        elif self.cfg.metric == 'circle':
            W = torch.sqrt ( (affinity_mat ** 2 + geo_affinity_mat ** 2) / 2 )
        elif self.cfg.metric == 'Geometry only':
            W = torch.tensor ( geo_affinity_mat )
        elif self.cfg.metric == 'ReID only':
            W = torch.tensor ( affinity_mat )
        else:
            logger.critical ( 'Get into default option, are you intend to do it?' )
            _alpha = 0.8
            W = _alpha * affinity_mat + (1 - _alpha) * geo_affinity_mat
        W[torch.isnan ( W )] = 0  # Sometimes (e.g. Shelf img 452) torch.sqrt returns NaN when the value is too small
        sub_imgid2cam = np.zeros ( pose_mat.shape[0], dtype=np.int32 )
        for idx, i in enumerate ( range ( len ( dimGroup ) - 1 ) ):
            sub_imgid2cam[dimGroup[i]:dimGroup[i + 1]] = idx     # sub_imgid2cam[person_id] = cam_id

        num_person = 10
        X0 = torch.rand ( W.shape[0], num_person )

        # Use spectral method to initialize assignment matrix.
        if self.cfg.spectral:
            eig_value, eig_vector = W.eig ( eigenvectors=True )
            _, eig_idx = torch.sort ( eig_value[:, 0], descending=True )

            if W.shape[1] >= num_person:
                X0 = eig_vector[eig_idx[:num_person]].t ()
            else:
                X0[:, :W.shape[1]] = eig_vector.t ()



        match_mat = matchSVT ( W, dimGroup, alpha=self.cfg.alpha_SVT, _lambda=self.cfg.lambda_SVT,
                               dual_stochastic_SVT=self.cfg.dual_stochastic_SVT )


        bin_match = match_mat[:, torch.nonzero ( torch.sum ( match_mat, dim=0 ) > 1.9 ).squeeze ()] > 0.9
        bin_match = bin_match.reshape ( W.shape[0], -1 )

        matched_list = [[] for i in range ( bin_match.shape[1] )]
        for sub_imgid, row in enumerate ( bin_match ):
            if row.sum () != 0:
                pid = row.argmax ()
                matched_list[pid].append ( sub_imgid )

        matched_list = [np.array ( i ) for i in matched_list]
        if self.cfg.hybrid:
            multi_pose3d = self._hybrid_kernel ( matched_list, pose_mat, sub_imgid2cam, img_id )
            chosen_img = [[]] * len ( sub_imgid2cam )
        else:
            multi_pose3d, chosen_img = self._top_down_pose_kernel ( geo_affinity_mat, matched_list, pose_mat,
                                                                    sub_imgid2cam )
        if show:  # hybrid not implemented yet.
            bin_match = match_mat[:, torch.nonzero ( torch.sum ( match_mat, dim=0 ) > 0.9 ).squeeze ()] > 0.9
            bin_match = bin_match.reshape ( W.shape[0], -1 )
            matched_list = [[] for i in range ( bin_match.shape[1] )]
            for sub_imgid, row in enumerate ( bin_match ):
                if row.sum () != 0:
                    pid = row.argmax ()
                    matched_list[pid].append ( sub_imgid )
            matched_list = [np.array ( i ) for i in matched_list]
            show_panel_mem ( self.dataset, matched_list, info_list, sub_imgid2cam, img_id, affinity_mat,
                             geo_affinity_mat, W, plt_id, multi_pose3d )
            plotPaperRows ( self.dataset, matched_list, info_list, sub_imgid2cam, img_id, affinity_mat,
                            geo_affinity_mat, W, plt_id, multi_pose3d )

        return multi_pose3d

    def _hybrid_kernel(self, matched_list, pose_mat, sub_imgid2cam, img_id):
        return pictorial.hybrid_kernel ( self, matched_list, pose_mat, sub_imgid2cam, img_id )
        multi_pose3d = list ()

        for person in matched_list:
            # use bottom-up approach to get the 3D pose of person
            if person.shape[0] <= 1:
                continue

            # step1: use the 2D joint of person to triangulate the 3D joints candidates

            # person's 17 3D joints candidates
            candidates = np.zeros ( (17, person.shape[0] * (person.shape[0] - 1) // 2, 3) )
            # 17xC^2_nx3
            cnt = 0
            for i in range ( person.shape[0] ):
                for j in range ( i + 1, person.shape[0] ):
                    cam_id_i, cam_id_j = sub_imgid2cam[person[i]], sub_imgid2cam[person[j]]
                    projmat_i, projmat_j = self.dataset.P[cam_id_i], self.dataset.P[cam_id_j]
                    pose2d_i, pose2d_j = pose_mat[person[i]].T, pose_mat[person[j]].T
                    pose3d_homo = cv2.triangulatePoints ( projmat_i, projmat_j, pose2d_i, pose2d_j )
                    pose3d_ij = pose3d_homo[:3] / pose3d_homo[3]
                    candidates[:, cnt] += pose3d_ij.T
                    cnt += 1

            unary = self.dataset.get_unary ( person, sub_imgid2cam, candidates, img_id )

            # step2: use the max-product algorithm to inference to get the 3d joint of the person

            # change the coco order
            coco_2_skel = [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
            candidates = np.array ( candidates )[coco_2_skel]
            unary = unary[coco_2_skel]
            skel = pictorial.getskel ()
            # construct pictorial model
            edges = pictorial.getPictoStruct ( skel, self.dataset.distribution )
            xp = pictorial.inferPict3D_MaxProd ( unary, edges, candidates )
            human = np.array ( [candidates[i][j] for i, j in zip ( range ( xp.shape[0] ), xp )] )
            human_coco = np.zeros ( (17, 3) )
            human_coco[[0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]] = human
            human_coco[[1, 2, 3, 4]] = human_coco[0]  # only to make the visualization look nice; not real ears and eyes
            human_coco = human_coco.T
            if self.cfg.reprojection_refine and len ( person ) > 2:
                for joint_idx in range ( human_coco.shape[1] ):
                    reprojected_error = np.zeros ( len ( person ) )
                    for idx, pid in enumerate ( person ):
                        human_coco_homo = np.ones ( 4 )
                        human_coco_homo[:3] = human_coco[:, joint_idx]
                        projected_pose_homo = self.dataset.P[sub_imgid2cam[pid]] @ human_coco_homo
                        projected_pose = projected_pose_homo[:2] / projected_pose_homo[2]
                        reprojected_error[idx] += np.linalg.norm ( projected_pose - pose_mat[pid, joint_idx] )
                    # import IPython; IPython.embed()
                    # pose_select = reprojected_error < self.cfg.refine_threshold
                    pose_select = (reprojected_error - reprojected_error.mean ()) / reprojected_error.std () < self.cfg.refine_threshold
                    if pose_select.sum () >= 2:
                        Ps = list ()
                        Ys = list ()
                        for idx, is_selected in enumerate ( pose_select ):
                            if is_selected:
                                Ps.append ( self.dataset.P[sub_imgid2cam[person[idx]]] )
                                Ys.append ( pose_mat[person[idx], joint_idx].reshape ( -1, 1 ) )
                        Ps = torch.tensor ( Ps, dtype=torch.float32 )
                        Ys = torch.tensor ( Ys, dtype=torch.float32 )
                        Xs = multiTriIter ( Ps, Ys )
                        refined_pose = (Xs[:3] / Xs[3]).numpy ()
                        human_coco[:, joint_idx] = refined_pose.reshape ( -1 )
            if True or check_bone_length ( human_coco ):
                multi_pose3d.append ( human_coco )
        return multi_pose3d

    def _top_down_pose_kernel(self, geo_affinity_mat, matched_list, pose_mat, sub_imgid2cam):
        multi_pose3d = list ()
        chosen_img = list ()
        for person in matched_list:
            Graph = geo_affinity_mat[person][:, person].clone ().numpy ()
            Graph *= (1 - np.eye ( Graph.shape[0] ))  # make diagonal 0
            if len ( Graph ) < 2:
                continue
            elif len ( Graph ) > 2:
                if self.cfg.use_mincut:
                    cut0, cut1 = find_mincut ( Graph.copy () )
                    cut = cut0 if len ( cut0 ) > len ( cut1 ) else cut1
                    cut = cut.astype ( int )
                    sub_imageid = person[cut]
                else:
                    sub_imageid = get_min_reprojection_error ( person, self.dataset, pose_mat, sub_imgid2cam )
            else:
                sub_imageid = person

            _, rank = torch.sort ( geo_affinity_mat[sub_imageid][:, sub_imageid].sum ( dim=0 ) )
            sub_imageid = sub_imageid[rank[:2]]
            cam_id_0, cam_id_1 = sub_imgid2cam[sub_imageid[0]], sub_imgid2cam[sub_imageid[1]]
            projmat_0, projmat_1 = self.dataset.P[cam_id_0], self.dataset.P[cam_id_1]
            pose2d_0, pose2d_1 = pose_mat[sub_imageid[0]].T, pose_mat[sub_imageid[1]].T
            pose3d_homo = cv2.triangulatePoints ( projmat_0, projmat_1, pose2d_0, pose2d_1 )
            if self.cfg.use_bundle:
                pose3d_homo = bundle_adjustment ( pose3d_homo, person, self.dataset, pose_mat, sub_imgid2cam,
                                                  logging=logger )
            pose3d = pose3d_homo[:3] / (pose3d_homo[3] + 10e-6)
            # pose3d -= ((pose3d[:, 11] + pose3d[:, 12]) / 2).reshape ( 3, -1 ) # No need to normalize to hip
            if check_bone_length ( pose3d ):
                multi_pose3d.append ( pose3d )
            else:
                # logging.info ( f'A pose proposal deleted on {img_id}:{person}' )
                sub_imageid = list ()
                pass
            chosen_img.append ( sub_imageid )
        return multi_pose3d, chosen_img
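
A worked example of expand_bbox in Example #10 (expand_rate = 1.5, ignoring clipping): a 100x60 box grows by 25% of its size on each side, so width and height become 1.5x the original.

b = [200, 150, 100, 60]  # [x, y, w, h]
rate = 1.5
xl = int(b[0] - b[2] * (rate - 1) / 2)         # 200 - 25 = 175
xr = int(b[0] + b[2] + b[2] * (rate - 1) / 2)  # 300 + 25 = 325
yt = int(b[1] - b[3] * (rate - 1) / 2)         # 150 - 15 = 135
yb = int(b[1] + b[3] + b[3] * (rate - 1) / 2)  # 210 + 15 = 225
print([xl, yt, xr - xl, yb - yt])              # [175, 135, 150, 90]
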
Example #11
class MultiEstimator (object):
    def __init__(self, cfg, debug=False):
        self.est2d = Estimator_2d(DEBUGGING=debug)
        self.extractor = FeatureExtractor()
        self.cfg = cfg
        self.dataset = None

    def predict(self, imgs, camera_parameter, template_name='Shelf', show=False, plt_id=0):
        info_dict = self._infer_single2d ( imgs )
        self.dataset = MemDataset ( info_dict=info_dict, camera_parameter=camera_parameter,
                                    template_name=template_name )
        return self._estimate3d ( 0, show=show, plt_id=plt_id )

    def _infer_single2d(self, imgs, img_id=0, dir='/home/jiangwen/tmp/Multi'):
        info_dict = dict ()
        for cam_id, img in enumerate ( imgs ):
            results = self.est2d.estimate_2d ( img, img_id )
            this_info_dict = {'image_data': cv2.cvtColor ( img.copy (), cv2.COLOR_BGR2RGB )}
            this_info_dict[img_id] = list ()
            for person_id, result in enumerate ( results ):
                this_info_dict[img_id].append ( dict () )
                this_info_dict[img_id][person_id]['pose2d'] = result['keypoints']
                # NOTE: bbox is (x, y) (W, H) format where x and y is up-left point.
                this_info_dict[img_id][person_id]['bbox'] = result['bbox']
                bb = np.array ( result['bbox'], dtype=int )
                cropped_img = img[bb[1]:bb[1] + bb[3], bb[0]:bb[0] + bb[2]]
                # numpy format of crop idx is changed to json
                this_info_dict[img_id][person_id]['heatmap_bbox'] = result['crops'].astype ( int ).tolist ()
                this_info_dict[img_id][person_id]['heatmap_data'] = result['heatmaps']
                this_info_dict[img_id][person_id]['cropped_img'] = cv2.cvtColor ( cropped_img.copy (),
                                                                                  cv2.COLOR_BGR2RGB )

            info_dict[cam_id] = this_info_dict
        return info_dict

    def _estimate3d(self, img_id, show=False, plt_id=0):
        data_batch = self.dataset[img_id]
        affinity_mat = self.extractor.get_affinity ( data_batch, rerank=self.cfg.rerank )
        if self.cfg.rerank:
            affinity_mat = torch.from_numpy ( affinity_mat )
            affinity_mat = torch.max ( affinity_mat, affinity_mat.t () )
            affinity_mat = 1 - affinity_mat
        else:
            affinity_mat = affinity_mat.cpu ()
        dimGroup = self.dataset.dimGroup[img_id]

        info_list = list ()
        for cam_id in self.dataset.cam_names:
            info_list += self.dataset.info_dict[cam_id][img_id]

        pose_mat = np.array ( [i['pose2d'] for i in info_list] ).reshape ( -1, model_cfg.joint_num, 3 )[..., :2]
        geo_affinity_mat = geometry_affinity ( pose_mat.copy (), self.dataset.F.numpy (),
                                               self.dataset.dimGroup[img_id] )
        geo_affinity_mat = torch.tensor ( geo_affinity_mat )
        if self.cfg.metric == 'geometry mean':
            W = torch.sqrt ( affinity_mat * geo_affinity_mat )
        elif self.cfg.metric == 'circle':
            W = torch.sqrt ( (affinity_mat ** 2 + geo_affinity_mat ** 2) / 2 )
        elif self.cfg.metric == 'Geometry only':
            W = torch.tensor ( geo_affinity_mat )
        elif self.cfg.metric == 'ReID only':
            W = torch.tensor ( affinity_mat )
        else:
            logger.critical ( 'Get into default option, are you intend to do it?' )
            _alpha = 0.3
            W = _alpha * affinity_mat + (1 - _alpha) * geo_affinity_mat
        W[torch.isnan ( W )] = 0  # Sometimes (e.g. Shelf img 452) torch.sqrt returns NaN when the value is too small
        sub_imgid2cam = np.zeros ( pose_mat.shape[0], dtype=np.int32 )
        for idx, i in enumerate ( range ( len ( dimGroup ) - 1 ) ):
            sub_imgid2cam[dimGroup[i]:dimGroup[i + 1]] = idx

        num_person = 10
        X0 = torch.rand ( W.shape[0], num_person )

        # Use spectral method to initialize assignment matrix.
        if self.cfg.spectral:
            eig_value, eig_vector = W.eig ( eigenvectors=True )
            _, eig_idx = torch.sort ( eig_value[:, 0], descending=True )

            if W.shape[1] >= num_person:
                X0 = eig_vector[eig_idx[:num_person]].t ()
            else:
                X0[:, :W.shape[1]] = eig_vector.t ()

        match_mat = matchSVT(W, dimGroup, alpha=self.cfg.alpha_SVT, _lambda=self.cfg.lambda_SVT,
                             dual_stochastic_SVT=self.cfg.dual_stochastic_SVT)


        bin_match = match_mat[:, torch.nonzero ( torch.sum ( match_mat, dim=0 ) > 1.9 ).squeeze ()] > 0.9
        bin_match = bin_match.reshape ( W.shape[0], -1 )

        matched_list = [[] for i in range ( bin_match.shape[1] )]
        for sub_imgid, row in enumerate ( bin_match ):
            if row.sum () != 0:
                pid = row.argmax ()
                matched_list[pid].append ( sub_imgid )

        matched_list = [np.array ( i ) for i in matched_list]
        if self.cfg.hybrid:
            multi_pose3d = self._hybrid_kernel ( matched_list, pose_mat, sub_imgid2cam, img_id )
            chosen_img = [[]] * len ( sub_imgid2cam )
        else:
            multi_pose3d, chosen_img = self._top_down_pose_kernel ( geo_affinity_mat, matched_list, pose_mat,
                                                                    sub_imgid2cam )
        if show:  # hybrid not implemented yet.
            bin_match = match_mat[:, torch.nonzero ( torch.sum ( match_mat, dim=0 ) > 0.9 ).squeeze ()] > 0.9
            bin_match = bin_match.reshape ( W.shape[0], -1 )
            matched_list = [[] for i in range ( bin_match.shape[1] )]
            for sub_imgid, row in enumerate ( bin_match ):
                if row.sum () != 0:
                    pid = row.argmax ()
                    matched_list[pid].append ( sub_imgid )
            matched_list = [np.array ( i ) for i in matched_list]
            show_panel_mem ( self.dataset, matched_list, info_list, sub_imgid2cam, img_id, affinity_mat,
                             geo_affinity_mat, W, plt_id, multi_pose3d )
            plotPaperRows ( self.dataset, matched_list, info_list, sub_imgid2cam, img_id, affinity_mat,
                            geo_affinity_mat, W, plt_id, multi_pose3d )

        return multi_pose3d

    def _hybrid_kernel(self, matched_list, pose_mat, sub_imgid2cam, img_id):
        return pictorial.hybrid_kernel ( self, matched_list, pose_mat, sub_imgid2cam, img_id )
        multi_pose3d = list ()

        for person in matched_list:
            # use bottom-up approach to get the 3D pose of person
            if person.shape[0] <= 1:
                continue

            # step1: use the 2D joint of person to triangulate the 3D joints candidates

            # person's 17 3D joints candidates
            candidates = np.zeros ( (17, person.shape[0] * (person.shape[0] - 1) // 2, 3) )
            # 17xC^2_nx3
            cnt = 0
            for i in range ( person.shape[0] ):
                for j in range ( i + 1, person.shape[0] ):
                    cam_id_i, cam_id_j = sub_imgid2cam[person[i]], sub_imgid2cam[person[j]]
                    projmat_i, projmat_j = self.dataset.P[cam_id_i], self.dataset.P[cam_id_j]
                    pose2d_i, pose2d_j = pose_mat[person[i]].T, pose_mat[person[j]].T
                    pose3d_homo = cv2.triangulatePoints ( projmat_i, projmat_j, pose2d_i, pose2d_j )
                    pose3d_ij = pose3d_homo[:3] / pose3d_homo[3]
                    candidates[:, cnt] += pose3d_ij.T
                    cnt += 1

            unary = self.dataset.get_unary ( person, sub_imgid2cam, candidates, img_id )

            # step2: use the max-product algorithm to inference to get the 3d joint of the person

            # change the coco order
            coco_2_skel = [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
            candidates = np.array ( candidates )[coco_2_skel]
            unary = unary[coco_2_skel]
            skel = pictorial.getskel ()
            # construct pictorial model
            edges = pictorial.getPictoStruct ( skel, self.dataset.distribution )
            xp = pictorial.inferPict3D_MaxProd ( unary, edges, candidates )
            human = np.array ( [candidates[i][j] for i, j in zip ( range ( xp.shape[0] ), xp )] )
            human_coco = np.zeros ( (17, 3) )
            human_coco[[0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]] = human
            human_coco[[1, 2, 3, 4]] = human_coco[0]  # only to make the visualization look nice; not real ears and eyes
            human_coco = human_coco.T
            if self.cfg.reprojection_refine and len ( person ) > 2:
                for joint_idx in range ( human_coco.shape[1] ):
                    reprojected_error = np.zeros ( len ( person ) )
                    for idx, pid in enumerate ( person ):
                        human_coco_homo = np.ones ( 4 )
                        human_coco_homo[:3] = human_coco[:, joint_idx]
                        projected_pose_homo = self.dataset.P[sub_imgid2cam[pid]] @ human_coco_homo
                        projected_pose = projected_pose_homo[:2] / projected_pose_homo[2]
                        reprojected_error[idx] += np.linalg.norm ( projected_pose - pose_mat[pid, joint_idx] )
                    # import IPython; IPython.embed()
                    # pose_select = reprojected_error < self.cfg.refine_threshold
                    pose_select = (reprojected_error - reprojected_error.mean ()) / reprojected_error.std () < self.cfg.refine_threshold
                    if pose_select.sum () >= 2:
                        Ps = list ()
                        Ys = list ()
                        for idx, is_selected in enumerate ( pose_select ):
                            if is_selected:
                                Ps.append ( self.dataset.P[sub_imgid2cam[person[idx]]] )
                                Ys.append ( pose_mat[person[idx], joint_idx].reshape ( -1, 1 ) )
                        Ps = torch.tensor ( Ps, dtype=torch.float32 )
                        Ys = torch.tensor ( Ys, dtype=torch.float32 )
                        Xs = multiTriIter ( Ps, Ys )
                        refined_pose = (Xs[:3] / Xs[3]).numpy ()
                        human_coco[:, joint_idx] = refined_pose.reshape ( -1 )
            if True or check_bone_length ( human_coco ):
                multi_pose3d.append ( human_coco )
        return multi_pose3d

    def _top_down_pose_kernel(self, geo_affinity_mat, matched_list, pose_mat, sub_imgid2cam):
        multi_pose3d = list ()
        chosen_img = list ()
        for person in matched_list:
            Graph = geo_affinity_mat[person][:, person].clone ().numpy ()
            Graph *= (1 - np.eye ( Graph.shape[0] ))  # make diagonal 0
            if len ( Graph ) < 2:
                continue
            elif len ( Graph ) > 2:
                if self.cfg.use_mincut:
                    cut0, cut1 = find_mincut ( Graph.copy () )
                    cut = cut0 if len ( cut0 ) > len ( cut1 ) else cut1
                    cut = cut.astype ( int )
                    sub_imageid = person[cut]
                else:
                    sub_imageid = get_min_reprojection_error ( person, self.dataset, pose_mat, sub_imgid2cam )
            else:
                sub_imageid = person

            _, rank = torch.sort ( geo_affinity_mat[sub_imageid][:, sub_imageid].sum ( dim=0 ) )
            sub_imageid = sub_imageid[rank[:2]]
            cam_id_0, cam_id_1 = sub_imgid2cam[sub_imageid[0]], sub_imgid2cam[sub_imageid[1]]
            projmat_0, projmat_1 = self.dataset.P[cam_id_0], self.dataset.P[cam_id_1]
            pose2d_0, pose2d_1 = pose_mat[sub_imageid[0]].T, pose_mat[sub_imageid[1]].T
            pose3d_homo = cv2.triangulatePoints ( projmat_0, projmat_1, pose2d_0, pose2d_1 )
            if self.cfg.use_bundle:
                pose3d_homo = bundle_adjustment ( pose3d_homo, person, self.dataset, pose_mat, sub_imgid2cam,
                                                  logging=logger )
            pose3d = pose3d_homo[:3] / (pose3d_homo[3] + 10e-6)
            # pose3d -= ((pose3d[:, 11] + pose3d[:, 12]) / 2).reshape ( 3, -1 ) # No need to normalize to hip
            if check_bone_length ( pose3d ):
                multi_pose3d.append ( pose3d )
            else:
                # logging.info ( f'A pose proposal deleted on {img_id}:{person}' )
                sub_imageid = list ()
                pass
            chosen_img.append ( sub_imageid )
        return multi_pose3d, chosen_img
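
A minimal numeric sketch of the 'geometry mean' fusion used in _estimate3d: W is the elementwise geometric mean of the ReID and geometry affinities, with NaNs from torch.sqrt zeroed exactly as in the code above.

import torch

affinity_mat = torch.tensor([[1.0, 0.8], [0.8, 1.0]])      # ReID similarity
geo_affinity_mat = torch.tensor([[1.0, 0.5], [0.5, 1.0]])  # epipolar consistency
W = torch.sqrt(affinity_mat * geo_affinity_mat)
W[torch.isnan(W)] = 0
print(W)  # off-diagonal entries ~= sqrt(0.8 * 0.5) ~= 0.632
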
Example #12
def evaluate(model, actor3D, range_, loader, is_info_dicts=False, dump_dir=None):
    # builds a 3-D numpy array. actor3D is an array of 3 arrays, each of shape (2000, 1),
    # so check_result has shape (2000, 3, 10)
    check_result = np.zeros ( (len ( actor3D[0] ), len ( actor3D ), 10), dtype=np.int32 )
    accuracy_cnt = 0
    error_cnt = 0
    for idx, imgs in enumerate ( tqdm ( loader ) ):
        img_id = range_[idx]
        try:
            if is_info_dicts:
                info_dicts = numpify ( imgs )
                model.dataset = MemDataset ( info_dict=info_dicts, camera_parameter=camera_parameter,
                                             template_name='Unified' )
                poses3d = model._estimate3d ( 0, show=False )
            else:
                # does exactly the same thing as the demo:
                # imgs is a list of 3 tensors, one per camera view;
                # each img_batch has shape (288, 360, 3)
                this_imgs = list ()
                for img_batch in imgs:
                    this_imgs.append ( img_batch.squeeze ().numpy () )
                poses3d = model.predict ( imgs=this_imgs, camera_parameter=camera_parameter, template_name='Unified',
                                          show=False )
        except Exception as e:
            logger.critical ( e )
            poses3d = False

        for pid in range ( len ( actor3D ) ):
            if actor3D[pid][img_id][0].shape == (1, 0) or actor3D[pid][img_id][0].shape == (0, 0):

                continue

            if not poses3d:
                check_result[img_id, pid, :] = -1
                logger.error ( f'Cannot get any pose in img:{img_id}' )
                continue
            model_poses = np.stack ( [coco2shelf3D ( i ) for i in deepcopy ( poses3d )] )
            gt_pose = actor3D[pid][img_id][0]
            dist = vectorize_distance ( np.expand_dims ( gt_pose, 0 ), model_poses )
            model_pose = model_poses[np.argmin ( dist[0] )]

            bones = [[0, 1], [1, 2], [3, 4], [4, 5], [6, 7], [7, 8], [9, 10], [10, 11], [12, 13]]
            for i, bone in enumerate ( bones ):
                start_point, end_point = bone
                if is_right ( model_pose[start_point], model_pose[end_point], gt_pose[start_point],
                              gt_pose[end_point] ):
                    check_result[img_id, pid, i] = 1
                    accuracy_cnt += 1
                else:
                    check_result[img_id, pid, i] = -1
                    error_cnt += 1
            gt_hip = (gt_pose[2] + gt_pose[3]) / 2
            model_hip = (model_pose[2] + model_pose[3]) / 2
            if is_right ( model_hip, model_pose[12], gt_hip, gt_pose[12] ):
                check_result[img_id, pid, -1] = 1
                accuracy_cnt += 1
            else:
                check_result[img_id, pid, -1] = -1
                error_cnt += 1
    bone_group = OrderedDict (
        [('Head', np.array ( [8] )), ('Torso', np.array ( [9] )), ('Upper arms', np.array ( [5, 6] )),
         ('Lower arms', np.array ( [4, 7] )), ('Upper legs', np.array ( [1, 2] )),
         ('Lower legs', np.array ( [0, 3] ))] )

    total_avg = np.sum ( check_result > 0 ) / np.sum ( np.abs ( check_result ) )
    person_wise_avg = np.sum ( check_result > 0, axis=(0, 2) ) / np.sum ( np.abs ( check_result ), axis=(0, 2) )

    bone_wise_result = OrderedDict ()
    bone_person_wise_result = OrderedDict ()
    for k, v in bone_group.items ():
        bone_wise_result[k] = np.sum ( check_result[:, :, v] > 0 ) / np.sum ( np.abs ( check_result[:, :, v] ) )
        bone_person_wise_result[k] = np.sum ( check_result[:, :, v] > 0, axis=(0, 2) ) / np.sum (
            np.abs ( check_result[:, :, v] ), axis=(0, 2) )

    tb = PrettyTable ()
    tb.field_names = ['Bone Group'] + [f'Actor {i}' for i in range ( bone_person_wise_result['Head'].shape[0] )] + [
        'Average']
    list_tb = [tb.field_names]
    for k, v in bone_person_wise_result.items ():

        this_row = [k] + [np.char.mod ( '%.4f', i ) for i in v] + [np.char.mod ( '%.4f', np.sum ( v ) / len ( v ) )]
        list_tb.append ( [float ( i ) if isinstance ( i, type ( np.array ( [] ) ) ) else i for i in this_row] )
        tb.add_row ( this_row )
    this_row = ['Total'] + [np.char.mod ( '%.4f', i ) for i in person_wise_avg] + [
        np.char.mod ( '%.4f', np.sum ( person_wise_avg ) / len ( person_wise_avg ) )]
    tb.add_row ( this_row )
    list_tb.append ( [float ( i ) if isinstance ( i, type ( np.array ( [] ) ) ) else i for i in this_row] )
    if dump_dir:
        np.save ( osp.join ( dump_dir, time.strftime ( str ( model_cfg.testing_on ) + "_%Y_%m_%d_%H_%M",
                                                       time.localtime ( time.time () ) ) ), check_result )
        with open ( osp.join ( dump_dir,
                               time.strftime ( str ( model_cfg.testing_on ) + "_%Y_%m_%d_%H_%M.csv",
                                               time.localtime ( time.time () ) ) ), 'w' ) as f:
            writer = csv.writer ( f )
            writer.writerows ( list_tb )
            writer.writerow ( [model_cfg] )
    print ( tb )
    print ( model_cfg )
    return check_result, list_tb
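
Example #12 scores each bone with is_right, which is defined elsewhere in the repo. A hedged sketch of the PCP-style criterion such a check commonly implements (an assumption, not the repo's actual code): a bone counts as correct when both predicted endpoints lie within half the ground-truth bone length of their targets.

import numpy as np

def is_right_sketch(model_start, model_end, gt_start, gt_end, alpha=0.5):
    # Assumed PCP criterion, for illustration only.
    bone_len = np.linalg.norm(gt_start - gt_end)
    start_ok = np.linalg.norm(model_start - gt_start) <= alpha * bone_len
    end_ok = np.linalg.norm(model_end - gt_end) <= alpha * bone_len
    return start_ok and end_ok
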