Example #1
def generate_kpts(video_file):
    final_result, video_name = handle_video(video_file)

    # ============ modified section: keypoint extraction ============

    kpts = []
    no_person = []
    for i in range(len(final_result)):
        if not final_result[i]['result']:  # No people
            no_person.append(i)
            kpts.append(None)
            continue

        # Keep the person with the highest (proposal_score * keypoint area)
        kpt = max(final_result[i]['result'],
                  key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']))['keypoints']

        kpts.append(kpt.data.numpy())

        # Back-fill earlier frames that had no detection with this frame's keypoints
        for n in no_person:
            kpts[n] = kpts[-1]
        no_person.clear()

    # Trailing frames with no detection inherit the previous frame's keypoints
    for n in no_person:
        kpts[n] = kpts[-1] if kpts[-1] is not None else kpts[n - 1]

    # ============ end modified section ============
    print(args.outputpath)
    name = '{0}/{1}.npz'.format(args.outputpath, video_name)
    kpts = np.array(kpts).astype(np.float32)
    print('kpts npz save in ', name)
    np.savez_compressed(name, kpts=kpts)

    return kpts
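The gap-filling above depends on surrounding frames: a frame with no detection borrows the next detected frame's keypoints, and trailing gaps borrow the previous one. A small self-contained sketch of the same strategy (plain strings stand in for keypoint arrays; fill_gaps is an illustrative name, not part of the project):

def fill_gaps(kpts):
    # Frames with no detection (None) take the keypoints of the next detected
    # frame; trailing gaps take the previous detected frame.
    no_person, filled = [], []
    for i, k in enumerate(kpts):
        if k is None:
            no_person.append(i)
            filled.append(None)
            continue
        filled.append(k)
        for n in no_person:
            filled[n] = filled[-1]
        no_person.clear()
    for n in no_person:          # trailing frames with no detection
        filled[n] = filled[n - 1]
    return filled

print(fill_gaps([None, 'f1', None, None, 'f4', None]))
# -> ['f1', 'f1', 'f4', 'f4', 'f4', 'f4']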
Example #2
def remove_irrelevant(no_track_result, save_percent=0.5):
    """
    Do pruning for the image that more than 20 people appear.

    :param no_track_result: AlphaPose result json dict before pruning
    :param save_percent: Kept percentage, (0, 1]
    :return: pruned result dict
    """
    id_map = defaultdict(list)

    for result in no_track_result:
        id_map[result['image_id']].append(result)

    relevant_result = []
    for values in id_map.values():
        num = len(values)
        if num > 0:
            # Rank people by score-weighted keypoint area and keep the top fraction
            values.sort(key=lambda m: m['score'] * calculate_area(m['keypoints']), reverse=True)
            relevant_result.extend(values[: int(num * save_percent)])

    return relevant_result
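A quick way to sanity-check the pruning, assuming remove_irrelevant is pasted into the same script as above; calculate_area here is only a stand-in for the project's helper (bounding-box area over flat x, y, score triplets), and the detections are fabricated:

from collections import defaultdict

import numpy as np

def calculate_area(keypoints):
    # Stand-in helper: bounding-box area of the (x, y) coordinates in a flat
    # [x0, y0, s0, x1, y1, s1, ...] keypoint list.
    pts = np.asarray(keypoints, dtype=np.float32).reshape(-1, 3)[:, :2]
    return float((pts[:, 0].max() - pts[:, 0].min()) * (pts[:, 1].max() - pts[:, 1].min()))

fake_result = [
    {'image_id': 'frame0.jpg', 'score': 0.9, 'keypoints': [0, 0, 1, 10, 10, 1]},
    {'image_id': 'frame0.jpg', 'score': 0.2, 'keypoints': [0, 0, 1, 2, 2, 1]},
]
pruned = remove_irrelevant(fake_result, save_percent=0.5)
print(len(pruned), pruned[0]['score'])  # 1 0.9 -- only the strongest detection survives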
Example #3
    def update(self):

        time1 = time.time()

        _, frame = self.stream.read()
        # frame = cv2.resize(frame, (frame.shape[1]//2,frame.shape[0]//2))

        #TODO TESTING
        # frame[:,:200,:]=0
        # frame[:,450:,:]=0


        img_k, self.orig_img, im_dim_list_k = prep_frame(frame, self.inp_dim)
        
        img = [img_k]
        im_name = ["im_name"]
        im_dim_list = [im_dim_list_k] 

        img = torch.cat(img)
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

        time2 = time.time()


        with torch.no_grad():
            ### detector 
            #########################
            # Human Detection
            img = img.cuda()
            prediction = self.det_model(img, CUDA=True)
            # NMS process
            dets = dynamic_write_results(prediction, opt.confidence,
                                        opt.num_classes, nms=True, nms_conf=opt.nms_thesh)
            if isinstance(dets, int) or dets.shape[0] == 0:   
                self.visualize2dnoperson()
                return None
                
            
            dets = dets.cpu()
            im_dim_list = torch.index_select(im_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.det_inp_dim / im_dim_list, 1)[0].view(-1, 1)

            # Coordinate transfer: undo the YOLO letterbox padding, then rescale
            # the boxes back to original-image coordinates
            dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2

            dets[:, 1:5] /= scaling_factor
            for j in range(dets.shape[0]):
                dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0, im_dim_list[j, 0])
                dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0, im_dim_list[j, 1])
            boxes = dets[:, 1:5]
            scores = dets[:, 5:6]

            boxes_k = boxes[dets[:, 0] == 0]
            if isinstance(boxes_k, int) or boxes_k.shape[0] == 0:
                self.visualize2dnoperson()
                raise NotImplementedError  # no person boxes in this frame
            inps = torch.zeros(boxes_k.size(0), 3, opt.inputResH, opt.inputResW)
            pt1 = torch.zeros(boxes_k.size(0), 2)
            pt2 = torch.zeros(boxes_k.size(0), 2)

            time3 = time.time()


            ### processor 
            #########################
            inp = im_to_torch(cv2.cvtColor(self.orig_img, cv2.COLOR_BGR2RGB))
            inps, pt1, pt2 = self.crop_from_dets(inp, boxes, inps, pt1, pt2)

            ### generator
            #########################            
            self.orig_img = np.array(self.orig_img, dtype=np.uint8)
            # location prediction (n, kp, 2) | score prediction (n, kp, 1)

            datalen = inps.size(0)
            batchSize = 20 #args.posebatch()
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []

            time4 = time.time()

            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) * batchSize, datalen)].cuda()
                hm_j = self.pose_model(inps_j)
                hm.append(hm_j)
            
            
            hm = torch.cat(hm)
            hm = hm.cpu().data

            preds_hm, preds_img, preds_scores = getPrediction(
                hm, pt1, pt2, opt.inputResH, opt.inputResW, opt.outputResH, opt.outputResW)
            result = pose_nms(
                boxes, scores, preds_img, preds_scores)

            time5 = time.time() 
            
                    
            if not result:  # No people detected
                self.visualize2dnoperson()
                return None

            # Keep the person with the highest score-weighted keypoint area
            self.kpt = max(result,
                           key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']))['keypoints']
            self.visualize2d()

            time6 = time.time()
            print("process time : {} ".format(time6 - time5))
            return self.kpt
Example #4
def handle_video(video_file):
    # =========== common ===============
    args.video = video_file
    base_name = os.path.basename(args.video)
    video_name = base_name[:base_name.rfind('.')]
    # =========== end common ===============

    img_path = f'outputs/alpha_pose_{video_name}/split_image/'

    # =========== image ===============
    args.inputpath = img_path
    args.outputpath = f'outputs/alpha_pose_{video_name}'
    if os.path.exists(args.outputpath):
        shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    else:
        os.mkdir(args.outputpath)

    # if not len(video_file):
    #     raise IOError('Error: must contain --video')

    if len(img_path) and img_path != '/':
        # split_image/ is expected to be a flat directory of extracted frames
        for root, dirs, files in os.walk(img_path):
            im_names = sorted([f for f in files if 'png' in f or 'jpg' in f])
    else:
        raise IOError(f'Error: invalid split-image directory: {img_path}')

    # Load input images
    data_loader = ImageLoader(im_names, batchSize=args.detbatch,
                              format='yolo').start()
    print(f'Totally {data_loader.datalen} images')
    # =========== end image ===============

    # =========== video ===============
    # args.outputpath = f'outputs/alpha_pose_{video_name}'
    # if os.path.exists(args.outputpath):
    #     shutil.rmtree(f'{args.outputpath}/vis', ignore_errors=True)
    # else:
    #     os.mkdir(args.outputpath)
    #
    # videofile = args.video
    # mode = args.mode
    #
    # if not len(videofile):
    #     raise IOError('Error: must contain --video')
    #
    # # Load input video
    # data_loader = VideoLoader(videofile, batchSize=args.detbatch).start()
    # (fourcc, fps, frameSize) = data_loader.videoinfo()
    #
    # print('the video is {} f/s'.format(fps))
    # =========== end video ===============

    # Load detection loader
    print('Loading YOLO model..')
    sys.stdout.flush()
    det_loader = DetectionLoader(data_loader, batchSize=args.detbatch).start()
    #  start a thread to read frames from the file video stream
    det_processor = DetectionProcessor(det_loader).start()

    # Load pose model
    pose_dataset = Mscoco()
    if args.fast_inference:
        pose_model = InferenNet_fast(4 * 1 + 1, pose_dataset)
    else:
        pose_model = InferenNet(4 * 1 + 1, pose_dataset)
    pose_model.cuda()
    pose_model.eval()

    runtime_profile = {'dt': [], 'pt': [], 'pn': []}

    # Data writer
    save_path = os.path.join(
        args.outputpath,
        'AlphaPose_' + ntpath.basename(video_file).split('.')[0] + '.avi')
    # writer = DataWriter(args.save_video, save_path, cv2.VideoWriter_fourcc(*'XVID'), fps, frameSize).start()
    writer = DataWriter(args.save_video).start()

    print('Start pose estimation...')
    im_names_desc = tqdm(range(data_loader.length()))
    batchSize = args.posebatch
    for i in im_names_desc:

        start_time = getTime()
        with torch.no_grad():
            (inps, orig_img, im_name, boxes, scores, pt1,
             pt2) = det_processor.read()
            if orig_img is None:
                print(f'{i}-th image read None: handle_video')
                break
            if boxes is None or boxes.nelement() == 0:
                writer.save(None, None, None, None, None, orig_img,
                            im_name.split('/')[-1])
                continue

            ckpt_time, det_time = getTime(start_time)
            runtime_profile['dt'].append(det_time)
            # Pose Estimation

            datalen = inps.size(0)
            leftover = 0
            if datalen % batchSize:
                leftover = 1
            num_batches = datalen // batchSize + leftover
            hm = []
            for j in range(num_batches):
                inps_j = inps[j * batchSize:min((j + 1) *
                                                batchSize, datalen)].cuda()
                hm_j = pose_model(inps_j)
                hm.append(hm_j)
            hm = torch.cat(hm)
            ckpt_time, pose_time = getTime(ckpt_time)
            runtime_profile['pt'].append(pose_time)

            hm = hm.cpu().data
            writer.save(boxes, scores, hm, pt1, pt2, orig_img,
                        im_name.split('/')[-1])

            ckpt_time, post_time = getTime(ckpt_time)
            runtime_profile['pn'].append(post_time)

        if args.profile:
            # TQDM
            im_names_desc.set_description(
                'det time: {dt:.4f} | pose time: {pt:.4f} | post processing: {pn:.4f}'
                .format(dt=np.mean(runtime_profile['dt']),
                        pt=np.mean(runtime_profile['pt']),
                        pn=np.mean(runtime_profile['pn'])))

    if (args.save_img or args.save_video) and not args.vis_fast:
        print(
            '===========================> Rendering remaining images in the queue...'
        )
        print(
            '===========================> If this step takes too long, you can enable the --vis_fast flag to use fast rendering (real-time).'
        )
    while writer.running():
        pass
    writer.stop()
    final_result = writer.results()
    write_json(final_result, args.outputpath)

    kpts = []
    for i in range(len(final_result)):
        # Keep the person with the highest score-weighted keypoint area per frame
        kpt = max(final_result[i]['result'],
                  key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']))['keypoints']
        kpts.append(kpt.data.numpy())

    name = f'{args.outputpath}/{video_name}.npz'
    kpts = np.array(kpts).astype(np.float32)
    print('kpts npz save in ', name)
    np.savez_compressed(name, kpts=kpts)

    return kpts
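handle_video writes the per-frame keypoints to <outputpath>/<video_name>.npz; a minimal sketch of reading that file back (the path below is a placeholder for whatever the function prints after 'kpts npz save in'):

import numpy as np

data = np.load('outputs/alpha_pose_myvideo/myvideo.npz')  # placeholder path
kpts = data['kpts']  # float32 array with one entry per processed frame
print(kpts.shape)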
Example #5
    def update(self):

        # keep looping infinitely
        while True:
            sys.stdout.flush()
            print("generator len : " + str(self.Q.qsize()))

            # if the thread indicator variable is set, stop the
            # thread
            # if self.stopped:
            #     cv2.destroyAllWindows()
            #     if self.save_video:
            #         self.stream.release()
            #     return
            # otherwise, ensure the queue is not empty
            if not self.det_processor.Q.empty():

                with torch.no_grad():
                    (inps, orig_img, im_name, boxes, scores, pt1,
                     pt2) = self.det_processor.read()

                    if orig_img is None:
                        sys.stdout.flush()
                        print(f'{im_name} image read None: handle_video')
                        break

                    orig_img = np.array(orig_img, dtype=np.uint8)
                    if boxes is None or boxes.nelement() == 0:
                        # No detections in this frame: queue a sentinel value so the
                        # consumer can tell "no person" apart from real keypoints
                        im_name = im_name.split('/')[-1]

                        res = {'keypoints': -1, 'image': orig_img}
                        self.Q.put(res)  # TODO

                        # cv2.imwrite("/home/hrs/Desktop/dd/now.jpg", orig_img)

                        # img = orig_img
                        # cv2.imshow("AlphaPose Demo", img)
                        # cv2.waitKey(30)
                        ######################################################################################
                        # self.image = self.ax_in.imshow(orig_img, aspect='equal')
                        # self.image.set_data(orig_img)
                        # plt.draw()
                        # plt.pause(0.000000000000000001)
                        ######################################################################################

                        # if opt.save_img or opt.save_video or opt.vis:
                        #     img = orig_img
                        #     if opt.vis:
                        #         cv2.imshow("AlphaPose Demo", img)
                        #         cv2.waitKey(30)
                        #     if opt.save_img:
                        #         cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img)
                        #     if opt.save_video:
                        #         self.stream.write(img)
                    else:
                        # location prediction (n, kp, 2) | score prediction (n, kp, 1)

                        datalen = inps.size(0)
                        batchSize = 20  #args.posebatch()
                        leftover = 0
                        if datalen % batchSize:
                            leftover = 1
                        num_batches = datalen // batchSize + leftover
                        hm = []

                        # sys.stdout.flush()
                        # print("hhhh")

                        for j in range(num_batches):
                            inps_j = inps[j * batchSize:min(
                                (j + 1) * batchSize, datalen)].cuda()
                            hm_j = self.pose_model(inps_j)
                            hm.append(hm_j)

                        # time1 = time.time()
                        hm = torch.cat(hm)
                        hm = hm.cpu().data

                        hm_data = hm
                        im_name = im_name.split('/')[-1]

                        if opt.matching:
                            preds = getMultiPeakPrediction(
                                hm_data, pt1.numpy(), pt2.numpy(),
                                opt.inputResH, opt.inputResW, opt.outputResH,
                                opt.outputResW)
                            result = matching(boxes, scores.numpy(), preds)
                        else:
                            preds_hm, preds_img, preds_scores = getPrediction(
                                hm_data, pt1, pt2, opt.inputResH,
                                opt.inputResW, opt.outputResH, opt.outputResW)
                            result = pose_nms(boxes, scores, preds_img,
                                              preds_scores)
                        result = {'imgname': im_name, 'result': result}
                        self.final_result.append(result)

                        # time2 = time.time()
                        # print(time2-time1)
                        ######################################################################################
                        # img = vis_frame(orig_img, result)

                        # cv2.imshow("AlphaPose Demo", img)
                        # cv2.imwrite("/home/hrs/Desktop/dd/now.jpg", img)
                        # cv2.waitKey(30)
                        ########################################################################
                        # self.point.set_offsets(keypoints[self.i])

                        # self.image = self.ax_in.imshow(orig_img, aspect='equal')
                        # self.image.set_data(orig_img)
                        # plt.draw()
                        # plt.pause(0.000000000000000001)
                        ##########################################################################
                        if not result['result']:  # No people detected in this frame
                            res = {'keypoints': -1, 'image': orig_img}
                            self.Q.put(res)  # TODO
                        else:
                            # Keep the person with the highest score-weighted keypoint area
                            kpt = max(result['result'],
                                      key=lambda x: x['proposal_score'].data[0] * calculate_area(x['keypoints']))['keypoints']

                            res = {'keypoints': kpt, 'image': orig_img}

                            self.Q.put(res)

                            # kpt_np = kpt.numpy()
                            # n = kpt_np.shape[0]
                            # print(kpt_np.shape)
                            # point_list = [(kpt_np[m, 0], kpt_np[m, 1]) for m in range(17)]
                            # for point in point_list:
                            #     cv2.circle(pose_img, point, 1, (0, 43, 32), 4)

                        # cv2.imshow(self.window, pose_img)
                        # cv2.waitKey()

                        # if opt.save_img or opt.save_video or opt.vis:
                        #     img = vis_frame(orig_img, result)
                        #     if opt.vis:
                        #         cv2.imshow("AlphaPose Demo", img)
                        #         cv2.waitKey(30)
                        #     if opt.save_img:
                        #         cv2.imwrite(os.path.join(opt.outputpath, 'vis', im_name), img)
                        #     if opt.save_video:
                        #         self.stream.write(img)
            else:
                time.sleep(0.1)
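The generator above only fills self.Q; whatever consumes it has to treat keypoints == -1 as "no person in this frame". A minimal consumer sketch, assuming self.Q is a standard queue.Queue as the qsize()/put() calls above suggest:

import queue

def consume(generator, max_frames=1000):
    # Drain the generator's queue; a keypoints value of -1 marks a frame
    # in which no person was detected.
    for _ in range(max_frames):
        try:
            res = generator.Q.get(timeout=1.0)
        except queue.Empty:
            break
        if isinstance(res['keypoints'], int) and res['keypoints'] == -1:
            continue  # no person in this frame
        kpt = res['keypoints']    # per-person keypoint tensor
        frame = res['image']      # original frame as a uint8 ndarray
        # ... feed kpt / frame into visualization or a downstream model ...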