Example #1
 def detect(self, img, bbox):
     """Locate facial landmarks inside ``bbox`` using the ONNX backend.

     Crops and normalizes the face region, runs the ONNX session, maps the
     raw landmark coordinates back into the original image frame, smooths
     them with the tracker, and estimates head pose.

     Returns:
         (landmark, pose): tracked landmark array and the first column of
         the 3D pitch/roll/yaw estimate from ``get_head_pose``.
     """
     face, meta = self.crop_image(img, bbox)
     # Normalize pixels to roughly [-1, 1] (assumes uint8-range input — TODO confirm).
     face = (face - 127.0) / 127.0
     # HWC -> NCHW float32 batch of one for the ONNX runtime.
     face = np.array([np.transpose(face, (2, 0, 1))]).astype(np.float32)
     t0 = time.time()
     raw = self.sess.run(None, {self.input_name: face})[0][0]
     t1 = time.time()
     print("ONNX Inference Time: {:.6f}".format(t1 - t0))
     # First 136 outputs are 68 (x, y) landmark pairs in crop coordinates.
     pts = raw[0:136].reshape((-1, 2))
     # Rescale/offset back into the original image using the crop metadata.
     pts[:, 1] = pts[:, 1] * meta[0] + meta[2]
     pts[:, 0] = pts[:, 0] * meta[1] + meta[3]
     pts = self.tracker.track(img, pts)
     _, PRY_3d = get_head_pose(pts, img)
     return pts, PRY_3d[:, 0]
 def detect(self, img, bbox):
     """Locate facial landmarks inside ``bbox`` using the PyTorch backend.

     Mirrors the ONNX variant: crop + normalize, forward pass on CUDA under
     ``no_grad``, rescale landmarks to image coordinates, track, and return
     landmarks plus head-pose angles.
     """
     face, meta = self.crop_image(img, bbox)
     face = (face - 127.0) / 127.0          # normalize to ~[-1, 1]
     face = np.array([np.transpose(face, (2, 0, 1))])  # HWC -> NCHW, batch of 1
     batch = torch.tensor(face).float().cuda()
     with torch.no_grad():
         t0 = time.time()
         # .cpu() blocks until the GPU finishes, so the timing below is valid.
         raw = self.model(batch)[0].cpu().numpy()
         t1 = time.time()
         print("PyTorch Inference Time: {:.6f}".format(t1 - t0))
         pts = raw[0:136].reshape((-1, 2))  # 68 (x, y) pairs in crop space
     # Map crop-space coordinates back into the original image frame.
     pts[:, 1] = pts[:, 1] * meta[0] + meta[2]
     pts[:, 0] = pts[:, 0] * meta[1] + meta[3]
     pts = self.tracker.track(img, pts)
     _, PRY_3d = get_head_pose(pts, img)
     return pts, PRY_3d[:, 0]
Example #3
 def detect(self, img, bbox):
     """Locate facial landmarks inside ``bbox`` using the MNN backend.

     Same pipeline as the other backends: crop + normalize, copy the tensor
     into the MNN session, run inference, rescale landmarks into image
     coordinates, track, and return landmarks plus head-pose angles.
     """
     face, meta = self.crop_image(img, bbox)
     face = (face - 127.0) / 127.0          # normalize to ~[-1, 1]
     face = np.array([np.transpose(face, (2, 0, 1))]).astype(np.float32)
     # Wrap the numpy buffer in an MNN tensor (Caffe/NCHW layout) and feed it.
     host = MNN.Tensor((1, 3, *self.detection_size), MNN.Halide_Type_Float, face,
                       MNN.Tensor_DimensionType_Caffe)
     self.input_tensor.copyFrom(host)
     t0 = time.time()
     self.interpreter.runSession(self.session)
     raw = np.array(self.interpreter.getSessionOutput(self.session).getData())
     t1 = time.time()
     print("MNN Inference Time: {:.6f}".format(t1 - t0))
     pts = raw[0:136].reshape((-1, 2))      # 68 (x, y) pairs in crop space
     # Map crop-space coordinates back into the original image frame.
     pts[:, 1] = pts[:, 1] * meta[0] + meta[2]
     pts[:, 0] = pts[:, 0] * meta[1] + meta[3]
     pts = self.tracker.track(img, pts)
     _, PRY_3d = get_head_pose(pts, img)
     return pts, PRY_3d[:, 0]
    def __getitem__(self, item):
        """Load one sample and apply (optional) training-time augmentation.

        Args:
            item: index into ``self.lst``.

        Returns:
            crop_image: float32 CHW image, pixels normalized to roughly [-1, 1].
            label: float32 vector = flattened 68 (x, y) landmarks normalized by
                crop width/height, followed by the 3 euler angles scaled by
                1/90, followed by a 4-element state vector
                (left_eye_close, right_eye_close, mouth_close, big_mouth_open).

        Raises:
            ValueError: when the sample has no keypoints.  (The original code
            fell through and crashed with UnboundLocalError at the final
            normalization lines; fail fast with a clear message instead.)
        """
        dp = self.lst[item]
        fname = dp['image_path']
        keypoints = dp['keypoints']
        bbox = dp['bbox']
        if keypoints is None:
            raise ValueError("sample has no keypoints: %s" % fname)

        if ".jpg" in fname:
            # presumably a faster libjpeg-based decoder; verify it returns the
            # same channel order as cv2.imread (BGR) — TODO confirm
            image = jpeg.imread(fname)
        else:
            image = cv2.imread(fname)

        # BUG FIX: np.float was removed in NumPy >= 1.20; np.float64 is the
        # exact dtype the old alias resolved to, so behavior is unchanged.
        label = np.array(keypoints, dtype=np.float64).reshape((-1, 2))
        bbox = np.array(bbox)
        crop_image, label = self.augmentationCropImage(
            image, bbox, label, self.training_flag)

        if self.training_flag:
            if random.uniform(0, 1) > 0.5:
                crop_image, label = Mirror(crop_image,
                                           label=label,
                                           symmetry=symmetry)
            # NOTE(review): "> 0.0" makes rotation (effectively) always run;
            # looks intentional but confirm it should not be "> 0.5".
            if random.uniform(0, 1) > 0.0:
                angle = random.uniform(-45, 45)
                crop_image, label = Rotate_aug(crop_image,
                                               label=label,
                                               angle=angle)
            if random.uniform(0, 1) > 0.5:
                strength = random.uniform(0, 50)
                crop_image, label = Affine_aug(crop_image,
                                               strength=strength,
                                               label=label)
            if random.uniform(0, 1) > 0.5:
                crop_image = self.color_augmentor(crop_image)
            if random.uniform(0, 1) > 0.5:
                crop_image = pixel_jitter(crop_image, 15)
            if random.uniform(0, 1) > 0.5:
                crop_image = Img_dropout(crop_image, 0.2)
            if random.uniform(0, 1) > 0.5:
                crop_image = Padding_aug(crop_image, 0.3)

        # Head pose from the (augmented) landmarks; angles scaled to ~[-1, 1].
        reprojectdst, euler_angle = get_head_pose(label, crop_image)
        PRY = euler_angle.reshape([-1]).astype(np.float32) / 90.

        # Binary eye/mouth state flags.
        cla_label = np.zeros([4])
        if dp['left_eye_close']:
            cla_label[0] = 1
        if dp['right_eye_close']:
            cla_label[1] = 1
        if dp['mouth_close']:
            cla_label[2] = 1
        if dp['big_mouth_open']:
            cla_label[3] = 1

        crop_image_height, crop_image_width, _ = crop_image.shape

        # Normalize landmark coordinates to [0, 1] by crop size.
        label = label.astype(np.float32)
        label[:, 0] = label[:, 0] / crop_image_width
        label[:, 1] = label[:, 1] / crop_image_height

        crop_image = crop_image.astype(np.float32)
        label = label.reshape([-1]).astype(np.float32)
        cla_label = cla_label.astype(np.float32)
        label = np.concatenate([label, PRY, cla_label], axis=0)

        # Normalize pixels to ~[-1, 1] and convert HWC -> CHW.
        crop_image = (crop_image - 127.0) / 127.0
        crop_image = np.transpose(crop_image, (2, 0, 1)).astype(np.float32)
        return crop_image, label