def detect(self, img, bbox):
    """Predict landmarks and head pose for one face using the ONNX session.

    Args:
        img: Source frame; forwarded to ``self.crop_image``,
            ``self.tracker.track`` and ``get_head_pose``.
        bbox: Face box forwarded to ``self.crop_image``.

    Returns:
        ``(landmark, pose)``: ``landmark`` is whatever ``self.tracker.track``
        returns for the 68 predicted ``(x, y)`` points after they are mapped
        back with ``detail``; ``pose`` is column 0 of the second value
        returned by ``get_head_pose``.
    """
    crop_image, detail = self.crop_image(img, bbox)

    # Scale with (x - 127) / 127, move the last axis first (presumably
    # HWC -> CHW) and add a leading batch axis of size 1.
    crop_image = (crop_image - 127.0) / 127.0
    crop_image = np.array([np.transpose(crop_image, (2, 0, 1))]).astype(np.float32)

    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() is wall-clock and may be coarse or jump.
    start = time.perf_counter()
    raw = self.sess.run(None, {self.input_name: crop_image})[0][0]
    end = time.perf_counter()
    print("ONNX Inference Time: {:.6f}".format(end - start))

    # The first 136 outputs are 68 (x, y) pairs.
    landmark = raw[0:136].reshape((-1, 2))
    # detail[1]/detail[3] scale and offset x, detail[0]/detail[2] do the same
    # for y (presumably crop height, width, top, left -- confirm against
    # self.crop_image).
    landmark[:, 0] = landmark[:, 0] * detail[1] + detail[3]
    landmark[:, 1] = landmark[:, 1] * detail[0] + detail[2]

    landmark = self.tracker.track(img, landmark)
    _, PRY_3d = get_head_pose(landmark, img)
    return landmark, PRY_3d[:, 0]
def detect(self, img, bbox):
    """Predict landmarks and head pose for one face using the PyTorch model.

    The input tensor is moved to the GPU with ``.cuda()``, so a CUDA device
    is required.

    Args:
        img: Source frame; forwarded to ``self.crop_image``,
            ``self.tracker.track`` and ``get_head_pose``.
        bbox: Face box forwarded to ``self.crop_image``.

    Returns:
        ``(landmark, pose)``: ``landmark`` is whatever ``self.tracker.track``
        returns for the 68 predicted ``(x, y)`` points after they are mapped
        back with ``detail``; ``pose`` is column 0 of the second value
        returned by ``get_head_pose``.
    """
    crop_image, detail = self.crop_image(img, bbox)

    # Scale with (x - 127) / 127, move the last axis first (presumably
    # HWC -> CHW) and add a leading batch axis of size 1.
    crop_image = (crop_image - 127.0) / 127.0
    # Cast to float32 in NumPy and wrap the buffer directly, instead of
    # building a tensor in the array's original dtype and converting it.
    crop_image = np.array([np.transpose(crop_image, (2, 0, 1))]).astype(np.float32)
    crop_image = torch.from_numpy(crop_image).cuda()

    with torch.no_grad():
        # perf_counter() is a monotonic, high-resolution clock meant for
        # timing short intervals. The .cpu() copy waits for the GPU result,
        # so the interval covers the full forward pass.
        start = time.perf_counter()
        raw = self.model(crop_image)[0].cpu().numpy()
        end = time.perf_counter()
    print("PyTorch Inference Time: {:.6f}".format(end - start))

    # The first 136 outputs are 68 (x, y) pairs.
    landmark = raw[0:136].reshape((-1, 2))
    # detail[1]/detail[3] scale and offset x, detail[0]/detail[2] do the same
    # for y (presumably crop height, width, top, left -- confirm against
    # self.crop_image).
    landmark[:, 0] = landmark[:, 0] * detail[1] + detail[3]
    landmark[:, 1] = landmark[:, 1] * detail[0] + detail[2]

    landmark = self.tracker.track(img, landmark)
    _, PRY_3d = get_head_pose(landmark, img)
    return landmark, PRY_3d[:, 0]
def detect(self, img, bbox):
    """Predict landmarks and head pose for one face using the MNN session.

    Args:
        img: Source frame; forwarded to ``self.crop_image``,
            ``self.tracker.track`` and ``get_head_pose``.
        bbox: Face box forwarded to ``self.crop_image``.

    Returns:
        ``(landmark, pose)``: ``landmark`` is whatever ``self.tracker.track``
        returns for the 68 predicted ``(x, y)`` points after they are mapped
        back with ``detail``; ``pose`` is column 0 of the second value
        returned by ``get_head_pose``.
    """
    crop_image, detail = self.crop_image(img, bbox)

    # Scale with (x - 127) / 127, move the last axis first (presumably
    # HWC -> CHW) and add a leading batch axis of size 1.
    crop_image = (crop_image - 127.0) / 127.0
    crop_image = np.array([np.transpose(crop_image, (2, 0, 1))]).astype(np.float32)

    # Wrap the array in a host tensor of shape (1, 3, *detection_size) and
    # copy it into the session's input tensor.
    tmp_input = MNN.Tensor(
        (1, 3, *self.detection_size),
        MNN.Halide_Type_Float,
        crop_image,
        MNN.Tensor_DimensionType_Caffe,
    )
    self.input_tensor.copyFrom(tmp_input)

    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() is wall-clock and may be coarse or jump.
    start = time.perf_counter()
    self.interpreter.runSession(self.session)
    raw = np.array(self.interpreter.getSessionOutput(self.session).getData())
    end = time.perf_counter()
    print("MNN Inference Time: {:.6f}".format(end - start))

    # The first 136 outputs are 68 (x, y) pairs.
    landmark = raw[0:136].reshape((-1, 2))
    # detail[1]/detail[3] scale and offset x, detail[0]/detail[2] do the same
    # for y (presumably crop height, width, top, left -- confirm against
    # self.crop_image).
    landmark[:, 0] = landmark[:, 0] * detail[1] + detail[3]
    landmark[:, 1] = landmark[:, 1] * detail[0] + detail[2]

    landmark = self.tracker.track(img, landmark)
    _, PRY_3d = get_head_pose(landmark, img)
    return landmark, PRY_3d[:, 0]
def __getitem__(self, item):
    """Load sample ``item``, augment it and build the network input and target.

    The record ``self.lst[item]`` supplies the image path, the keypoints, the
    face box and four boolean attribute flags. The image is cropped with
    ``self.augmentationCropImage``; when ``self.training_flag`` is set a chain
    of random augmentations is applied to the crop (and to the keypoints
    where the augmentation moves them).

    Args:
        item: Index into ``self.lst``.

    Returns:
        ``(crop_image, label)``, both float32 arrays. ``crop_image`` is the
        crop scaled with ``(x - 127) / 127`` and transposed with axes
        ``(2, 0, 1)``. ``label`` is the 1-D concatenation of: the keypoints
        divided by the crop width (x) and height (y), the flattened second
        output of ``get_head_pose`` divided by 90, and the four attribute
        flags as 0/1.

    Raises:
        ValueError: If the record has no keypoints.
    """
    dp = self.lst[item]

    fname = dp['image_path']
    keypoints = dp['keypoints']
    bbox = dp['bbox']

    # Without keypoints no label can be built; fail with a clear message
    # rather than with an unbound-variable error further down.
    if keypoints is None:
        raise ValueError("sample {} ({}) has no keypoints".format(item, fname))

    if ".jpg" in fname:
        image = jpeg.imread(fname)
    else:
        image = cv2.imread(fname)

    # The np.float alias was removed in NumPy 1.24; np.float64 is the same
    # dtype and works on every NumPy version.
    label = np.array(keypoints, dtype=np.float64).reshape((-1, 2))
    bbox = np.array(bbox)

    crop_image, label = self.augmentationCropImage(
        image, bbox, label, self.training_flag)

    if self.training_flag:
        if random.uniform(0, 1) > 0.5:
            crop_image, label = Mirror(crop_image, label=label, symmetry=symmetry)
        # Threshold 0.0: rotation is applied to practically every sample.
        if random.uniform(0, 1) > 0.0:
            angle = random.uniform(-45, 45)
            crop_image, label = Rotate_aug(crop_image, label=label, angle=angle)
        if random.uniform(0, 1) > 0.5:
            strength = random.uniform(0, 50)
            crop_image, label = Affine_aug(crop_image, strength=strength, label=label)
        # The remaining augmentations only touch pixels, not keypoints.
        if random.uniform(0, 1) > 0.5:
            crop_image = self.color_augmentor(crop_image)
        if random.uniform(0, 1) > 0.5:
            crop_image = pixel_jitter(crop_image, 15)
        if random.uniform(0, 1) > 0.5:
            crop_image = Img_dropout(crop_image, 0.2)
        if random.uniform(0, 1) > 0.5:
            crop_image = Padding_aug(crop_image, 0.3)

    # Head pose is computed from the keypoints while they are still in crop
    # pixel coordinates, i.e. before the normalisation below.
    _, euler_angle = get_head_pose(label, crop_image)
    PRY = euler_angle.reshape([-1]).astype(np.float32) / 90.

    # Binary attribute targets, in a fixed order.
    attribute_keys = ('left_eye_close', 'right_eye_close',
                      'mouth_close', 'big_mouth_open')
    cla_label = np.zeros([4])
    for index, key in enumerate(attribute_keys):
        if dp[key]:
            cla_label[index] = 1
    cla_label = cla_label.astype(np.float32)

    # Express keypoints as fractions of the crop width / height.
    crop_image_height, crop_image_width, _ = crop_image.shape
    label = label.astype(np.float32)
    label[:, 0] = label[:, 0] / crop_image_width
    label[:, 1] = label[:, 1] / crop_image_height
    label = label.reshape([-1]).astype(np.float32)
    label = np.concatenate([label, PRY, cla_label], axis=0)

    crop_image = crop_image.astype(np.float32)
    crop_image = (crop_image - 127.0) / 127.0
    crop_image = np.transpose(crop_image, (2, 0, 1)).astype(np.float32)
    return crop_image, label